One document matched: draft-ietf-mpls-forwarding-09.xml


<?xml version="1.0" encoding="US-ASCII"?>
<!-- xml2rfc is available at http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [

  <!ENTITY RFC0791 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.0791.xml">
  <!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
  <!ENTITY RFC2474 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2474.xml">
  <!ENTITY RFC2475 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2475.xml">
  <!ENTITY RFC2597 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2597.xml">
  <!ENTITY RFC3031 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3031.xml">
  <!ENTITY RFC3032 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3032.xml">
  <!ENTITY RFC3168 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3168.xml">
  <!ENTITY RFC3209 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3209.xml">
  <!ENTITY RFC3270 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3270.xml">
  <!ENTITY RFC3429 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3429.xml">
  <!ENTITY RFC3443 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3443.xml">
  <!ENTITY RFC3471 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3471.xml">
  <!ENTITY RFC3550 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3550.xml">
  <!ENTITY RFC3828 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3828.xml">
  <!ENTITY RFC3985 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3985.xml">
  <!ENTITY RFC4023 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4023.xml">
  <!ENTITY RFC4090 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4090.xml">
  <!ENTITY RFC4110 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4110.xml">
  <!ENTITY RFC4124 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4124.xml">
  <!ENTITY RFC4182 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4182.xml">
  <!ENTITY RFC4201 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4201.xml">
  <!ENTITY RFC4206 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4206.xml">
  <!ENTITY RFC4221 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4221.xml">
  <!ENTITY RFC4340 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4340.xml">
  <!ENTITY RFC4377 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4377.xml">
  <!ENTITY RFC4379 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4379.xml">
  <!ENTITY RFC4385 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4385.xml">
  <!ENTITY RFC4664 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4664.xml">
  <!ENTITY RFC4817 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4817.xml">
  <!ENTITY RFC4875 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4875.xml">
  <!ENTITY RFC4928 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4928.xml">
  <!ENTITY RFC4950 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4950.xml">
  <!ENTITY RFC4960 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4960.xml">
  <!ENTITY RFC5036 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5036.xml">
  <!ENTITY RFC5082 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5082.xml">
  <!ENTITY RFC5085 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5085.xml">
  <!ENTITY RFC5102 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5102.xml">
  <!ENTITY RFC5129 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5129.xml">
  <!ENTITY RFC5286 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5286.xml">
  <!ENTITY RFC5317 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5317.xml">
  <!ENTITY RFC5332 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5332.xml">
  <!ENTITY RFC5462 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5462.xml">
  <!ENTITY RFC5470 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5470.xml">
  <!ENTITY RFC5513 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5513.xml">
  <!ENTITY RFC5586 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5586.xml">
  <!ENTITY RFC5640 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5640.xml">
  <!ENTITY RFC5695 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5695.xml">
  <!ENTITY RFC5704 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5704.xml">
  <!ENTITY RFC5714 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5714.xml">
  <!ENTITY RFC5715 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5715.xml">
  <!ENTITY RFC5860 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5860.xml">
  <!ENTITY RFC5880 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5880.xml">
  <!ENTITY RFC5884 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5884.xml">
  <!ENTITY RFC5885 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5885.xml">
  <!ENTITY RFC5905 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5905.xml">
  <!ENTITY RFC5920 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5920.xml">
  <!ENTITY RFC6291 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6291.xml">
  <!ENTITY RFC6310 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6310.xml">
  <!ENTITY RFC6371 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6371.xml">
  <!ENTITY RFC6374 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6374.xml">
  <!ENTITY RFC6375 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6375.xml">
  <!ENTITY RFC6378 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6378.xml">
  <!ENTITY RFC6388 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6388.xml">
  <!ENTITY RFC6391 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6391.xml">
  <!ENTITY RFC6424 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6424.xml">
  <!ENTITY RFC6425 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6425.xml">
  <!ENTITY RFC6426 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6426.xml">
  <!ENTITY RFC6427 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6427.xml">
  <!ENTITY RFC6428 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6428.xml">
  <!ENTITY RFC6435 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6435.xml">
  <!ENTITY RFC6438 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6438.xml">
  <!ENTITY RFC6478 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6478.xml">
  <!ENTITY RFC6639 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6639.xml">
  <!ENTITY RFC6669 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6669.xml">
  <!ENTITY RFC6670 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6670.xml">
  <!ENTITY RFC6720 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6720.xml">
  <!ENTITY RFC6790 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6790.xml">
  <!ENTITY RFC6829 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6829.xml">
  <!ENTITY RFC6941 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6941.xml">
  <!ENTITY RFC6976 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6976.xml">
  <!ENTITY RFC6894 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6894.xml">
  <!ENTITY RFC6981 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6981.xml">
  <!ENTITY RFC7023 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.7023.xml">
  <!ENTITY RFC7074 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.7074.xml">
  <!ENTITY RFC7079 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.7079.xml">

  <!ENTITY I-D.ietf-mpls-in-udp SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-mpls-in-udp-05">
  <!ENTITY I-D.ietf-mpls-psc-updates SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-mpls-psc-updates-01">
  <!ENTITY I-D.ietf-mpls-special-purpose-labels SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-mpls-special-purpose-labels-03">
  <!ENTITY I-D.ietf-tictoc-1588overmpls SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-tictoc-1588overmpls-05">

  <!ENTITY I-D.ietf-rtgwg-remote-lfa SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-rtgwg-remote-lfa-04">
  <!ENTITY I-D.ietf-rtgwg-mrt-frr-architecture SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-rtgwg-mrt-frr-architecture-03">

  ]>

<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<?rfc strict="yes" ?>
<?rfc toc="yes"?>
<?rfc tocdepth="4"?>
<?rfc symrefs="yes"?>
<?rfc sortrefs="yes" ?>
<?rfc compact="yes" ?>
<?rfc subcompact="no" ?>
<?rfc comments="yes"?>
<?rfc inline="yes" ?>

<rfc category="info" ipr="trust200902"
     docName="draft-ietf-mpls-forwarding-09">

  <front>
    <title abbrev="MPLS Forwarding">
      MPLS Forwarding Compliance and Performance Requirements</title>

    <author role="editor"
	    fullname="Curtis Villamizar" initials="C." surname="Villamizar">
      <organization abbrev="OCCNC">
	Outer Cape Cod Network Consulting, LLC
      </organization>
      <address>
	<email>curtis@occnc.com</email>
      </address>
    </author>

    <author
	    fullname="Kireeti Kompella" initials="K." surname="Kompella">
      <organization>Juniper Networks</organization>
      <address>
	<email>kireeti@juniper.net</email>
      </address>
    </author>

    <author
	fullname="Shane Amante" initials="S." surname="Amante">
      <organization>Apple Inc.</organization>
      <address>
	<postal>
	  <street>1 Infinite Loop</street>
	  <city>Cupertino, California</city>
	  <code>95014</code>
	</postal>
	<email>samante@apple.com</email>
      </address>
    </author>

    <author
	    fullname="Andrew Malis" initials="A.G." surname="Malis">
      <organization abbrev="Huawei">
	Huawei Technologies
      </organization>
      <address>
        <email>agmalis@gmail.com</email>
      </address>
    </author>

    <author
	    fullname="Carlos Pignataro" initials="C.M." surname="Pignataro">
      <organization abbrev="Cisco">Cisco Systems</organization>
      <address>
	<postal>
	  <street>7200-12 Kit Creek Road</street>
	  <city>Research Triangle Park</city>
	  <code>27709</code>
	  <region>NC</region>
	  <country>US</country>
	</postal>
	<!--
	    phone: +1-919-392-7428
	    facsimile: +1-919-869-1438
	-->
	<email>cpignata@cisco.com</email>
      </address>
    </author>

    <date year="2014" />

    <area>Routing</area>
    <workgroup>MPLS</workgroup>

    <keyword>MPLS</keyword>
    <keyword>ECMP</keyword>
    <keyword>link bundling</keyword>
    <keyword>multipath</keyword>
    <keyword>MPLS-TP</keyword>
    <keyword>forwarding</keyword>

    <abstract>
      <t>
	This document provides guidelines for implementers regarding
	MPLS forwarding and a basis for evaluations of forwarding
	implementations.  Guidelines cover many aspects of MPLS
	forwarding.  Topics are highlighted where implementers might
	otherwise overlook practical requirements which are unstated
	or underemphasized or are optional for conformance to RFCs
	but are often considered mandatory by providers.
      </t>
    </abstract>

  </front>

  <middle>

    <section title="Introduction and Document Scope">

      <t>
	The initial purpose of this document was to address concerns
	raised on the MPLS WG mailing list about shortcomings in
	implementations of MPLS forwarding.  Documenting existing
	misconceptions and potential pitfalls might help avoid
	repeating past mistakes.  The document has grown to address a
	broad set of forwarding requirements.
      </t>
      <t>
	The focus of this document is MPLS forwarding, base pseudowire
	forwarding, and MPLS Operations, Administration, and
	Maintenance (OAM).  The use of the pseudowire control word and
	sequence number is discussed.  Specific pseudowire Attachment
	Circuit (AC) and Native Service Processing (NSP) are out of
	scope.  Specific pseudowire applications, such as various
	forms of Virtual Private Network (VPN), are out of scope.
      </t>
      <t>
	MPLS support for multipath techniques is considered essential
	by many service providers and is useful for other high
	capacity networks.  In order to obtain sufficient entropy from
	MPLS traffic, service providers and others find it essential
	for the MPLS implementation to interpret the MPLS payload as
	IPv4 or IPv6 based on the contents of the first nibble of
	payload.  The use of IP addresses, the IP protocol field, and
	UDP and TCP port number fields in multipath load balancing are
	considered within scope.  The use of any other IP protocol
	fields, such as tunneling protocols carried within IP, is out
	of scope.
      </t>
      <t>
	Implementation details are a local matter and are out of
	scope.  Most interfaces today operate at 1 Gb/s or greater.
	It is assumed that all forwarding operations are implemented
	in specialized forwarding hardware rather than on a general
	purpose processor.  These are often referred to as "fast path"
	and "slow path" processing, respectively.  Some recommendations are made
	regarding implementing control or management plane
	functionality in specialized hardware or with limited
	assistance from specialized hardware.  This advice is based on
	expected control or management protocol loads and on the need
	for denial of service (DoS) protection.
      </t>

      <section title="Abbreviations">

	<t>
	  The following abbreviations are used.
	  <list style="hanging" hangIndent="6">

	    <t hangText="AC">
	      Attachment Circuit (<xref target="RFC3985" />)
	    </t>
	    <t hangText="ACH">
	      Associated Channel Header (pseudowires)
	    </t>
	    <t hangText="ACK">
	      Acknowledgement (TCP flag and type of TCP packet)
	    </t>
	    <t hangText="AIS">
	      Alarm Indication Signal (MPLS-TP OAM)
	    </t>
	    <t hangText="ATM">
	      Asynchronous Transfer Mode (legacy switched circuits)
	    </t>
	    <t hangText="BFD">
	      Bidirectional Forwarding Detection
	    </t>
	    <t hangText="BGP">
	      Border Gateway Protocol
	    </t>
	    <t hangText="CC-CV">
	      Connectivity Check and Connectivity Verification
	    </t>
	    <t hangText="CE">
	      Customer Edge (LDP,  RSVP-TE, other protocols)
	    </t>
	    <t hangText="CPU">
	      Central Processing Unit (computer or microprocessor)
	    </t>
	    <t hangText="CT">
	      Class Type (<xref target="RFC4124" />)
	    </t>
	    <t hangText="CW">
	      Control Word (<xref target="RFC4385" />)
	    </t>
	    <t hangText="DCCP">
	      Datagram Congestion Control Protocol
	    </t>
	    <t hangText="DDoS">
	      Distributed Denial of Service
	    </t>
	    <t hangText="DM">
	      Delay Measurement (MPLS-TP OAM)
	    </t>
	    <t hangText="DSCP">
	      Differentiated Services Code Point
	      (<xref target="RFC2474" />)
	    </t>
	    <t hangText="DWDM">
	      Dense Wavelength Division Multiplexing
	    </t>
	    <t hangText="DoS">
	      Denial of Service
	    </t>
	    <t hangText="E-LSP">
	      EXP-Inferred-PSC LSP (<xref target="RFC3270" />)
	    </t>
	    <t hangText="EBGP">
	      External BGP
	    </t>
	    <t hangText="ECMP">
	      Equal Cost Multi-Path
	    </t>
	    <t hangText="ECN">
	      Explicit Congestion Notification
	      (<xref target="RFC3168" /> and <xref target="RFC5129" />)
	    </t>
	    <t hangText="EL">
	      Entropy Label (<xref target="RFC6790" />)
	    </t>
	    <t hangText="ELI">
	      Entropy Label Indicator (<xref target="RFC6790" />)
	    </t>
	    <t hangText="EXP">
	      Experimental
	      (field in MPLS renamed to TC
	      in <xref target="RFC5462" />)
	    </t>
	    <t hangText="FEC">
	      Forwarding Equivalence Class (LDP), also Forward Error
	      Correction in other contexts
	    </t>
	    <t hangText="FR">
	      Frame Relay (legacy switched circuits)
	    </t>
	    <t hangText="FRR">
	      Fast Reroute (<xref target="RFC4090" />)
	    </t>
	    <t hangText="G-ACh">
	      Generic Associated Channel (<xref target="RFC5586" />)
	    </t>
	    <t hangText="GAL">
	      Generic Associated Channel Label
	      (<xref target="RFC5586" />)
	    </t>
	    <t hangText="GFP">
	      Generic Framing Procedure (used in OTN)
	    </t>
	    <t hangText="GMPLS">
	      Generalized MPLS (<xref target="RFC3471" />)
	    </t>
	    <t hangText="GTSM">
	      Generalized TTL Security Mechanism
	      (<xref target="RFC5082" />)
	    </t>
	    <t hangText="Gb/s">
	      Gigabits per second (billion bits per second)
	    </t>
	    <t hangText="IANA">
	      Internet Assigned Numbers Authority
	    </t>
	    <t hangText="ILM">
	      Incoming Label Map (<xref target="RFC3031" />)
	    </t>
	    <t hangText="IP">
	      Internet Protocol
	    </t>
	    <t hangText="IPVPN">
	      Internet Protocol VPN
	    </t>
	    <t hangText="IPv4">
	      Internet Protocol version 4
	    </t>
	    <t hangText="IPv6">
	      Internet Protocol version 6
	    </t>
	    <t hangText="L-LSP">
	      Label-Only-Inferred-PSC LSP (<xref target="RFC3270" />)
	    </t>
	    <t hangText="L2VPN">
	      Layer 2 VPN
	    </t>
	    <t hangText="LDP">
	      Label Distribution Protocol (<xref target="RFC5036" />)
	    </t>
	    <t hangText="LER">
	      Label Edge Router (<xref target="RFC3031" />)
	    </t>
	    <t hangText="LM">
	      Loss Measurement (MPLS-TP OAM)
	    </t>
	    <t hangText="LSP">
	      Label Switched Path (<xref target="RFC3031" />)
	    </t>
	    <t hangText="LSR">
	      Label Switching Router (<xref target="RFC3031" />)
	    </t>
	    <t hangText="MP2MP">
	      Multipoint to Multipoint
	    </t>
	    <t hangText="MPLS">
	      MultiProtocol Label Switching (<xref target="RFC3031" />)
	    </t>
	    <t hangText="MPLS-TP">
	      MPLS Transport Profile
	      (<xref target="RFC5317" />)
	    </t>
	    <t hangText="Mb/s">
	      Megabits per second (million bits per second)
	    </t>
	    <t hangText="NSP">
	      Native Service Processing (<xref target="RFC3985" />)
	    </t>
	    <t hangText="NTP">
	      Network Time Protocol
	    </t>
	    <t hangText="OAM">
	      Operations, Administration, and Maintenance
	      (<xref target="RFC6291" />)
	    </t>
	    <t hangText="OOB">
	      Out-of-band (not carried within a data channel)
	    </t>
	    <t hangText="OTN">
	      Optical Transport Network
	    </t>
	    <t hangText="P">
	      Provider router (LDP, RSVP-TE, other protocols)
	    </t>
	    <t hangText="P2MP">
	      Point to Multipoint
	    </t>
	    <t hangText="PE">
	      Provider Edge router (LDP, RSVP-TE, other protocols)
	    </t>
	    <t hangText="PHB">
	      Per-Hop-Behavior (<xref target="RFC2475" />)
	    </t>
	    <t hangText="PHP">
	      Penultimate Hop Popping (<xref target="RFC3443" />)
	    </t>
	    <t hangText="POS">
	      Packet over SONET
	    </t>
	    <t hangText="PSC">
	      This abbreviation has multiple interpretations.
	      <list style="numbers">
		<t>
		  Packet Switch Capable (<xref target="RFC3471" />)
		</t>
		<t>
		  PHB Scheduling Class (<xref target="RFC3270" />)
		</t>
		<t>
		  Protection State Coordination
		  (<xref target="RFC6378" />)
		</t>
	      </list>
	    </t>
	    <t hangText="PTP">
	      Precision Time Protocol
	    </t>
	    <t hangText="PW">
	      Pseudowire
	    </t>
	    <t hangText="QoS">
	      Quality of Service
	    </t>
	    <t hangText="RA">
	      Router Alert (<xref target="RFC3032" />)
	    </t>
	    <t hangText="RDI">
	      Remote Defect Indication (MPLS-TP OAM)
	    </t>
	    <t hangText="RSVP-TE">
	      RSVP Traffic Engineering
	    </t>
	    <t hangText="RTP">
	      Real-Time Transport Protocol
	    </t>
	    <t hangText="SCTP">
	      Stream Control Transmission Protocol
	    </t>
	    <t hangText="SDH">
	      Synchronous Digital Hierarchy (European SONET, a form of TDM)
	    </t>
	    <t hangText="SONET">
	      Synchronous Optical Network (US SDH, a form of TDM)
	    </t>
	    <t hangText="T-LDP">
	      Targeted LDP (LDP sessions over more than one hop)
	    </t>
	    <t hangText="TC">
	      Traffic Class (<xref target="RFC5462" />)
	    </t>
	    <t hangText="TCP">
	      Transmission Control Protocol
	    </t>
	    <t hangText="TDM">
	      Time-Division Multiplexing (legacy encapsulations)
	    </t>
	    <t hangText="TOS">
	      Type of Service (see <xref target="RFC2474" />)
	    </t>
	    <t hangText="TTL">
	      Time-to-live (a field in IP and MPLS headers)
	    </t>
	    <t hangText="UDP">
	      User Datagram Protocol
	    </t>
	    <t hangText="UHP">
	      Ultimate Hop Popping (opposite of PHP)
	    </t>
	    <t hangText="VCCV">
	      Virtual Circuit Connectivity Verification
	      (<xref target="RFC5085" />)
	    </t>
	    <t hangText="VLAN">
	      Virtual Local Area Network (Ethernet)
	    </t>
	    <t hangText="VOQ">
	      Virtual Output Queuing (switch fabric design)
	    </t>
	    <t hangText="VPN">
	      Virtual Private Network
	    </t>
	    <t hangText="WG">
	      Working Group
	    </t>

	  </list>
	</t>

      </section>

      <section title="Use of Requirements Language">

	<t>
	  This document is informational.  The upper case
	  <xref target="RFC2119" />
	  key words "MUST", "MUST NOT", "SHOULD", "SHOULD NOT", and
	  "MAY" are used in this document in the following cases.
	</t>
	<t>
	  <list style="numbers">
	    <t>
	      RFC 2119 keywords are used where requirements stated in
	      this document are called for in referenced RFCs.  In
	      most cases the RFC containing the requirement is cited
	      within the statement using an RFC 2119 keyword.
	    </t>
	    <t>
	      RFC 2119 keywords are used where it is explicitly noted
	      that operator experience indicates a requirement, but
	      there are no existing RFC requirements.
	    </t>
	  </list>
	</t>
	<t>
	  Advice provided by this document may be ignored by
	  implementations.  Similarly, implementations not claiming
	  conformance to specific RFCs may ignore the requirements of
	  those RFCs.  In both cases, implementers should consider the
	  risk of doing so.
	</t>

      </section>

      <section title="Apparent Misconceptions">

	<t>
	  In early generations of forwarding silicon (which might now be
	  behind us), there apparently were some misconceptions about
	  MPLS.  The following statements provide clarifications.
	  <list style="numbers">
	    <t>
	      There are practical reasons to have more than one or two
	      labels in an MPLS label stack.  Under some circumstances
	      the label stack can become quite deep.
	      See <xref target="sect.basics" />.
	    </t>
	    <t>
	      The label stack MUST be considered to be arbitrarily
	      deep.  Section 3.27.4. "Hierarchy: LSP Tunnels within
	      LSPs" of RFC3031 states "The label stack mechanism
	      allows LSP tunneling to nest to any depth."
	      <xref target="RFC3031" />
	      If the bottom of the label stack cannot be found, but a
	      sufficient number of labels exist to forward, an LSR
	      MUST forward the packet.  An LSR MUST NOT assume the
	      packet is malformed unless the end of packet is found
	      before bottom of stack.
	      See <xref target="sect.basics" />.
	    </t>
	    <t>
	      In networks where deep label stacks are encountered,
	      they are not rare.  Full packet rate performance is
	      required regardless of label stack depth, except where
	      multiple pop operations are required.
	      See <xref target="sect.basics" />.
	    </t>
	    <t>
	      Research has shown that long bursts of short packets,
	      with IP payload sizes of 40 bytes or 44 bytes, are
	      quite common.
	      This is due to TCP ACK compression
	      <xref target="ACK-compression" />.
	      The following two sub-bullets constitute advice that
	      reflects very common non-negotiable requirements of providers.
	      Implementers may ignore this advice but should consider
	      the risk of doing so.
	      <list style="letters">
		<t>
		  A forwarding engine SHOULD, if practical, be able to
		  sustain an arbitrarily long sequence of small packets
		  arriving at full interface rate.
		</t>
		<t>
		  If indefinite full packet rate for small packets is
		  not practical, a forwarding engine MUST be able to
		  buffer a long sequence of small packets inbound to
		  the on-chip decision engine and sustain full
		  interface rate for some reasonable average packet
		  rate.  Absent this small on-chip buffering,
		  QoS-agnostic packet drops can occur.
		</t>
	      </list>
	      See <xref target="sect.pkt-rate" />.
	    </t>
	    <t>
	      The implementations and system designs MUST support
	      pseudowire control word (CW) if MPLS-TP is supported or if
	      ACH <xref target="RFC5586" /> is being used on a pseudowire.
	      The implementation and system design SHOULD support
	      pseudowire CW even if MPLS-TP and ACH
	      <xref target="RFC5586" />
	      are not used, using instead CW and VCCV Type 1
	      <xref target="RFC5085" />
	      to allow the use of multipath in the underlying network
	      topology without impacting the PW traffic.
	      <xref target="RFC7079" />
	      does note that there are still some deployments where
	      the CW is not always used.  It also notes that
	      many service providers do enable the CW.  See
	      <xref target="sect.pw-cw" />
	      for more discussion on why deployments SHOULD enable the
	      pseudowire CW.
	    </t>
	  </list>
	</t>
	<t>
	  The following statements provide clarification regarding
	  more recent requirements that are often missed.
	  <list style="numbers">
	    <t>
	      The implementer and system designer SHOULD support
	      adding a pseudowire Flow Label
	      <xref target="RFC6391" />.  Deployments MAY enable this
	      feature for appropriate pseudowire types.
	      See <xref target="sect.fat-pw" />.
	    </t>
	    <t>
	      The implementer and system designer SHOULD support
	      adding an MPLS entropy label <xref target="RFC6790" />.
	      Deployments MAY enable this feature.
	      See <xref target="sect.entropy" />.
	    </t>
	  </list>
	</t>
	<t>
	  Non-IETF definitions of MPLS exist and these should not be
	  used as normative texts in place of the relevant IETF RFCs.
	  <xref target="RFC5704" />
	  documents incompatibilities between the IETF definition of
	  MPLS and one such alternative MPLS definition which led to
	  significant issues in the resulting non-IETF specification.
	</t>

      </section>

      <section title="Target Audience">

	<t>
	  This document is intended for multiple audiences:
	  implementer (implementing MPLS forwarding in silicon or in
	  software); systems designer (putting together an MPLS
	  forwarding system); deployer (running an MPLS network).
	  These guidelines are intended to serve the following
	  purposes:
	</t>
	<t>
	  <list style="numbers">
	    <t>
	      Explain what to do and what not to do when a deep label
	      stack is encountered. (audience: implementer)
	    </t>
	    <t>
	      Highlight pitfalls to look for when implementing an MPLS
	      forwarding chip. (audience: implementer)
	    </t>
	    <t>
	      Provide a checklist of features and performance
	      specifications to request.  (audience: systems
	      designer, deployer)
	    </t>
	    <t>
	      Provide a set of tests to perform.  (audience: systems
	      designer, deployer)
	    </t>
	  </list>
	</t>
	<t>
	  The implementer, systems designer, and deployer have a
	  transitive supplier-customer relationship.  It is in the best
	  interest of the supplier to review their product against their
	  customer's checklist and secondary customer's checklist if
	  applicable.
	</t>
	<t>
	  This document identifies and explains many details and
	  potential pitfalls of MPLS forwarding.  It is likely that
	  the identified set of potential pitfalls will later prove
	  to be an incomplete set.
	</t>

      </section>

    </section>

    <section anchor="sect.issues" title="Forwarding Issues">

      <t>
	A brief review of forwarding issues is provided in the
	subsections that follow.  This section provides some
	background on why some of these requirements exist.  The
	questions to ask of suppliers are covered in
	<xref target="sect.ask" />.
	Some guidelines for testing are provided in
	<xref target="sect.test" />.
      </t>

      <section anchor="sect.basics" title="Forwarding Basics">

	<t>
	  Basic MPLS architecture and MPLS encapsulation, and
	  therefore packet forwarding, are defined in <xref
	  target="RFC3031" /> and <xref target="RFC3032" />.  RFC3031
	  and RFC3032 are somewhat LDP centric.  RSVP-TE supports
	  traffic engineering (TE) and fast reroute, features that LDP
	  lacks.  The base document for RSVP-TE based MPLS is <xref
	  target="RFC3209" />.
	</t>
	<t>
	  A few RFCs update RFC3032.  Those with impact on forwarding
	  include the following.
	  <list style="numbers">
	    <t>
	      TTL processing is clarified in <xref target="RFC3443" />.
	    </t>
	    <t>
	      The use of MPLS Explicit NULL is modified in <xref
	      target="RFC4182" />.
	    </t>
	    <t>
	      Differentiated Services is supported by
	      <xref target="RFC3270" /> and <xref target="RFC4124" />.
	      The "EXP" field is renamed to "Traffic Class" in
	      <xref target="RFC5462" />, removing any misconception
	      that it was available for experimentation or could be
	      ignored.
	    </t>
	    <t>
	      ECN is supported by <xref target="RFC5129" />.
	    </t>
	    <t>
	      The MPLS G-ACh and GAL are defined in <xref
	      target="RFC5586" />.
	    </t>
	    <t>
	      <xref target="RFC5332" />
	      redefines the two data link layer codepoints for MPLS
	      packets.
	    </t>
	  </list>
	</t>
	<t>
	  Tunneling encapsulations carrying MPLS, such as
	  MPLS in IP <xref target="RFC4023" />,
	  MPLS in GRE <xref target="RFC4023" />,
	  MPLS in L2TPv3 <xref target="RFC4817" />,
	  or MPLS in UDP <xref target="I-D.ietf-mpls-in-udp" />,
	  are out of scope.
	</t>
	<t>
	  Other RFCs have implications for MPLS forwarding and do not
	  update RFC3032 or RFC3209, including:
	  <list style="numbers">
	    <t>
	      The pseudowire (PW) Associated Channel Header (ACH),
	      defined by <xref target="RFC5085" />, later generalized
	      by the MPLS G-ACh <xref target="RFC5586" />.
	    </t>
	    <t>
	      The entropy label indicator (ELI) and entropy label (EL)
	      are defined by <xref target="RFC6790" />.
	    </t>
	  </list>
	</t>
	<t>
	  A few RFCs update RFC3209.  Those that are listed as
	  updating RFC3209 generally impact only RSVP-TE signaling.
	  Forwarding is modified by major extensions built upon
	  RFC3209.
	</t>
	<t>
	  RFCs which impact forwarding are discussed in the following
	  subsections.
	</t>

	<section anchor="sect.resv-labels"
		 title="MPLS Special Purpose Labels">

	  <t>
	    <xref target="RFC3032" /> specifies that label values 0-15
	    are special purpose labels with special meanings.  
	    <xref target="I-D.ietf-mpls-special-purpose-labels" />
	    renamed these from the term "reserved labels" used in
	    [RFC3032] to "special purpose labels".
	    Three values
	    of NULL label are defined (two of which are later updated
	    by <xref target="RFC4182" />) and a router-alert label is
	    defined.  The original intent was that special purpose labels,
	    except the NULL labels, could be sent to the routing
	    engine CPU rather than be processed in forwarding
	    hardware.  Hardware support is required by new RFCs such
	    as those defining entropy label and OAM processed as a
	    result of receiving a GAL.  For new special purpose labels, some
	    accommodation is needed for LSR that will send the labels
	    to a general purpose CPU or other highly programmable
	    hardware.  For example, ELI will only be sent to LSR which
	    have signaled support for
	    <xref target="RFC6790" />
	    and high OAM packet rate must be negotiated among
	    endpoints.
	  </t>
	  <t>
	    <xref target="RFC3429" /> reserves a label for ITU-T
	    Y.1711; however, Y.1711 does not work with multipath and
	    its use is strongly discouraged.
	  </t>
	  <t>
	    The current list of special purpose labels can be found on the
	    "Multiprotocol Label Switching Architecture (MPLS) Label
	    Values" registry reachable at IANA's pages at
	    <eref target="http://www.iana.org" />.
	  </t>
	  <t>
	    <xref target="I-D.ietf-mpls-special-purpose-labels" />
	    introduces an IANA "Extended Special Purpose MPLS Label
	    Values" registry and makes use of the "extension" label,
	    label 15, to indicate that the next label is an extended
	    special purpose label and requires special handling.  The
	    range of only 16 values for special purpose labels allows
	    a table to be used.  The range of extended special purpose
	    labels with 20 bits available for use may have to be
	    handled in some other way in the unlikely event that in
	    the future the range of currently reserved values
	    256-1048575 is used.  If only the standards action range,
	    16-239, and the experimental range, 240-255, are used,
	    then a table of 256 entries can be used.
	  </t>
	  <t>
	    Unknown special purpose labels and unknown extended
	    special purpose labels are handled the same.  When an
	    unknown special purpose label is encountered or a special
	    purpose label not directly handled in forwarding hardware
	    is encountered, the packet should be sent to a general
	    purpose CPU by default.  If this capability is supported,
	    there must be an option to either drop or rate limit such
	    packets on a per special purpose label value basis.
	  </t>

	</section>

	<section anchor="sect.qos" title="MPLS Differentiated Services">

	  <t>
	    <xref target="RFC2474" />
	    deprecates the IP Type of Service (TOS) and IP Precedence
	    (Prec) fields and replaces them with the Differentiated
	    Services Field more commonly known as the Differentiated
	    Services Code Point (DSCP) field.
	    <xref target="RFC2475" />
	    defines the Differentiated Services architecture, which in
	    other forums, is often called a Quality of Service (QoS)
	    architecture.
	  </t>
	  <t>
	    MPLS uses the Traffic Class (TC) field to support
	    Differentiated Services <xref target="RFC5462" />.  There
	    are two primary documents describing how DSCP is mapped
	    into TC.
	    <list style="numbers">
	      <t>
		<xref target="RFC3270" />
		defines E-LSP and L-LSP.  E-LSP uses a static mapping
		of DSCP into TC.  L-LSP uses a per LSP mapping of DSCP
		into TC, with one PHB Scheduling Class (PSC) per
		L-LSP.  Each PSC can use multiple Per-Hop Behavior
		(PHB) values.  For example, the Assured Forwarding
		service defines three PSC, each with three PHB
		<xref target="RFC2597" />.
	      </t>
	      <t>
		<xref target="RFC4124" />
		defines assignment of a class-type (CT) to an LSP,
		where a per CT static mapping of TC to PHB is used.
		<xref target="RFC4124" />
		provides a means to support up to eight E-LSP-like
		mappings of DSCP to TC.
	      </t>
	    </list>
	  </t>
	  <t>
	    To meet Differentiated Services requirements specified in
	    <xref target="RFC3270" />, the following forwarding
	    requirements must be met.
	    An ingress LER MUST be able to select an LSP and then
	    apply a per LSP map of DSCP into TC.  A midpoint LSR MUST
	    be able to apply a per LSP map of TC to PHB.  The number
	    of mappings supported will be far less than the number of
	    LSP supported.
	  </t>
	  <t>
	    To meet Differentiated Services requirements specified in
	    <xref target="RFC4124" />, the following forwarding
	    requirements must be met.  An ingress LER MUST be able to
	    select an LSP and then apply a per LSP map of DSCP into
	    TC.  A midpoint LSR MUST be able to apply a per LSP to
	    CT map and then use Class Type (CT) to map TC to PHB.
	    Since there are only eight allowed values of CT, only
	    eight maps of TC to PHB need to be supported.  The LSP
	    label can be used directly to find the TC to PHB mapping,
	    as is needed to support <xref target="RFC3270" /> L-LSP.
	  </t>
	  <t>
	    While support for
	    <xref target="RFC4124" />
	    and not
	    <xref target="RFC3270" />
	    would allow support for only eight mappings of TC to PHB,
	    it is common to support both and simply state a limit on
	    the number of unique TC to PHB mappings which can be
	    supported.
	  </t>

	</section>

	<section anchor="sect.time-sync" title="Time Synchronization">

	  <t>
	    PTP or NTP may be carried over MPLS
	    <xref target="I-D.ietf-tictoc-1588overmpls" />.  Generally
	    NTP will be carried within IP with IP carried in MPLS
	    <xref target="RFC5905" />.  Both PTP and NTP benefit from
	    accurate time stamping of incoming packets and the ability
	    to insert accurate time stamps in outgoing packets.
	    PTP correction which occurs when forwarding requires
	    updating a timestamp compensation field based on the
	    difference between packet arrival at an LSR and packet
	    transmit time at that same LSR.
	  </t>
	  <t>
	    Since the label stack depth may vary, hardware should
	    allow a timestamp to be placed in an outgoing packet at
	    any specified byte position.  It may be necessary to
	    modify layer-2 checksums or frame check sequences after
	    insertion.  PTP and NTP timestamp formats differ in such
	    a way as to require different implementations of the
	    timestamp correction.
	    If NTP or PTP is carried over UDP/IP or UDP/IP/MPLS, the
	    UDP checksum will also have to be updated.
	  </t>
	  <t>
	    Accurate time synchronization in addition to being
	    generally useful is required for MPLS-TP delay measurement
	    (DM) OAM.  See <xref target="sect.tp-oam" />.
	  </t>

	</section>

	<section anchor="sect.early-deep"
		 title="Uses of Multiple Label Stack Entries">

	  <t>
	    MPLS deployments in the early part of the prior decade
	    (circa 2000) tended to support either LDP or RSVP-TE.  LDP
	    was favored by some for its ability to scale to
	    a very large number of PE devices at the edge of the
	    network, without adding deployment complexity.  RSVP-TE
	    was favored, generally in the network core, where traffic
	    engineering and/or fast reroute were considered important.
	  </t>
	  <t>
	    Both LDP and RSVP-TE are used simultaneously within major
	    Service Provider networks using a technique known as "LDP
	    over RSVP-TE Tunneling".  
	    This technique allows service providers to carry LDP
	    tunnels inside RSVP-TE tunnels. This makes it possible to
	    take advantage of the Traffic Engineering and Fast
	    Re-Route on more expensive Inter-City and
	    Inter-Continental transport paths. The ingress RSVP-TE PE
	    places many LDP tunnels on a single RSVP-TE LSP and
	    carries them to the egress RSVP-TE PE. The LDP PEs are
	    situated further from the core, for example within a metro
	    network.
	    LDP over RSVP-TE
	    tunneling requires a minimum of two MPLS labels: one each
	    for LDP and RSVP-TE.
	  </t>
	  <t>
	    The use of MPLS FRR <xref target="RFC4090" /> might add one
	    more label to MPLS traffic, but only when FRR protection
	    is in use (active).  If LDP over RSVP-TE is in use, and FRR
	    protection is in use, then at least three MPLS labels are
	    present on the label stack on the links through which the
	    Bypass LSP traverses.  FRR is covered in
	    <xref target="sect.frr" />.
	  </t>
	  <t>
	    LDP L2VPN, LDP IPVPN, BGP L2VPN, and BGP IPVPN added
	    support for VPN services that are deployed by the vast
	    majority of service providers.  These VPN services added
	    yet another label, bringing the label stack depth (when
	    FRR is active) to four.
	  </t>
	  <t>
	    Pseudowires and VPN are discussed in further detail in
	    <xref target="sect.pw" /> and
	    <xref target="sect.vpn" />.
	  </t>
	  <t>
	    MPLS hierarchy as described in
	    <xref target="RFC4206" />
	    and updated by
	    <xref target="RFC7074" />
	    can in principle add at least one additional label.  MPLS
	    hierarchy is discussed in
	    <xref target="sect.hierarchy" />.
	  </t>
	  <t>
	    Other features such as Entropy Label (discussed in
	    <xref target="sect.entropy" />) and Flow Label (discussed
	    in <xref target="sect.fat-pw" />) can add additional
	    labels to the label stack.
	  </t>
	  <t>
	    Although theoretical scenarios can easily result in eight
	    or more labels, such cases are rare if they occur at all
	    today.  For the purpose of forwarding, only the top label
	    needs to be examined if PHP is used, a few more if UHP is
	    used (see <xref target="sect.tp-uhp" />).  For deep label
	    stacks, quite a few labels may have to be examined for the
	    purpose of load balancing across parallel links (see
	    <xref target="sect.multipath" />), however this depth can
	    be bounded by a provider through use of Entropy Label.
	  </t>
	  <t>
	    Other creative use of MPLS within the IETF, such as the
	    use of MPLS label stack in source routing, may result in
	    label stacks that are considerably deeper than those
	    encountered today.
	  </t>

	</section>

	<section anchor="sect.link-bundle" title="MPLS Link Bundling">

	  <t>
	    MPLS Link Bundling was the first RFC to address the need for
	    multiple parallel links between nodes <xref target="RFC4201"
	    />.  MPLS Link Bundling is notable in that it tried not to
	    change MPLS forwarding, except in specifying the "All-Ones"
	    component link.  MPLS Link Bundling is seldom if ever
	    deployed.  Instead multipath techniques described in <xref
	    target="sect.multipath" /> are used.
	  </t>

	</section>

	<section anchor="sect.hierarchy" title="MPLS Hierarchy">

	  <t>
	    MPLS hierarchy is defined in <xref target="RFC4206" /> and
	    updated by <xref target="RFC7074" />.
	    Although RFC4206 is considered part of GMPLS, the Packet
	    Switching Capable (PSC) portion of the MPLS hierarchy is
	    applicable to MPLS and may be supported in an otherwise
	    GMPLS free implementation.  The MPLS PSC hierarchy remains
	    the most likely means of providing further scaling in an
	    RSVP-TE MPLS network, particularly where the network is
	    designed to provide RSVP-TE connectivity to the edges.
	    This is the case for envisioned MPLS-TP networks.  The use
	    of the MPLS PSC hierarchy can add at least one additional
	    label to a label stack, though it is likely that only one
	    layer of PSC will be used in the near future.
	  </t>

	</section>

	<section anchor="sect.frr" title="MPLS Fast Reroute (FRR)">

	  <t>
	    Fast reroute is defined by <xref target="RFC4090" />.  Two
	    significantly different methods are defined in RFC4090,
	    the "One-to-One Backup" method which uses the "Detour LSP"
	    and the "Facility Backup" which uses a "bypass tunnel".
	    These are commonly referred to as the detour and bypass
	    methods respectively.
	  </t>
	  <t>
	    The detour method makes use of a presignaled LSP.
	    Hardware assistance is needed for detour FRR only if
	    necessary to accomplish local repair of a large number of
	    LSP within the 10s of milliseconds target.  For each
	    affected LSP a swap operation must be reprogrammed or
	    otherwise switched over.  The use of detour FRR doubles
	    the number of LSP terminating at any given hop and will
	    increase the number of LSP within a network by a factor
	    dependent on the average detour path length.
	  </t>
	  <t>
	    The bypass method makes use of a tunnel that is unused
	    when no fault exists but may carry many LSP when a local
	    repair is required.  There is no presignaling indicating
	    which working LSP will be diverted into any specific
	    bypass LSP.  
	    If interface label space is used the bypass LSP MUST
	    extend one hop beyond the merge point, except if the merge
	    point is the egress and PHP is used.
	    If the bypass LSP are not extended in this way, then
	    the merge LSR (egress LSR of the bypass LSP)
	    MUST use platform label space (as defined in
	    <xref target="RFC3031" />) so that an LSP working path on
	    any given interface can be backed up using a bypass LSP
	    terminating on any other interface.  Hardware assistance
	    is needed if necessary to accomplish local repair of a
	    large number of LSP within the 10s of milliseconds target.
	    For each affected LSP a swap operation must be
	    reprogrammed or otherwise switched over with an additional
	    push of the bypass LSP label.  The use of
	    platform label space impacts the size of the LSR ILM for
	    LSR with a very large number of interfaces.
	  </t>
	  <t>
	    IP/LDP Fast Reroute (IP/LDP FRR)
	    <xref target="RFC5714" />
	    is also applicable in MPLS networks.
	    ECMP and Loop-Free
	    Alternates (LFA)
	    <xref target="RFC5286" />
	    are well established IP/LDP FRR techniques and were the
	    first methods to be widely deployed.
	    Work on IP/LDP FRR is ongoing within the IETF RTGWG.
	    Two topics actively discussed in RTGWG are
	    microloops and partial coverage of the established
	    techniques in some network topologies.
	    <xref target="RFC5715" />
	    covers the topic of IP/LDP Fast Reroute microloops and
	    microloop prevention.
	    RTGWG has developed additional IP/LDP FRR techniques to
	    handle coverage concerns.  RTGWG is extending LFA through
	    the use of remote LFA
	    <xref target="I-D.ietf-rtgwg-remote-lfa" />.
	    Other techniques that require new forwarding paths to be
	    established are also under consideration, including the
	    IPFRR "not-via" technique defined in
	    <xref target="RFC6981" />
	    and maximally redundant trees (MRT)
	    <xref target="I-D.ietf-rtgwg-mrt-frr-architecture" />.
	    ECMP, LFA (but not remote LFA) and MRT swap the top label
	    to an alternate MPLS label. The other methods operate in a
	    similar manner to RFC 4090 facility backup and push an
	    additional label.
	    IP/LDP FRR methods which push more than one label have
	    been suggested but are in early discussion.
	  </t>

	</section>

	<section anchor="sect.pw" title="Pseudowire Encapsulation">

	  <t>
	    The pseudowire (PW) architecture is defined in
	    <xref target="RFC3985" />.
	    A pseudowire, when carried over MPLS, adds one or more
	    additional label entries to the MPLS label stack.
	    A PW Control Word is defined in
	    <xref target="RFC4385" />
	    with motivation for defining the control word in
	    <xref target="RFC4928" />.
	    The PW Associated Channel defined in <xref
	    target="RFC4385" /> is used for OAM in <xref
	    target="RFC5085" />.
	    The PW Flow Label is defined in
	    <xref target="RFC6391" />
	    and is discussed further in this document in
	    <xref target="sect.fat-pw" />.
	  </t>
	  <t>
	    There are numerous pseudowire encapsulations, supporting
	    emulation of services such as Frame Relay, ATM, Ethernet,
	    TDM, and SONET/SDH over packet switched networks (PSNs)
	    using IP or MPLS.
	  </t>
	  <t>
	    The pseudowire encapsulation is out of scope for this
	    document.  Pseudowire impact on MPLS forwarding at
	    midpoint LSR is within scope.  The impact on ingress MPLS
	    push and egress MPLS UHP pop are within scope.  While
	    pseudowire encapsulation is out of scope, some advice is
	    given on sequence number support.
	  </t>

	  <section anchor="sect.pw-seq" title="Pseudowire Sequence Number">

	    <t>
	      Pseudowire (PW) sequence number support is most
	      important for PW payload types with a high expectation
	      of lossless and/or in-order delivery.  Identifying lost
	      PW packets and the exact amount of lost payload is critical
	      for PW services which maintain bit timing, such as Time
	      Division Multiplexing (TDM) services since these
	      services MUST compensate for lost payload on a bit-for-bit
	      basis.
	    </t>
	    <t>
	      With PW services which maintain bit timing, packets that
	      have been received out of order also MUST be identified
	      and MAY be either re-ordered or dropped.  Resequencing
	      requires, in addition to sequence numbering, a "reorder
	      buffer" in the egress PE, and ability to reorder is
	      limited by the depth of this buffer. The down side of
	      maintaining a large reorder buffer is added end-to-end
	      service delay.
	    </t>
	    <t>
	      For PW services which maintain bit timing or any other
	      service where jitter must be bounded, a jitter buffer is
	      always necessary.  The jitter buffer is needed
	      regardless of whether reordering is done.  In order to
	      be effective, a reorder buffer must often be larger than
	      a jitter buffer needs to be, creating a tradeoff between
	      reducing loss and minimizing delay.
	    </t>
	    <t>
	      PW services which are not timing critical bit streams in
	      nature are cell oriented or frame oriented.  Though
	      resequencing support may be beneficial to PW cell and
	      frame oriented payloads such as ATM, FR and Ethernet,
	      this support is desirable but not required.
	      Requirements to handle out of order packets at all vary
	      among services and deployments.  For example for
	      Ethernet PW, occasional (very rare) reordering is
	      usually acceptable.  If the Ethernet PW is carrying
	      MPLS-TP, then this reordering may be unacceptable.
	    </t>
	    <t>
	      Reducing jitter is best done by an end-system, given
	      that the tradeoff of loss vs delay varies among
	      services.  For example with interactive real time
	      services low delay is preferred, while with
	      non-interactive (one way) real time services low loss is
	      preferred.  The same end-site may be receiving both
	      types of traffic.  Regardless of this, bounded jitter is
	      sometimes a requirement for specific deployments.
	    </t>

	    <t>
	      Packet reordering should be rare except in a small number
	      of circumstances, most of which are due to network
	      design or equipment design errors:
	      <list style="numbers">
		<t>
		  The most common case is where reordering is rare,
		  occurring only when a network or equipment
		  fault forces traffic on a new path with different
		  delay. The packet loss that accompanies a network or
		  equipment fault is generally more disruptive than
		  any reordering which may occur.
		</t>
		<t>
		  A path change can be caused by reasons other than a
		  network or equipment fault, such as administrative
		  routing change.  This may result in packet
		  reordering but generally without any packet loss.
		</t>
		<t>
		  If the edge is not using pseudowire control word
		  (CW) and the core is using multipath, reordering
		  will be far more common.  If this is occurring,
		  using CW on the edge will solve the problem.
		  Without CW, resequencing is not possible since the
		  sequence number is contained in the CW.
		</t>
		<t>
		  Another avoidable case is where some core equipment
		  has multipath and for some reason insists on
		  periodically installing a new random number as the
		  multipath hash seed.  If supporting MPLS-TP,
		  equipment MUST provide a means to disable periodic
		  hash reseeding and deployments MUST disable periodic
		  hash reseeding.  Operator experience dictates that
		  even if not supporting MPLS-TP, equipment SHOULD
		  provide a means to disable periodic hash reseeding
		  and deployments SHOULD disable periodic hash
		  reseeding.
		</t>
	      </list>
	    </t>
	    <t>
	      In provider networks which use multipath techniques and
	      which may occasionally rebalance traffic or which may
	      change PW paths occasionally for other reasons,
	      reordering may be far more common than loss.  Where
	      reordering is more common than loss, resequencing
	      packets is beneficial, rather than dropping packets at
	      egress when out of order arrival occurs.  Resequencing is
	      most important for PW payload types with a high
	      expectation of lossless delivery since in such cases out
	      of order delivery within the network results in PW loss.
	    </t>

	  </section>

	</section>

	<section anchor="sect.vpn" title="Layer-2 and Layer-3 VPN">

	  <t>
	    Layer-2 VPN
	    <xref target="RFC4664" />
	    and Layer-3 VPN
	    <xref target="RFC4110" />
	    add one or more label entries to the MPLS label stack.  VPN
	    encapsulations are out of scope for this document.  Their
	    impact on forwarding at midpoint LSR is within scope.
	  </t>

	  <t>
	    Any of these services may be used on an MPLS entropy label
	    enabled ingress and egress (see
	    <xref target="sect.entropy" />
	    for discussion of entropy label) which would add an
	    additional two labels to the MPLS label stack.  The need to
	    provide a useful entropy label value impacts the
	    requirements of the VPN ingress LER but is out of
	    scope for this document.
	  </t>

	</section>

      </section>

      <section anchor="sect.mcast" title="MPLS Multicast">

	<t>
	  MPLS Multicast encapsulation is clarified in
	  <xref target="RFC5332" />.
	  MPLS Multicast may be signaled using RSVP-TE
	  <xref target="RFC4875" />
	  or LDP
	  <xref target="RFC6388" />.
	</t>
	<t>
	  <xref target="RFC4875" />
	  defines a root initiated RSVP-TE LSP setup rather than leaf
	  initiated join used in IP multicast.
	  <xref target="RFC6388" />
	  defines a leaf initiated LDP setup.
	  Both
	  <xref target="RFC4875" />
	  and
	  <xref target="RFC6388" />
	  define point to multipoint (P2MP) LSP setup.
	  <xref target="RFC6388" />
	  also defines multipoint to multipoint (MP2MP) LSP setup.
	</t>
	<t>
	  The P2MP LSP have a single source.  An LSR may be a leaf
	  node, an intermediate node, or a "bud" node.  A bud serves
	  as both a leaf and intermediate.  At a leaf an MPLS pop is
	  performed.  The payload may be an IP Multicast packet that
	  requires further replication.  At an intermediate node an
	  MPLS swap operation is performed.  The bud requires that
	  both a pop operation and a swap operation be performed for
	  the same incoming packet.
	</t>
	<t>
	  One strategy to support P2MP functionality is to pop at the
	  LSR interface serving as ingress to the P2MP traffic and
	  then optionally push labels at each LSR interface serving as
	  egress to the P2MP traffic at that same LSR.  A given LSR
	  egress chip may support multiple egress interfaces, each of
	  which requires a copy, but each with a different set of
	  added labels and layer-2 encapsulation.  Some physical
	  interfaces may have multiple sub-interfaces (such as
	  Ethernet VLAN or channelized interfaces) each requiring a
	  copy.
	</t>
	<t>
	  If packet replication is performed at LSR ingress, then the
	  ingress interface performance may suffer.  If the packet
	  replication is performed within an LSR switching fabric and
	  at LSR egress, congestion of egress interfaces cannot make
	  use of backpressure to ingress interfaces using techniques
	  such as virtual output queuing (VOQ).  If buffering is
	  primarily supported at egress, then the need for
	  backpressure is minimized.  There may be no good solution
	  for high volumes of multicast traffic if VOQ is used.
	</t>
	<t>
	  Careful consideration should be given to the performance
	  characteristics of high fanout multicast for equipment that
	  is intended to be used in such a role.
	</t>
	<t>
	  MP2MP LSP differ in that any branch may provide an input,
	  including a leaf.  Packets must be replicated onto all other
	  branches.  This forwarding is often implemented as multiple
	  P2MP forwarding trees, one for each potential input
	  interface at a given LSR.
	</t>

      </section>

      <section anchor="sect.pkt-rate" title="Packet Rates">

	<t>
	  While average packet size of Internet traffic may be large,
	  long sequences of small packets have both been predicted in
	  theory and observed in practice.  Traffic compression and
	  TCP ACK compression can conspire to create long sequences of
	  packets of 40-44 bytes in payload length.  If carried over
	  Ethernet, the 64 byte minimum payload applies, yielding a
	  packet rate of approximately 150 Mpps (million packets per
	  second) for the duration of the burst on a nominal 100 Gb/s
	  link.  The peak rate for other encapsulations can be as high
	  as 250 Mpps (for example IP or MPLS encapsulated using GFP
	  over OTN ODU4).
	</t>
	<t>
	  It is possible that the packet rate achieved by a specific
	  implementation is acceptable for a minimum payload size,
	  such as 64 byte (64B) payload for Ethernet, but the achieved
	  rate declines to an unacceptable level for other packet
	  sizes, such as 65B payload.  There are other packet rates of
	  interest besides TCP ACK.  For example, a TCP ACK carried
	  over an Ethernet PW over MPLS over Ethernet may occupy 82B
	  or 82B plus an increment of 4B if additional MPLS labels are
	  present.
	</t>
	<t>
	  A graph of packet rate vs. packet size often displays a
	  sawtooth.  The sawtooth is commonly due to a memory
	  bottleneck and memory widths, sometimes internal cache, but
	  often a very wide external buffer memory interface.  In some
	  cases it may be due to a fabric transfer width.  A fine
	  packing, rounding up to the nearest 8B or 16B will result in
	  a fine sawtooth with small degradation for 65B, and even
	  less for 82B packets.  A coarse packing, rounding up to 64B
	  can yield a sharper drop in performance for 65B packets, or
	  perhaps more important, a larger drop for 82B packets.
	</t>
	<t>
	  The loss of some TCP ACK packets is not the primary concern
	  when such a burst occurs.  When a burst occurs, any other
	  packets, regardless of packet length and packet QoS are
	  dropped once on-chip input buffers prior to the decision
	  engine are exceeded.  Buffers in front of the packet
	  decision engine are often very small or non-existent (less
	  than one packet of buffer) causing significant QoS agnostic
	  packet drop.
	</t>
	<t>
	  Internet service providers and content providers at one time
	  specified full rate forwarding with 40 byte payload packets
	  as a requirement.  Today, this requirement often can be
	  waived if the provider can be convinced that when long
	  sequences of short packets occur, no packets will be dropped.
	</t>
	<t>
	  Many equipment suppliers have pointed out that the extra
	  cost in designing hardware capable of processing the minimum
	  size packets at full line rate is significant for very high
	  speed interfaces.  If hardware is not capable of processing
	  the minimum size packets at full line rate, then that
	  hardware MUST be capable of handling large bursts of small
	  packets, a condition which is often observed.  This level of
	  performance is necessary to meet Differentiated Services
	  <xref target="RFC2475" />
	  requirements; without it, packets are lost prior to
	  inspection of the IP DSCP field
	  <xref target="RFC2474" />
	  or MPLS TC field <xref target="RFC5462" />.
	</t>
	<t>
	  With adequate on-chip buffers before the packet decision
	  engine, an LSR can absorb a long sequence of short packets.
	  Even if the output is slowed to the point where light
	  congestion occurs, the packets, having cleared the decision
	  process, can make use of larger VOQ or output side buffers
	  and be dealt with according to configured QoS treatment,
	  rather than dropped completely at random.
	</t>
	<t>
	  These on-chip buffers need not contribute significant delay
	  since they are only used when the packet decision engine is
	  unable to keep up, not in response to congestion, plus these
	  buffers are quite small.  For example, an on-chip buffer
	  capable of handling 4K packets of 64 bytes in length, or
	  256KB, corresponds to 200 usec on a 10 Gb/s link and 20 usec
	  on a 100 Gb/s link.  If the packet decision engine is
	  capable of handling packets at 90% of the full rate for
	  small packets, then the maximum added delay is 20 usec and
	  2 usec respectively, and this delay only applies if a 4K
	  burst of short packets occurs.  When no burst of short
	  packets is being processed, no delay is added.  These
	  buffers are only needed on high speed interfaces where it is
	  difficult to process small packets at full line rate.
	</t>
	<t>
	  Packet rate requirements apply regardless of which network
	  tier equipment is deployed in.  Whether deployed in the
	  network core or near the network edges, one of the two
	  conditions MUST be met if Differentiated Services
	  requirements are to be met:
	  <list style="numbers">
	    <t>
	      Packets must be processed at full line rate with minimum
	      sized packets.  -OR-
	    </t>
	    <t>
	      Packets must be processed at a rate well under generally
	      accepted average packet sizes, with sufficient buffering
	      prior to the packet decision engine to accommodate long
	      bursts of small packets.
	    </t>
	  </list>
	</t>

      </section>

      <section anchor="sect.multipath" title="MPLS Multipath Techniques">

	<t>
	  In any large provider, service providers and content
	  providers, hash based multipath techniques are used in the
	  core and in the edge.  In many of these providers hash based
	  multipath is also used in the larger metro networks.
	</t>
	<t>
	  The Differentiated Services requirements for good reasons
	  dictate that packets within a common microflow SHOULD NOT be
	  reordered <xref target="RFC2474" />.  Service providers generally impose
	  stronger requirements, commonly requiring that packets
	  within a microflow MUST NOT be reordered except in rare
	  circumstances such as load balancing across multiple links
	  or path change for load balancing or path change for other
	  reasons.
	</t>
	<t>
	  The most common multipath techniques are ECMP applied at
	  the IP forwarding level, Ethernet LAG with inspection of the
	  IP payload, and multipath on links carrying both IP and
	  MPLS, where the IP header is inspected below the MPLS label
	  stack.  In most core networks, the vast majority of traffic
	  is MPLS encapsulated.
	</t>
	<t>
	  In order to support an adequately balanced load distribution
	  across multiple links, IP header information must be used.
	  Common practice today is to reinspect the IP headers at each
	  LSR and use the label stack and IP header information in a
	  hash performed at each LSR.  Further details are provided in
	  <xref target="sect.mp-hash" />.
	</t>
	<t>
	  The use of this technique is so ubiquitous in provider
	  networks that lack of support for multipath makes any
	  product unsuitable for use in large core networks.  This
	  will continue to be the case in the near future, even as
	  deployment of MPLS entropy label begins to relax the core
	  LSR multipath performance requirements given the existing
	  deployed base of edge equipment without the ability to add
	  an entropy label.
	</t>
	<t>
	  A generation of edge equipment supporting the ability to add
	  an MPLS entropy label is needed before the performance
	  requirements for core LSR can be relaxed.  However, it is
	  likely that two generations of deployment in the future will
	  allow core LSR to support full packet rate only when a
	  relatively small number of MPLS labels need to be inspected
	  before hashing.  For now, don't count on it.
	</t>
	<t>
	  Common practice today is to reinspect the packet at each LSR
	  and use information from the packet combined with a hash seed
	  that is selected by each LSR.  Where flow labels or entropy
	  labels are used, a hash seed must be used when creating
	  these labels.
	</t>

	<section anchor="sect.pw-cw" title="Pseudowire Control Word">

	  <t>
	    Within the core of a network some form of multipath is
	    almost certain to be used.  Multipath techniques deployed
	    today are likely to be looking beneath the label stack for
	    an opportunity to hash on IP addresses.
	  </t>
	  <t>
	    A pseudowire encapsulated at a network edge must have a
	    means to prevent reordering within the core if the
	    pseudowire will be crossing a network core, or any part of
	    a network topology where multipath is used
	    (see <xref target="RFC4385" />
	    and <xref target="RFC4928" />).
	  </t>
	  <t>
	    Not supporting the ability to encapsulate a pseudowire
	    with a control word may lock a product out from
	    consideration.  A pseudowire capability without control
	    word support might be sufficient for applications that
	    are strictly both intra-metro and low bandwidth.  However
	    a provider with other applications will very likely not
	    tolerate having equipment which can only support a subset
	    of their pseudowire needs.
	  </t>

	</section>

	<section anchor="sect.large-uflow" title="Large Microflows">

	  <t>
	    Where multipath makes use of a simple hash and simple load
	    balance such as modulo or other fixed allocation (see
	    <xref target="sect.multipath" />), there can be large
	    microflows that each consume 10% of the capacity of a
	    component link of a potentially congested composite link.
	    One such microflow can upset the traffic balance, and more
	    than one can reduce the effective capacity of
	    the entire composite link by more than 10%.
	  </t>
	  <t>
	    When even a very small number of large microflows are
	    present, there is a significant probability that more
	    than one of these large microflows could fall on the same
	    component link.  If the traffic contribution from large
	    microflows is small, the probability for three or more
	    large microflows on the same component link drops
	    significantly.  Therefore in a network where a significant
	    number of parallel 10 Gb/s links exists, even a 1 Gb/s
	    pseudowire or other large microflow that could not
	    otherwise be subdivided into smaller flows should carry a
	    flow label or entropy label if possible.
	  </t>
	  <t>
	    Active management of the hash space to better accommodate
	    large microflows has been implemented and deployed in the
	    past, however such techniques are out of scope for this
	    document.
	  </t>

	</section>

	<section anchor="sect.fat-pw" title="Pseudowire Flow Label">

	  <t>
	    Unlike a pseudowire control word, a pseudowire flow label
	    <xref target="RFC6391" /> is required only for relatively
	    large capacity pseudowires.  There are many cases where a
	    pseudowire flow label makes sense.  Any service such as a
	    VPN which carries IP traffic within a pseudowire can make
	    use of a pseudowire flow label.
	  </t>
	  <t>
	    Any pseudowire carried over MPLS which makes use of the
	    pseudowire control word and does not carry a
	    flow label is in effect a single microflow (in
	    <xref target="RFC2475" /> terms) and may result in the
	    types of problems described in
	    <xref target="sect.large-uflow" />.
	  </t>

	</section>

	<section anchor="sect.entropy" title="MPLS Entropy Label">

	  <t>
	    The MPLS entropy label simplifies flow group
	    identification <xref target="RFC6790" /> at midpoint LSRs.
	    Prior to the MPLS entropy label midpoint LSRs needed to
	    inspect the entire label stack and often the IP headers to
	    provide an adequate distribution of traffic when using
	    multipath techniques (see <xref target="sect.mp-hash" />).
	    With the use of MPLS entropy label, a hash can be
	    performed closer to network edges, placed in the label
	    stack, and used by midpoint LSRs without fully reinspecting
	    the label stack and inspecting the payload.
	  </t>
	  <t>
	    The MPLS entropy label is capable of avoiding full label
	    stack and payload inspection within the core where
	    performance levels are most difficult to achieve (see
	    <xref target="sect.pkt-rate" />).
	    The label stack inspection can be terminated as soon as the
	    first entropy label is encountered, which is generally after a
	    small number of labels are inspected.
	  </t>
	  <t>
	    In order to provide these benefits in the core, LSR closer
	    to the edge must be capable of adding an entropy label.
	    This support may not be required in the access tier, the
	    tier closest to the customer, but is likely to be required
	    in the edge or the border to the network core.  LSR peering
	    with external networks will also need to be able to add an
	    entropy label on incoming traffic.
	  </t>

	</section>

	<section anchor="sect.mp-hash"
		 title="Fields Used for Multipath Load Balance">

	  <t>
	    The most common multipath techniques are based on a hash
	    over a set of fields.  Regardless of whether a hash is
	    used or some other method is used, there is a limited
	    set of fields which can safely be used for multipath.
	  </t>

	  <section anchor="sect.label-hash" title="MPLS Fields in Multipath">

	    <t>
	      If the "outer" or "first" layer of encapsulation is
	      MPLS, then label stack entries are used in the hash.
	      Within a finite amount of time (and for small packets
	      arriving at high speed that time can be quite limited)
	      only a finite number of label entries can be inspected.
	      Pipelined or parallel architectures improve this, but
	      the limit is still finite.
	    </t>
	    <t>
	      The following guidelines are provided for use of MPLS
	      fields in multipath load balancing.
	      <list style="numbers">
		<t>
		  Only the 20 bit label field SHOULD be used.  The TTL
		  field SHOULD NOT be used.  The S bit MUST NOT be
		  used.  The TC field (formerly EXP) MUST NOT be used.
		  See text following this list for reasons.
		</t>
		<t>
		  If an ELI label is found, then if the LSR supports
		  entropy label, the EL label field in the next label
		  entry (the EL) SHOULD be used and label entries
		  below that label SHOULD NOT be used and the MPLS
		  payload SHOULD NOT be used.
		  See below this list for reasons.
		</t>
		<t>
		  Special purpose labels (label values 0-15) MUST NOT
		  be used.  Extended special purpose labels (any label
		  following label 15) MUST NOT be used.  In
		  particular, GAL and RA MUST NOT be used so that OAM
		  traffic follows the same path as payload packets
		  with the same label stack.
		</t>
		<t>
		  If a new special purpose label or extended special
		  purpose label is defined which requires special load
		  balance processing, then, as is the case for the ELI
		  label, a special action may be needed rather than
		  skipping the special purpose label or extended
		  special purpose label.
		</t>
		<t>
		  The most entropy is generally found in the label
		  stack entries near the bottom of the label stack
		  (innermost label, closest to S=1 bit).  If the
		  entire label stack cannot be used (or entire stack
		  up to an EL), then it is better to use as many
		  labels as possible closest to the bottom of stack.
		</t>
		<t>
		  If no ELI is encountered, and the first nibble of
		  payload contains a 4 (IPv4) or 6 (IPv6), an
		  implementation SHOULD support the ability to
		  interpret the payload as IPv4 or IPv6 and extract
		  and use appropriate fields from the IP headers.
		  This feature is considered a non-negotiable requirement by
		  many service providers.  If supported, there MUST be
		  a way to disable it (if, for example, PW without CW
		  are used).  This ability to disable this feature is
		  considered a non-negotiable requirement by many service
		  providers.  Therefore an implementation has a very
		  strong incentive to support both options.
		</t>
		<t>
		  A label which is popped at egress (UHP pop) SHOULD
		  NOT be used.  A label which is popped at the
		  penultimate hop (PHP pop) SHOULD be used.
		</t>
	      </list>
	    </t>
	    <t>
	      Apparently some chips have made use of the TC (formerly
	      EXP) bits as a source of entropy.  This is very harmful
	      since it will reorder Assured Forwarding (AF) traffic
	      <xref target="RFC2597" />
	      when a subset does not conform to the configured rates
	      and is remarked but not dropped at a prior LSR.  Traffic
	      which uses MPLS ECN
	      <xref target="RFC5129" />
	      can also be reordered if TC is used for entropy.
	      Therefore, as stated in the guidelines above, the TC
	      field (formerly EXP) MUST NOT be used in multipath load
	      balancing as it violates Differentiated Services Ordered
	      Aggregate (OA) requirements in these two instances.
	    </t>
	    <t>
	      Use of the MPLS label entry S bit would result in
	      putting OAM traffic on a different path if the addition
	      of a GAL at the bottom of stack removed the S bit from
	      the prior label.
	    </t>
	    <t>
	      If an ELI label is found, then if the LSR supports
	      entropy label, the EL label field in the next label
	      entry (the EL) SHOULD be used and the search for
	      additional entropy within the packet SHOULD be
	      terminated.  Failure to terminate the search will impact
	      client MPLS-TP LSP carried within server MPLS LSP.  A
	      network operator has the option to use administrative
	      attributes as a means to identify LSR which do not
	      terminate the entropy search at the first EL.
	      Administrative attributes are defined in
	      <xref target="RFC3209" />.  Some configuration is
	      required to support this.
	    </t>
	    <t>
	      If the label removed by a PHP pop is not used, then for
	      any PW for which CW is used, there is no basis for
	      multipath load split.  In some networks it is infeasible
	      to put all PW traffic on one component link.  Any PW
	      which does not use CW will be improperly split
	      regardless of whether the label removed by a PHP pop is
	      used.  Therefore the PHP pop label SHOULD be used as
	      recommended above.
	    </t>

	  </section>

	  <section anchor="sect.ip-hash" title="IP Fields in Multipath">

	    <t>
	      Inspecting the IP payload provides the most entropy in
	      provider networks.  The practice of looking past the
	      bottom of stack label for an IP payload is well accepted
	      and documented in
	      <xref target="RFC4928" />
	      and in other RFCs.
	    </t>
	    <t>
	      Where IP is mentioned in the document, both IPv4 and
	      IPv6 apply.  All LSRs MUST fully support IPv6.
	      <!-- or face the wrath of Shane -->
	    </t>
	    <t>
	      When information in the IP header is used, the following
	      guidelines apply:
	      <list style="numbers">
		<t>
		  Both the IP source address and IP destination
		  address SHOULD be used.  There MAY be an option to
		  reverse the order of these addresses, improving the
		  ability to provide symmetric paths in some cases.
		  Many service providers require that both addresses
		  be used.
		</t>
		<t>
		  Implementations SHOULD allow inspection of the IP
		  protocol field and use of the UDP or TCP port
		  numbers.  For many service providers this feature is
		  considered mandatory, particularly for enterprise,
		  data center, or edge equipment.  If this feature is
		  provided, it SHOULD be possible to disable use of
		  TCP and UDP ports.  Many service providers consider
		  it a non-negotiable requirement that use of UDP and TCP ports
		  can be disabled.  Therefore there is a strong
		  incentive for implementations to provide both
		  options.
		</t>
		<t>
		  Equipment suppliers MUST NOT make assumptions that
		  because the IP version field is equal to 4 (an IPv4
		  packet) that the IP protocol will either be TCP (IP
		  protocol 6) or UDP (IP protocol 17) and blindly
		  fetch the data at the offset where the TCP or UDP
		  ports would be found.  With IPv6, TCP and UDP port
		  numbers are not at fixed offsets.  With IPv4 packets
		  carrying IP options, TCP and UDP port numbers are
		  not at fixed offsets.
		</t>
		<t>
		  The IPv6 header flow field SHOULD be used.  This is
		  the explicit purpose of the IPv6 flow field, however
		  observed flow fields rarely contain a non-zero
		  value.  Some uses of the flow field have been
		  defined such as <xref target="RFC6438" />.  In the
		  absence of MPLS encapsulation, the IPv6 flow field
		  can serve a role equivalent to entropy label.
		</t>
		<t>
		  Support for other protocols that share a common
		  Layer-4 header such as
		  RTP <xref target="RFC3550" />,
		  UDP-Lite <xref target="RFC3828" />,
		  SCTP <xref target="RFC4960" /> and
		  DCCP <xref target="RFC4340" />
		  SHOULD be provided, particularly for edge or access
		  equipment where additional entropy may be needed.
		  Equipment SHOULD also use RTP, UDP-lite, SCTP and
		  DCCP headers when creating an entropy label.
		</t>
		<t>
		  <!-- request to break this up into sub-bullets -->
		  The following IP header fields should not or must
		  not be used:
		  <list style="letters">
		    <t>
		      Similar to avoiding TC in MPLS, the IP DSCP and
		      ECN bits MUST NOT be used.
		    </t>
		    <t>
		      The IPv4 TTL or IPv6 Hop Limit SHOULD NOT be
		      used.
		    </t>
		    <t>
		      Note that the IP TOS field was deprecated
		      (<xref target="RFC0791" /> was updated by
		      <xref target="RFC2474" />).
		      No part of the IP DSCP field can be used
		      (formerly IP PREC and IP TOS bits).
		    </t>
		  </list>
		</t>
		<t>
		  Some IP encapsulations support tunneling, such as
		  IP-in-IP, GRE, L2TPv3, and IPSEC.  These provide a
		  greater source of entropy which some provider
		  networks carrying large amounts of tunneled traffic
		  may need, for example as used in
		  <xref target="RFC5640" />
		  for GRE and L2TPv3.  The use of tunneling header
		  information is out of scope for this document.
		</t>
	      </list>
	    </t>
	    <t>
	      This document makes the following recommendations.
	      These recommendations are not required to claim
	      compliance to any existing RFC therefore implementers
	      are free to ignore them, but due to service provider
	      requirements should consider the risk of doing so.
	      The use of IP addresses MUST be supported and TCP and
	      UDP ports (conditional on the protocol field and
	      properly located) MUST be supported.  The ability to
	      disable use of UDP and TCP ports MUST be available.
	    </t>
	    <t>
	      Though potentially very useful in some networks, it is
	      uncommon to support using payloads of tunneling
	      protocols carried over IP.  Though the use of tunneling
	      protocol header information is out of scope for this
	      document, it is not discouraged.
	    </t>

	  </section>

	  <section anchor="sect.fl-gen" title="Fields Used in Flow Label">

	    <t>
	      The ingress to a pseudowire (PW) can extract information
	      from the payload being encapsulated to create a flow
	      label.  <xref target="RFC6391" /> references IP carried
	      in Ethernet as an example.  The Native Service
	      Processing (NSP) function defined in
	      <xref target="RFC3985" /> differs with pseudowire type.
	      It is in the NSP function where information for a
	      specific type of PW can be extracted for use in a flow
	      label.  Which fields to use for any given PW NSP is out
	      of scope for this document.
	    </t>

	  </section>

	  <section anchor="sect.el-gen" title="Fields Used in Entropy Label">

	    <t>
	      An entropy label is added at the ingress to an LSP.  The
	      payload being encapsulated is most often MPLS, a PW, or
	      IP.  The payload type is identified by the layer-2
	      encapsulation (Ethernet, GFP, POS, etc).
	    </t>
	    <t>
	      If the payload is MPLS, then the information used to
	      create an entropy label is the same information used for
	      local load balancing (see
	      <xref target="sect.label-hash" />).  This information
	      MUST be extracted for use in generating an entropy label
	      even if the LSR local egress interface is not a
	      multipath.
	    </t>
	    <t>
	      Of the non-MPLS payload types, only payloads that are
	      forwarded are of interest.  For example, ARP is not
	      forwarded and CLNP (used only for IS-IS) is not
	      forwarded.
	    </t>
	    <t>
	      The non-MPLS payload types of greatest interest are IPv4
	      and IPv6.  The guidelines in
	      <xref target="sect.ip-hash" />
	      apply to fields used to create an entropy label.
	    </t>
	    <t>
	      The IP tunneling protocols mentioned in
	      <xref target="sect.ip-hash" />
	      may be more applicable to generation of an entropy label
	      at edge or access where deep packet inspection is
	      practical due to lower interface speeds than in the core
	      where deep packet inspection may be impractical.
	    </t>

	  </section>

	</section>

      </section>

      <section anchor="sect.tp-uhp" title="MPLS-TP and UHP">

	<t>
	  MPLS-TP introduces forwarding demands that will be extremely
	  difficult to meet in a core network.  Most troublesome is
	  the requirement for Ultimate Hop Popping (UHP, the opposite
	  of Penultimate Hop Popping or PHP).  Using UHP opens the
	  possibility of one or more MPLS pop operations plus an MPLS
	  swap operation for each packet.  The potential for multiple
	  lookups and multiple counter instances per packet exists.
	</t>
	<t>
	  As networks grow and tunneling of LDP LSPs into RSVP-TE LSPs
	  is used, and/or RSVP-TE hierarchy is used, the requirement to
	  perform one or two or more MPLS pop operations plus an MPLS
	  swap operation (and possibly a push or two) increases.  If
	  MPLS-TP LM (loss measurement) OAM is enabled at each layer,
	  then a packet and byte count MUST be maintained for each pop
	  and swap operation so as to offer OAM for each layer.
	</t>

      </section>

      <section anchor="sect.oam-gtsm" title="Local Delivery of Packets">

	<t>
	  There are a number of situations in which packets are
	  destined to a local address or where a return packet must be
	  generated.  There is a need to mitigate the potential for
	  outage as a result of either attacks on network
	  infrastructure, or in some cases unintentional
	  misconfiguration resulting in processor overload.  Some
	  hardware assistance is needed for all traffic destined to
	  the general purpose CPU that is used in MPLS control
	  protocol processing or network management protocol
	  processing and in most cases to other general purpose CPUs
	  residing on an LSR.  This is due to the ease of overwhelming
	  such a processor with traffic arriving on LSR high speed
	  interfaces, whether the traffic is malicious or not.
	</t>
	<t>
	  Denial of service (DoS) protection is an area requiring hardware
	  support that is often overlooked or inadequately considered.
	  Hardware assist is also needed for OAM, particularly the
	  more demanding MPLS-TP OAM.
	</t>

	<section anchor="sect.gtsm" title="DoS Protection">

	  <t>
	    Modern equipment supports a number of control plane and
	    management plane protocols.  Generally no single means of
	    protecting network equipment from denial of service (DoS)
	    attacks is sufficient, particularly for high speed
	    interfaces.  This problem is not specific to MPLS, but is
	    a topic that cannot be ignored when implementing or
	    evaluating MPLS implementations.
	  </t>
	  <t>
	    Two types of protections are often cited as primary means
	    of protecting against attacks of all kinds.
	    <list style="hanging" hangIndent="4">
	      <t hangText="Isolated Control/Management Traffic">
		<vspace blankLines="0" />
		Control and Management traffic can be carried
		out-of-band (OOB), meaning not intermixed with
		payload.  For MPLS, use of G-ACh and GAL to carry
		control and management traffic provides a means of
		isolation from potentially malicious payload.  Used
		alone, the compromise of a single node, including a
		small computer at a network operations center, could
		compromise an entire network.  Implementations which
		send all G-ACh/GAL traffic directly to a routing
		engine CPU are subject to DoS attack as a result of
		such a compromise.
	      </t>
	      <t hangText="Cryptographic Authentication">
		<vspace blankLines="0" />
		Cryptographic authentication can very effectively
		prevent malicious injection of control or management
		traffic.  Cryptographic authentication can in some
		circumstances be subject to DoS attack by overwhelming
		the capacity of the decryption with a high volume of
		malicious traffic.  For very low speed interfaces,
		cryptographic authentication can be performed by the
		general purpose CPU used as a routing engine.  For all
		other cases, cryptographic hardware may be needed.
		For very high speed interfaces, even cryptographic
		hardware can be overwhelmed.
	      </t>
	    </list>
	  </t>
	  <t>
	    Some control and management protocols are often carried
	    with payload traffic.  This is commonly the case with BGP,
	    T-LDP, and SNMP.  It is often the case with RSVP-TE.
	    Even when carried over G-ACh/GAL additional measures can
	    reduce the potential for a minor breach to be leveraged to
	    a full network attack.
	  </t>
	  <t>
	    Some of the additional protections are supported by
	    hardware packet filtering.
	    <list style="hanging" hangIndent="4">
	      <t hangText="GTSM">
		<vspace blankLines="0" />
		<xref target="RFC5082" />
		defines a mechanism that uses the IPv4 TTL or IPv6 Hop
		Limit fields to ensure control traffic that can only
		originate from an immediate neighbor is not forged and
		originating from a distant source.  GTSM can be
		applied to many control protocols which are routable,
		for example LDP <xref target="RFC6720" />.
	      </t>
	      <t hangText="IP Filtering">
		<vspace blankLines="0" />
		At the very minimum, packet filtering plus
		classification and use of multiple queues supporting
		rate limiting is needed for traffic that could
		potentially be sent to a general purpose CPU used as a
		routing engine.  The first level of filtering only
		allows connections to be initiated from specific IP
		prefixes to specific destination ports and then
		preferably passes traffic directly to a cryptographic
		engine and/or rate limits.  The second level of
		filtering passes connected traffic, such as TCP
		connections having received at least one authenticated
		SYN or having been locally initiated.  The second
		level of filtering only passes traffic to specific
		address and port pairs to be checked for cryptographic
		authentication.
	      </t>
	    </list>
	  </t>
	  <t>
	    The cryptographic authentication is generally the last
	    resort in DoS attack mitigation.  If a packet must be
	    first sent to a general purpose CPU, then sent to a
	    cryptographic engine, a DoS attack is possible on high
	    speed interfaces.  Only where hardware can fully process a
	    cryptographic authentication without intervention from a
	    general purpose CPU to find the authentication field and
	    to identify the portion of packet to run the cryptographic
	    algorithm over is cryptographic authentication beneficial
	    in protecting against DoS attacks.
	  </t>
	  <t>
	    For chips supporting multiple 100 Gb/s interfaces, only a
	    very large number of parallel cryptographic engines can
	    provide the processing capacity to handle a large scale
	    DoS or distributed DoS (DDoS) attack.  For many forwarding
	    chips this much processing power requires significant chip
	    real estate and power, and therefore reduces system space
	    and power density.  For this reason, cryptographic
	    authentication is not considered a viable first line of
	    defense.
	  </t>
	  <t>
	    For some networks the first line of defense is some means
	    of supporting OOB control and management traffic.  In the
	    past this OOB channel might make use of overhead bits in
	    SONET or OTN or a dedicated DWDM wavelength.  G-ACh and
	    GAL provide an alternative OOB mechanism which is
	    independent of underlying layers.  In other networks,
	    including most IP/MPLS networks, perimeter filtering
	    serves a similar purpose, though less effective without
	    extreme vigilance.
	  </t>
	  <t>
	    A second line of defense is filtering, including GTSM.
	    For protocols such as EBGP, GTSM and other filtering is
	    often the first line of defense.  Cryptographic
	    authentication is usually the last line of defense and
	    insufficient by itself to mitigate DoS or DDoS attacks.
	  </t>

	</section>

	<section anchor="sect.oam" title="MPLS OAM">

	  <t>
	    <xref target="RFC4377" />
	    defines requirements for MPLS OAM that predate MPLS-TP.
            <xref target="RFC4379" />
	    defines what is commonly referred to as LSP Ping and LSP
	    Traceroute.
            <xref target="RFC4379" />
	    is updated by
            <xref target="RFC6424" />
	    supporting MPLS tunnels and stitched LSP and P2MP LSP.
            <xref target="RFC4379" />
	    is updated by
            <xref target="RFC6425" />
	    supporting P2MP LSP.
	    <xref target="RFC4379" />
	    is updated by
            <xref target="RFC6426" />
	    to support MPLS-TP connectivity verification (CV) and route
	    tracing.
	  </t>
	  <t>
	    <xref target="RFC4950" />
	    extends the ICMP format to support TTL expiration that may
	    occur when using IP traceroute within an MPLS tunnel.  The
	    ICMP message generation can be implemented in forwarding
	    hardware, but if sent to a general purpose CPU must be
	    rate limited to avoid a potential denial of service (DoS)
	    attack.
	  </t>
	  <t>
            <xref target="RFC5880" />
	    defines Bidirectional Forwarding Detection (BFD), a
	    protocol intended to detect faults in the bidirectional
	    path between two forwarding engines.
            <xref target="RFC5884" />
	    and
            <xref target="RFC5885" />
	    define BFD for MPLS.
	    BFD can provide failure detection on any kind of path
	    between systems, including direct physical links, virtual
	    circuits, tunnels, MPLS Label Switched Paths (LSPs),
	    multihop routed paths, and unidirectional links as long as
	    there is some return path.
	  </t>
	  <t>
	    The processing requirements for BFD are less than for LSP
	    Ping, making BFD somewhat better suited for relatively
	    high rate proactive monitoring.  BFD does not verify that
	    the data plane matches the control plane, where LSP Ping
	    does.  LSP Ping is somewhat better suited for on-demand
	    monitoring including relatively low rate periodic
	    verification of data plane and as a diagnostic tool.
	  </t>
	  <t>
	    Hardware assistance is often provided for BFD response
	    where BFD setup or parameter change is not involved and
	    may be necessary for relatively high rate proactive
	    monitoring.  If both BFD and LSP Ping are recognized in
	    filtering prior to passing traffic to a general purpose
	    CPU, appropriate DoS protection can be applied (see <xref
	    target="sect.gtsm" />).  Failure to recognize BFD and LSP
	    Ping and at least rate limit creates the potential for
	    misconfiguration to cause outages rather than cause errors
	    in the misconfigured OAM.
	  </t>

	</section>

	<section anchor="sect.pw-oam" title="Pseudowire OAM">

	  <t>
	    Pseudowire OAM makes use of the control channel provided
	    by Virtual Circuit Connectivity Verification (VCCV)
	    <xref target="RFC5085" />.
	    VCCV makes use of the Pseudowire Control Word.
	    BFD support over VCCV is defined by
	    <xref target="RFC5885" />.
	    <xref target="RFC5885" />
	    is updated by
	    <xref target="RFC6478" />
	    in support of static pseudowires.
	    <xref target="RFC4379" />
	    is updated by
	    <xref target="RFC6829" />
	    supporting LSP Ping for Pseudowire FEC advertised over IPv6.
	  </t>

	  <t>
	    G-ACh/GAL (defined in <xref target="RFC5586" />) is the
	    preferred MPLS-TP OAM control channel and applies to any
	    MPLS-TP end points, including Pseudowire.
	    See <xref target="sect.tp-oam" /> for an overview of
	    MPLS-TP OAM.
	  </t>

	</section>

	<section anchor="sect.tp-oam" title="MPLS-TP OAM">

	  <t>
	    <xref target="RFC6669" />
	    summarizes the MPLS-TP OAM toolset, the set of protocols
	    supporting the MPLS-TP OAM requirements specified in
	    <xref target="RFC5860" />
	    and supported by the MPLS-TP OAM framework defined in
	    <xref target="RFC6371" />.
	  </t>
	  <t>
	    The MPLS-TP OAM toolset includes:
	    <list style="hanging" hangIndent="4">
	      <t hangText="CC-CV">
		<vspace blankLines="0" />
		<xref target="RFC6428" />
		defines BFD extensions to support proactive
		Connectivity Check and Connectivity Verification
		(CC-CV) applications.
		<xref target="RFC6426" />
		provides LSP ping extensions that are used to
		implement on-demand connectivity verification.
	      </t>
	      <t hangText="RDI">
		<vspace blankLines="0" />
		Remote Defect Indication (RDI) is triggered by
		failure of proactive CC-CV, which is BFD based.  For
		fast RDI initiation, RDI SHOULD be initiated and
		handled by hardware if BFD is handled in forwarding
		hardware.
		<xref target="RFC6428" />
		provides an extension for BFD that includes the RDI
		indication in the BFD format and a specification of
		how this indication is to be used.
	      </t>
	      <t hangText="Route Tracing">
		<vspace blankLines="0" />
		<xref target="RFC6426" />
		specifies that the LSP ping enhancements for MPLS-TP
		on-demand connectivity verification include
		information on the use of LSP ping for route tracing
		of an MPLS-TP path.
	      </t>
	      <t hangText="Alarm Reporting">
		<vspace blankLines="0" />
		<xref target="RFC6427" />
		describes the details of a new protocol supporting
		Alarm Indication Signal (AIS), Link Down Indication,
		and fault management.  Failure to support this
		functionality in forwarding hardware can potentially
		result in failure to meet protection recovery time
		requirements and is therefore strongly recommended.
	      </t>
	      <t hangText="Lock Instruct">
		<vspace blankLines="0" />
		Lock instruct is initiated on-demand and therefore
		need not be implemented in forwarding hardware.
		<xref target="RFC6435" />
		defines a lock instruct protocol.
	      </t>
	      <t hangText="Lock Reporting">
		<vspace blankLines="0" />
		<xref target="RFC6427" />
		covers lock reporting.  Lock reporting need not be
		implemented in forwarding hardware.
	      </t>
	      <t hangText="Diagnostic">
		<vspace blankLines="0" />
		<xref target="RFC6435" />
		defines protocol support for loopback.  Loopback
		initiation is on-demand and therefore need not be
		implemented in forwarding hardware.  Loopback of
		packet traffic SHOULD be implemented in forwarding
		hardware on high speed interfaces.
	      </t>
	      <t hangText="Packet Loss and Delay Measurement">
		<vspace blankLines="0" />
		<xref target="RFC6374" />
		and
		<xref target="RFC6375" />
		define a protocol and profile for packet loss
		measurement (LM) and delay measurement (DM).  LM
		requires a very accurate capture and insertion of
		packet and byte counters when a packet is transmitted
		and capture of packet and byte counters when a packet
		is received.  This capture and insertion MUST be
		implemented in forwarding hardware for LM OAM if high
		accuracy is needed.  DM requires very accurate capture and
		insertion of a timestamp on transmission and capture
		of timestamp when a packet is received.  This
		timestamp capture and insertion MUST be implemented in
		forwarding hardware for DM OAM if high accuracy is
		needed.
	      </t>
	    </list>
	  </t>
	  <t>
	    See <xref target="sect.oam" /> for discussion of hardware
	    support necessary for BFD and LSP Ping.
	  </t>
	  <t>
	    CC-CV and alarm reporting are tied to protection and
	    therefore SHOULD be supported in forwarding hardware in
	    order to provide protection for a large number of affected
	    LSP within target response intervals.  Since CC-CV is
	    supported by BFD, for MPLS-TP providing hardware
	    assistance for BFD processing helps ensure that protection
	    recovery time requirements can be met even for faults
	    affecting a large number of LSP.
	  </t>
	  <t>
	    MPLS-TP Protection State Coordination (PSC) is defined by
	    <xref target="RFC6378" />
	    and updated by
	    <xref target="I-D.ietf-mpls-psc-updates" />,
	    correcting some errors in
	    <xref target="RFC6378" />.
	  </t>

	</section>

	<section anchor="sect.oam-iwk"
		 title="MPLS OAM and Layer-2 OAM Interworking">

	  <t>
	    <xref target="RFC6670" />
	    provides the reasons for selecting a single MPLS-TP OAM
	    solution and examines the consequences were ITU-T to
	    develop a second OAM solution that is based on Ethernet
	    encodings and mechanisms.
	  </t>
	  <t>
	    <xref target="RFC6310" /> and
	    <xref target="RFC7023" />
	    specify the mapping of defect states between many types
	    of hardware Attachment Circuits (ACs) and associated
	    Pseudowires (PWs).  This functionality SHOULD be supported
	    in forwarding hardware.
	  </t>
	  <t>
	    It is beneficial if an MPLS OAM implementation can
	    interwork with the underlying server layer and provide a
	    means to interwork with a client layer.  For example,
	    <xref target="RFC6427" />
	    specifies an inter-layer propagation of AIS and LDI from
	    MPLS server layer to client MPLS layers.  Where the server
	    layer is a Layer-2, such as Ethernet, PPP over SONET/SDH,
	    or GFP over OTN, interwork among layers is also
	    beneficial.  For high speed interfaces, supporting this
	    interworking in forwarding hardware helps ensure that
	    protection based on this interworking can meet recovery
	    time requirements even for faults affecting a large number
	    of LSP.
	  </t>

	</section>

	<section anchor="sect.oam-hdwr"
		 title="Extent of OAM Support by Hardware">

	  <t>
	    Where certain requirements must be met, such as relatively
	    high CC-CV rates and a large number of interfaces, or
	    strict protection recovery time requirements and a
	    moderate number of affected LSP, some OAM functionality
	    must be supported by forwarding hardware.  In other cases,
	    such as highly accurate LM and DM OAM or strict protection
	    recovery time requirements with a large number of affected
	    LSP, OAM functionality must be entirely implemented in
	    forwarding hardware.
	  </t>
	  <t>
	    Where possible, implementation in forwarding hardware
	    should be in programmable hardware such that if standards
	    are later changed or extended these changes are likely to
	    be accommodated with hardware reprogramming rather than
	    replacement.
	  </t>
	  <t>
	    For some functionality there is a strong case for an
	    implementation in dedicated forwarding hardware.  Examples
	    include packet and byte counters needed for LM OAM as well
	    as needed for management protocols.  Similarly the capture
	    and insertion of packet and byte counts or timestamps
	    needed for transmitted LM or DM or time synchronization
	    packets MUST be implemented in forwarding hardware if high
	    accuracy is required.
	  </t>
	  <t>
	    For some functions there is a strong case for providing
	    limited support in forwarding hardware while making use of
	    an external general purpose processor if performance
	    criteria can be met.  For example, origination of RDI
	    triggered by CC-CV, response to RDI, and Protection State
	    Coordination (PSC) functionality
	    may be supported by hardware, but expansion to a large
	    number of client LSP and transmission of AIS or RDI to the
	    client LSP may occur in a general purpose processor.  Some
	    forwarding hardware supports one or more on-chip general
	    purpose processors which may be well suited for such a
	    role.  
	    <xref target="I-D.ietf-mpls-psc-updates" />, being a very
	    recent document that affects a protection state machine
	    that requires hardware support, underscores the importance
	    of having a degree of programmability in forwarding hardware.
	  </t>
	  <t>
	    The customer (system supplier or provider) should not
	    dictate design, but should independently validate target
	    functionality and performance.  However, it is not
	    uncommon for service providers and system implementers to
	    insist on reviewing design details (under NDA) due to past
	    experiences with suppliers and to reject suppliers who are
	    unwilling to provide details.
	  </t>

	</section>

	<section title="Support for IPFIX in Hardware">

	  <t>
	    The IPFIX architecture is defined by
	    <xref target="RFC5470" />.
	    IPFIX supports per flow statistics.  IPFIX information
	    elements (IEs) are defined in
	    <xref target="RFC5102" />
	    and include IEs for MPLS.
	  </t>
	  <t>
	    The forwarding chips used in core routers are not
	    optimized for high touch applications like IPFIX.  Often
	    support for IPFIX in core routers is limited to optional
	    IPFIX metering, which involves a 1-in-N packet sampling,
	    limited filtering support, and redirection to either an
	    internal CPU or an external interface.  The CPU or device
	    at the other end of the external interface then implements
	    the full IPFIX filtering and IPFIX collector
	    functionality.
	  </t>
	  <t>
	    LSR which are intended to be deployed further from the
	    core may support lower capacity interfaces but support
	    higher touch applications on the forwarding hardware and
	    may provide dedicated hardware to support a greater subset
	    of IPFIX functionality before handing off to a general
	    purpose CPU.  In some cases, far from the core the entire
	    IPFIX functionality up to and including the collector may
	    be implemented in hardware and firmware in the forwarding
	    silicon.  It is also worth noting that at lower speeds a
	    general purpose CPU may become adequate to implement
	    IPFIX, particularly if metering is used.
	  </t>

	</section>

      </section>

      <section anchor="sect.no-of-flows" title="Number and Size of Flows">

	<t>
	  Service provider networks may carry up to hundreds of
	  millions of flows on 10 Gb/s links.  Most flows are very
	  short lived, many under a second.  A subset of the flows are
	  low capacity and somewhat long lived.  When Internet traffic
	  dominates capacity a very small subset of flows are high
	  capacity and/or very long lived.
	</t>
	<t>
	  Two types of limitations with regard to number and size of
	  flows have been observed.
	  <list style="numbers">
	    <t>
	      Some hardware cannot handle some high capacity flows
	      because of internal paths which are limited, such as per
	      packet backplane paths or paths internal or external to
	      chips such as buffer memory paths.  Such designs can
	      handle aggregates of smaller flows.  Some hardware with
	      acknowledged limitations has been successfully deployed
	      but may be increasingly problematic if the capacity of
	      large microflows in deployed networks continues to grow.
	    </t>
	    <t>
	      Some hardware approaches cannot handle a large number of
	      flows, or a large number of large flows due to
	      attempting to count per flow, rather than deal with
	      aggregates of flows.  Hash techniques scale with regard
	      to number of flows due to a fixed hash size with many
	      flows falling into the same hash bucket.  Techniques
	      that identify individual flows have been implemented but
	      have never been successfully deployed for Internet traffic.
	    </t>
	  </list>
	</t>

      </section>

    </section>

    <section anchor="sect.ask"
	     title="Questions for Suppliers">

      <t>
	The following questions should be asked of a supplier.  These
	questions are grouped into broad categories.  The questions
	themselves are intended to be open ended questions to the
	supplier.  The tests in <xref target="sect.test" /> are
	intended to verify whether the supplier disclosed any
	compliance or performance limitations completely and
	accurately.
      </t>

      <section title="Basic Compliance">

	<t>
          <list counter="q" hangIndent="4" style="format Q#%d">
	    <t>
	      Can the implementation forward packets with an
	      arbitrarily large stack depth?
	      What limitations exist, and under what circumstances
	      do further limitations come into play (such as high
	      packet rate or specific features enabled or specific
	      types of packet processing)?
	      See <xref target="sect.basics" />.
	    </t>
	    <t>
	      Is the entire set of basic MPLS functionality
	      described in <xref target="sect.basics" /> supported?
	    </t>
	    <t>
	      Is the set of MPLS special purpose labels handled
	      correctly and with adequate performance?  Are extended
	      special purpose labels handled correctly and with
	      adequate performance?
	      See <xref target="sect.resv-labels" />.
	    </t>
	    <t>
	      Are mappings of label value and TC to PHB handled
	      correctly, including RFC3270 L-LSP mappings and
	      RFC4124 CT mappings to PHB?
	      See <xref target="sect.qos" />.
	    </t>
	    <t>
	      Is time synchronization adequately supported in
	      forwarding hardware?
	      <list style="letters">
		<t>
		  Are both PTP and NTP formats supported?
		</t>
		<t>
		  Is the accuracy of timestamp insertion and
		  incoming stamping sufficient?
		</t>
	      </list>
	      See <xref target="sect.time-sync" />.
	    </t>
	    <t>
	      Is link bundling supported?
	      <list style="letters">
		<t>
		  Can LSP be pinned to specific components?
		</t>
		<t>
		  Is the "all-ones" component link supported?
		</t>
	      </list>
	      See <xref target="sect.link-bundle" />.
	    </t>
	    <t>
	      Is MPLS hierarchy supported?
	      <list style="letters">
		<t>
		  Are both PHP and UHP supported?  What limitations
		  exist on the number of pop operations with UHP?
		</t>
		<t>
		  Are the pipe, short-pipe, and uniform models
		  supported?  Are TTL and TC values updated
		  correctly at egress where applicable?
		</t>
	      </list>
	      See <xref target="sect.hierarchy" /> regarding MPLS
	      hierarchy.  See <xref target="RFC3443" /> regarding
	      PHP, UHP, and pipe, short-pipe, and uniform models.
	    </t>
	    <t>
	      Is FRR supported?
	      <list style="letters">
		<t>
		  Are both "One-to-One Backup" and "Facility Backup"
		  supported?
		</t>
		<t>
		  What forms of IPFRR/LDPFRR are supported?
		</t>
		<t>
		  How quickly does protection recovery occur?
		</t>
		<t>
		  Does protection recovery speed increase when a fault
		  affects a large number of protected LSP, and if so
		  by how much?
		</t>
	      </list>
	      See <xref target="sect.frr" />.
	    </t>
	    <t>
	      Are pseudowire sequence numbers handled correctly?
	      See <xref target="sect.pw-seq" />.
	    </t>
	    <t>
	      Is VPN LER functionality handled correctly and without
	      performance issues?
	      See <xref target="sect.vpn" />.
	    </t>
	    <t>
	      Is MPLS multicast (P2MP and MP2MP) handled correctly?
	      <list style="letters">
		<t>
		  Are packets dropped on uncongested outputs if some
		  outputs are congested?
		</t>
		<t>
		  Is performance limited in high fanout situations?
		</t>
	      </list>
	      See <xref target="sect.mcast" />.
	    </t>
	  </list>
	</t>

      </section>

      <section title="Basic Performance">

	<t>
          <list counter="q" hangIndent="4" style="format Q#%d">
	    <t>
	      Can very small packets be forwarded at full line rate
	      on all interfaces indefinitely?
	      What limitations exist, and under what circumstances
	      do further limitations come into play (such as
	      specific features enabled or specific types of packet
	      processing)?
	    </t>
	    <t>
	      Customers must decide whether to relax the prior
	      requirement and to what extent.  If the answer to the
	      prior question indicates that limitations exist, then:
	      <list style="letters">
		<t>
		  What is the smallest packet size where full line
		  rate forwarding can be supported?
		</t>
		<t>
		  What is the longest burst of full rate small
		  packets that can be supported?
		</t>
	      </list>
	      Specific circumstances (such as specific features
	      enabled or specific types of packet processing) often
	      impact these rates and burst sizes.
	    </t>
	    <t>
	      How many pop operations can be supported along with a
	      swap operation at full line rate while maintaining
	      per LSP packet and byte counts for each pop and swap?
	      This requirement is particularly relevant for MPLS-TP.
	    </t>
	    <t>
	      How many label push operations can be supported?
	      While this limitation is rarely an issue, it applies
	      to both PHP and UHP, unlike the pop limit which
	      applies to UHP.
	    </t>
	    <t>
	      For a worst case where all packets arrive on one LSP,
	      what is the counter overflow time?  Are any means
	      provided to avoid polling all counters at short
	      intervals?  This applies to both MPLS and MPLS-TP.
	    </t>
	  </list>
	</t>

      </section>

      <section title="Multipath Capabilities and Performance">

	<t>
	  Multipath capabilities and performance do not apply to
	  MPLS-TP but apply to MPLS and apply if MPLS-TP is carried
	  in MPLS.
	  <list counter="q" hangIndent="4" style="format Q#%d">
	    <t>
	      How are large microflows accommodated?  Is there
	      active management of the hash space mapping to output
	      ports?  See <xref target="sect.large-uflow" />.
	    </t>
	    <t>
	      How many MPLS labels can be included in a hash based
	      on the MPLS label stack?
	    </t>
	    <t>
	      Is packet rate performance decreased beyond some
	      number of labels?
	    </t>
	    <t>
	      Can the IP header and payload information below the
	      MPLS stack be used in the hash?  If so, which IP
	      fields, payload types and payload fields are
	      supported?
	    </t>
	    <t>
	      At what maximum MPLS label stack depth can Bottom of
	      Stack and an IP header appear without impacting packet
	      rate performance?
	    </t>
	    <t>
	      Are special purpose labels excluded from the label stack
	      hash?  Are extended special purpose labels excluded from
	      the label stack hash?
	      See <xref target="sect.label-hash" />.
	    </t>
	    <t>
	      How is multipath performance affected by high capacity
	      flows or an extremely large number of flows, or by
	      very short lived flows?
	      See <xref target="sect.no-of-flows" />.
	    </t>
	  </list>
	</t>

      </section>

      <section title="Pseudowire Capabilities and Performance">

	<t>
          <list counter="q" hangIndent="4" style="format Q#%d">
	    <t>
	      Is the pseudowire control word supported?
	    </t>
	    <t>
	      What is the maximum rate of pseudowire encapsulation
	      and decapsulation?  Apply the same questions as in
	      Basic Performance for any packet based pseudowire such
	      as IP VPN or Ethernet.
	    </t>
	    <t>
	      Does inclusion of a pseudowire control word impact
	      performance?
	    </t>
	    <t>
	      Are flow labels supported?
	    </t>
	    <t>
	      If so, what fields are hashed on for the flow label
	      for different types of pseudowires?
	    </t>
	    <t>
	      Does inclusion of a flow label impact performance?
	    </t>
	  </list>
	</t>

      </section>

      <section title="Entropy Label Support and Performance">

	<t>
          <list counter="q" hangIndent="4" style="format Q#%d">
	    <t>
	      Can an entropy label be added when acting as an
	      ingress LER and can it be removed when acting as an
	      egress LER?
	    </t>
	    <t>
	      If so, what fields are hashed on for the entropy label?
	    </t>
	    <t>
	      Does adding or removing an entropy label impact packet
	      rate performance?
	    </t>
	    <t>
	      Can an entropy label be detected in the label stack,
	      used in the hash, and properly terminate the search
	      for further information to hash on?
	    </t>
	    <t>
	      Does using an entropy label have any negative impact
	      on performance?  It should have no impact or a
	      positive impact.
	    </t>
	  </list>
	</t>

      </section>

      <section anchor="sect.q-dos" title="DoS Protection">

	<t>
	  <list counter="q" hangIndent="4" style="format Q#%d">
	    <t>
	      For each control and management plane protocol in use,
	      what measures are taken to provide DoS attack
	      hardening?
	    </t>
	    <t>
	      Have DoS attack tests been performed?
	    </t>
	    <t>
	      Can compromise of an internal computer on a management
	      subnet be leveraged for any form of attack including
	      DoS attack?
	    </t>
	  </list>
	</t>

      </section>

      <section title="OAM Capabilities and Performance">

	<t>
	  <list counter="q" hangIndent="4" style="format Q#%d">
	    <t>
	      What OAM proactive and on-demand mechanisms are
	      supported?
	    </t>
	    <t>
	      What performance limits exist under high proactive
	      monitoring rates?
	    </t>
	    <t>
	      Can excessively high proactive monitoring rates impact
	      control plane performance or cause control plane
	      instability?
	    </t>
	    <t>
	      Ask the prior questions for each of the following.
	      <list style="letters">
		<t>MPLS OAM</t>
		<t>Pseudowire OAM</t>
		<t>MPLS-TP OAM</t>
		<t>Layer-2 OAM Interworking</t>
	      </list>
	      See <xref target="sect.oam-gtsm" />.
	    </t>
	  </list>
	</t>

      </section>

    </section>

    <section anchor="sect.test"
	     title="Forwarding Compliance and Performance Testing">

      <t>
	Packet rate performance testing of equipment supporting a
	large number of 10 Gb/s or 100 Gb/s links is not possible
	using desktop computers or workstations.  The use of high
	end workstations
	as a source of test traffic was barely viable 20 years ago,
	but is no longer at all viable.  Though custom microcode has
	been used on specialized router forwarding cards to serve the
	purpose of generating test traffic and measuring it, for the
	most part performance testing will require specialized test
	equipment.  There are multiple sources of suitable equipment.
      </t>
      <t>
	The set of tests listed here does not correspond one-to-one to
	the set of questions in <xref target="sect.ask" />.  The same
	categorization is used and these tests largely serve to
	validate answers provided to the prior questions, and can
	also provide answers where a supplier is unwilling to disclose
	compliance or performance.
      </t>
      <t>
	Performance testing is the domain of the IETF Benchmark
	Methodology Working Group (BMWG).  Below are brief
	descriptions of conformance and performance tests.  Some very
	basic tests are specified in <xref target="RFC5695" /> which
	partially cover only the basic performance test T#3.
      </t>
      <t>
	The following tests should be performed by the systems
	designer, or deployer, or performed by the supplier on their
	behalf if it is not practical for the potential customer to
	perform the tests directly.  These tests are grouped into
	broad categories.
      </t>
      <t>
	The tests in
	<xref target="q-bc" />
	should be repeated under various conditions to retest basic
	performance when critical capabilities are enabled.  Complete
	repetition of the performance tests enabling each capability
	and combinations of capabilities would be very time intensive,
	therefore a reduced set of performance tests can be used to
	gauge the impact of enabling specific capabilities.
      </t>

      <section anchor="q-bc" title="Basic Compliance">

	<t>
          <list counter="t" hangIndent="4" style="format T#%d">
	    <t>
	      Test forwarding at a high rate for packets with
	      varying number of label entries.  While packets with
	      more than a dozen label entries are unlikely to be
	      used in any practical scenario today, it is useful to
	      know if limitations exist.
	    </t>
	    <t>
	      For each of the questions listed under "Basic
	      Compliance" in <xref target="sect.ask" />, verify the
	      claimed compliance.  For any functionality considered
	      critical to a deployment, where applicable performance
	      using each capability under load should be verified in
	      addition to basic compliance.
	    </t>
	  </list>
	</t>

      </section>

      <section title="Basic Performance">

	<t>
          <list counter="t" hangIndent="4" style="format T#%d">
	    <t>
	      Test packet forwarding at full line rate with small
	      packets.  See <xref target="RFC5695" />.  The most
	      likely case to fail is the smallest packet size.  Also
	      test with packet sizes in four byte increments ranging
	      from payload sizes of 40 to 128 bytes.
	    </t>
	    <t>
	      If the prior tests did not succeed for all packet
	      sizes, then perform the following tests.
	      <list style="letters">
		<t>
		  Increase the packet size by 4 bytes until a size
		  is found that can be forwarded at full rate.
		</t>
		<t>
		  Inject bursts of consecutive small packets into a
		  stream of larger packets.  Allow some time for
		  recovery between bursts.  Increase the number of
		  packets in the burst until packets are dropped.
		</t>
	      </list>
	    </t>
	    <t>
	      Send test traffic where a swap operation is required.
	      Also set up multiple LSP carried over other LSP where
	      the device under test (DUT) is the egress of these
	      LSP.  Create test packets such that the swap operation
	      is performed after pop operations, increasing the
	      number of pop operations until forwarding of small
	      packets at full line rate can no longer be supported.
	      Also check to see how many pop operations can be
	      supported before the full set of counters can no
	      longer be maintained.  This requirement is
	      particularly relevant for MPLS-TP.
	    </t>
	    <t>
	      Send all traffic on one LSP and see if the counters
	      become inaccurate.  Often counters on silicon are much
	      smaller than the 64 bit packet and byte counters in
	      various IETF MIBs.  System developers should consider what
	      counter polling rate is necessary to maintain accurate
	      counters and whether those polling rates are
	      practical.

	      Relevant MIBs for MPLS are discussed in
	      <xref target="RFC4221" /> and
	      <xref target="RFC6639" />.
	    </t>
	    <t>
	      <xref target="RFC6894" />
	      provides a good basis for MPLS FRR testing.  Similar
	      testing should be performed to determine restoration
	      times, however this testing is far more difficult to
	      perform due to the need for a simulated test topology
	      that is capable of simulating the signaling used in
	      restoration.  The simulated topology should be
	      comparable with the target deployment in the number of
	      nodes and links and in resource usage flooding and setup
	      delays.  Some commercial test equipment can support this
	      type of testing.
	    </t>
	  </list>
	</t>

      </section>

      <section anchor="q-multipath"
	       title="Multipath Capabilities and Performance">

	<t>
	  Multipath capabilities do not apply to MPLS-TP but apply
	  to MPLS and apply if MPLS-TP is carried in MPLS.
          <list counter="t" hangIndent="4" style="format T#%d">
	    <t>
	      Send traffic at a rate well exceeding the capacity of
	      a single multipath component link, and where entropy
	      exists only below the top of stack.  If only the top
	      label is used this test will fail immediately.
	    </t>
	    <t>
	      Move the labels with entropy down in the stack until
	      either the full forwarding rate can no longer be
	      supported or most or all packets try to use the same
	      component link.
	    </t>
	    <t>
	      Repeat the two tests above with the entropy contained
	      in IP headers or IP payload fields below the label
	      stack rather than in the label stack.  Test with the
	      set of IP headers or IP payload fields considered
	      relevant to the deployment or to the target market.
	    </t>
	    <t>
	      Determine whether traffic that contains a pseudowire
	      control word is interpreted as IP traffic.
	      Information in the payload MUST NOT be used in the
	      load balancing if the first nibble of the packet is
	      not 4 or 6 (IPv4 or IPv6).
	    </t>
	    <t>
	      Determine whether special purpose labels and extended
	      special purpose labels are excluded from the label stack
	      hash.  They MUST be excluded.
	    </t>
	    <t>
	      Perform testing in the presence of combinations of:
	      <list style="letters">
		<t>
		  Very large microflows.
		</t>
		<t>
		  Relatively short lived high capacity flows.
		</t>
		<t>
		  Extremely large numbers of flows.
		</t>
		<t>
		  Very short lived small flows.
		</t>
	      </list>
	    </t>
	  </list>
	</t>

      </section>

      <section title="Pseudowire Capabilities and Performance">

	<t>
          <list counter="t" hangIndent="4" style="format T#%d">
	    <t>
	      Ensure that pseudowire can be set up with a pseudowire
	      label and pseudowire control word added at ingress and
	      the pseudowire label and pseudowire control word
	      removed at egress.
	    </t>
	    <t>
	      For pseudowire that contains variable length payload
	      packets, repeat performance tests listed under "Basic
	      Performance" for pseudowire ingress and egress
	      functions.
	    </t>
	    <t>
	      Repeat pseudowire performance tests with and without
	      a pseudowire control word.
	    </t>
	    <t>
	      Determine whether pseudowire can be set up with a
	      pseudowire label, flow label, and pseudowire control
	      word added at ingress and the pseudowire label, flow
	      label, and pseudowire control word removed at egress.
	    </t>
	    <t>
	      Determine which payload fields are used to create the
	      flow label and whether the set of fields and algorithm
	      provide sufficient entropy for load balancing.
	    </t>
	    <t>
	      Repeat pseudowire performance tests with flow labels
	      included.
	    </t>
	  </list>
	</t>

      </section>

      <section title="Entropy Label Support and Performance">

	<t>
          <list counter="t" hangIndent="4" style="format T#%d">
	    <t>
	      Determine whether entropy labels can be added at
	      ingress and removed at egress.
	    </t>
	    <t>
	      Determine which fields are used to create an entropy
	      label.  Labels further down in the stack, including
	      entropy labels further down and IP headers or IP
	      payload fields where applicable should be used.
	      Determine whether the set of fields and algorithm
	      provide sufficient entropy for load balancing.
	    </t>
	    <t>
	      Repeat performance tests under "Basic Performance"
	      when entropy labels are used, where ingress or egress
	      is the device under test (DUT).
	    </t>
	    <t>
	      Determine whether an ELI is detected when acting as a
	      midpoint LSR and whether the search for further
	      information on which to base the load balancing is
	      used.  Information below the entropy label SHOULD NOT
	      be used.
	    </t>
	    <t>
	      Ensure that the entropy label indicator and entropy
	      label (ELI and EL) are removed from the label stack
	      during UHP and PHP operations.
	    </t>
	    <t>
	      Ensure that operations on the TC field when adding and
	      removing entropy label are correctly carried out.  If
	      TC is changed during a swap operation, the ability to
	      transfer that change MUST be provided.  The ability to
	      suppress the transfer of TC MUST also be provided.  See
	      "pipe", "short pipe", and "uniform" models in
	      <xref target="RFC3443" />.
	    </t>
	    <t>
	      Repeat performance tests for a midpoint LSR with entropy
	      labels found at various label stack depths.
	    </t>
	  </list>
	</t>

      </section>

      <section anchor="sect.dos-attack" title="DoS Protection">

	<t>
          <list counter="t" hangIndent="4" style="format T#%d">
	    <t>
	      Actively attack LSR under high protocol churn load and
	      determine control plane performance impact or
	      successful DoS under test conditions.  Specifically
	      test for the following.
	      <list style="letters">
		<t>
		  TCP SYN attack against control plane and
		  management plane protocols using TCP, including
		  CLI access (typically SSH protected login),
		  NETCONF, etc.
		</t>
		<t>
		  High traffic volume attack against control plane
		  and management plane protocols not using TCP.
		</t>
		<t>
		  Attacks which can be performed from a compromised
		  management subnet computer, but not one with
		  authentication keys.
		</t>
		<t>
		  Attacks which can be performed from a compromised
		  peer within the control plane (internal domain and
		  external domain).
		  <!-- where does KARP work on key dist stand? -->
		  Assume that per peering keys and per router ID
		  keys rather than network wide keys are in use.
		</t>
	      </list>
	      See <xref target="sect.gtsm" />.
	    </t>
	  </list>
	</t>

      </section>

      <section title="OAM Capabilities and Performance">

	<t>
          <list counter="t" hangIndent="4" style="format T#%d">
	    <t>
	      Determine maximum sustainable rates of BFD traffic.
	      If BFD requires CPU intervention, determine both
	      maximum rates and CPU loading when multiple interfaces
	      are active.
	    </t>
	    <t>
	      Verify LSP Ping and LSP Traceroute capability.
	    </t>
	    <t>
	      Determine maximum rates of MPLS-TP CC-CV traffic.  If
	      CC-CV requires CPU intervention, determine both
	      maximum rates and CPU loading when multiple interfaces
	      are active.
	    </t>
	    <t>
	      Determine MPLS-TP DM precision.
	    </t>
	    <t>
	      Determine MPLS-TP LM accuracy.
	    </t>
	    <t>
	      Verify MPLS-TP AIS/RDI and Protection State Coordination
	      (PSC) functionality,
	      protection speed, and AIS/RDI notification speed when
	      a large number of Management Entities (ME) must be
	      notified with AIS/RDI.
	    </t>
	  </list>
	</t>

      </section>

    </section>

    <section anchor="sect.ack" title="Acknowledgements">

      <t>
	Numerous very useful comments have been received in private
	email.  Some of these contributions are acknowledged here,
	approximately in chronologic order.
      </t>
      <t>
	Paul Doolan provided a brief review resulting in a number of
	clarifications, most notably regarding on-chip vs. system
	buffering, 100 Gb/s link speed assumptions in the 150 Mpps
	figure, and handling of large microflows.  Pablo Frank
	reminded us of the sawtooth effect in PPS vs. packet size
	graphs, prompting the addition of a few paragraphs on this.
	Comments from Lou Berger at IETF-85 prompted the addition of
	<xref target="sect.no-of-flows" />.
      </t>
      <t>
	Valuable comments were received on the BMWG mailing list.  Jay
	Karthik pointed out testing methodology hints that after
	discussion were deemed out of scope and were removed but may
	benefit later work in BMWG.
      </t>
      <t>
	Nabil Bitar pointed out the need to cover QoS (Differentiated
	Services), MPLS multicast (P2MP and MP2MP), and MPLS-TP OAM.
	Nabil also provided a number of clarifications to the
	questions and tests in <xref target="sect.ask" /> and
	<xref target="sect.test" />.
      </t>
      <t>
	Mark Szczesniak provided a thorough review and a number of
	useful comments and suggestions that improved the document.
      </t>
      <t>
	Gregory Mirsky and Thomas Beckhaus provided useful comments
	during the MPLS RT review.
      </t>
      <t>
	Tal Mizrahi provided comments that prompted clarifications
	regarding timestamp processing, local delivery of packets, and
	the need for hardware assistance in processing OAM traffic.
      </t>
      <t>
	Alexander (Sasha) Vainshtein pointed out errors in
	<xref target="sect.pw-seq" />
	and suggested new text which after lengthy discussion resulted
	in restating the summarization of requirements from PWE3 RFCs
	and more clearly stating the benefits and drawbacks of packet
	resequencing based on PW sequence number.
      </t>
      <t>
	Loa Andersson provided useful comments and corrections prior to
	WGLC.  Adrian Farrel provided useful comments and corrections
	as part of the AD review.
      </t>
      <t>
	Discussion with Steve Kent during SecDir review resulted in
	expansion of <xref target="sect.security" />, briefly
	summarizing security considerations related to forwarding in
	normative references.  Tom Petch pointed out some editorial
	errors in private email plus an important math error.  Al
	Morton during OpsDir review prompted clarification in the
	target audience section, suggested more clear wording in
	places, and found numerous editorial errors.
      </t>
      <t>
	Discussion with Stewart Bryant and Alia Atlas as part of IESG
	review resulted in coverage of IPFIX and improvements to
	document coverage of MPLS FRR, and IP/LDP FRR, plus some
	corrections to the text elsewhere.
      </t>

    </section>

    <section anchor="sect.iana" title="IANA Considerations">

      <t>
	This memo includes no request to IANA.
      </t>

    </section>

    <section anchor="sect.security" title="Security Considerations">

      <t>
	This document reviews forwarding behavior specified elsewhere
	and points out compliance and performance requirements.  As
	such it introduces no new security requirements or concerns.
      </t>
      <t>
	Discussion of hardware support and other equipment hardening
	against DoS attack can be found in
	<xref target="sect.gtsm" />.
	<xref target="sect.q-dos" />
	provides a list of questions regarding DoS to be asked of
	suppliers.
	<xref target="sect.dos-attack" />
	suggests types of testing that can provide some assurance of
	the effectiveness of supplier DoS hardening claims.
      </t>
      <t>
	Knowledge of potential performance shortcomings may serve to
	help new implementations avoid pitfalls.  It is unlikely that
	such knowledge could be the basis of new denial of service as
	these pitfalls are already widely known in the service
	provider community and among leading equipment suppliers.  In
	practice extreme data and packet rates are needed to affect
	existing equipment and to affect networks that may still be
	vulnerable due to failure to implement adequate protection.
	The extreme data and packet rates make this type of denial of
	service unlikely and make undetectable denial of service of
	this type impossible.
      </t>
      <t>
	Each of the normative references contains security
	considerations.  A brief summarization of MPLS security
	considerations applicable to forwarding follows:
      </t>
      <t>
	<list style="numbers">
	  <t>
	    MPLS encapsulation does not support an authentication
	    extension.  This is reflected in the security section of
	    <xref target="RFC3032" />.
	    Documents which clarify MPLS header fields such as TTL
	    <xref target="RFC3443" />,
	    the explicit null label 
	    <xref target="RFC4182" />,
	    renaming EXP to TC 
	    <xref target="RFC5462" />,
	    ECN for MPLS 
	    <xref target="RFC5129" />, and
	    MPLS Ethernet encapsulation
	    <xref target="RFC5332" />
	    make no changes to security considerations in
	    <xref target="RFC3032" />.
	  </t>
	  <t>
	    Some cited RFCs are related to Diffserv forwarding.
	    <xref target="RFC3270" />
	    refers to MPLS and Diffserv security.  
	    <xref target="RFC2474" />
	    mentions theft of service and denial of service due to
	    mismarking.
	    <xref target="RFC2474" />
	    mentions IPsec interaction, but with MPLS, not being
	    carried by IP, this type of interaction in
	    <xref target="RFC2474" />
	    is not relevant.
	  </t>
	  <t>
	    <xref target="RFC3209" />
	    is cited here due only to make-before-break forwarding
	    requirements.  This is related to resource sharing and the
	    theft of service and denial of service concerns in
	    <xref target="RFC2474" />
	    apply.
	  </t>
	  <t>
	    <xref target="RFC4090" />
	    defines FRR which provides protection but does not add
	    security concerns.
	    <xref target="RFC4201" />
	    defines link bundling but
	    raises no additional security concerns.
	  </t>
	  <t>
	    Various OAM control channels are defined in 
	    <xref target="RFC4385" />
	    (PW CW),
	    <xref target="RFC5085" />
	    (VCCV),
	    <xref target="RFC5586" />
	    (G-Ach and GAL).
	    These documents describe potential abuse of these OAM
	    control channels.
	  </t>
	  <t>
	    <xref target="RFC4950" />
	    defines ICMP extensions when MPLS TTL expires and payload
	    is IP.  This provides MPLS header information which is of
	    no use to an IP attacker, but sending this information can
	    be suppressed through configuration.
	  </t>
	  <t>
	    GTSM 
	    <xref target="RFC5082" />
	    provides a means to improve protection against high
	    traffic volume spoofing as a form of DoS attack.
	  </t>
	  <t>
	    BFD 
	    <xref target="RFC5880" />
	    <xref target="RFC5884" />
	    <xref target="RFC5885" />
	    provides a form of OAM used in MPLS and MPLS-TP.  The
	    security considerations related to the OAM control channel
	    are relevant.  The BFD payload supports authentication,
	    unlike the MPLS encapsulation or the MPLS or PW control
	    channel encapsulation it is carried in.  Where an IP return
	    OAM path is used, IPsec is suggested as a means of securing
	    the return path.
	  </t>
	  <t>
	    Other forms of OAM are supported by 
	    <xref target="RFC6374" />
	    <xref target="RFC6375" />
	    (Loss and Delay Measurement), 
	    <xref target="RFC6428" />
	    (Connectivity Check/Verification based on BFD), and
	    <xref target="RFC6427" />
	    (Fault Management).  The security considerations related
	    to the OAM control channel are relevant.  IP return paths,
	    where used, can be secured with IPsec.
	  </t>
	  <t>
	    Linear protection is defined by 
	    <xref target="RFC6378" />
	    and updated by
	    <xref target="I-D.ietf-mpls-psc-updates" />.
	    Security concerns related to MPLS encapsulation and OAM
	    control channels apply.  Security concerns reiterate
	    <xref target="RFC5920" />
	    as applied to protection switching.
	  </t>
	  <t>
	    The PW Flow Label 
	    <xref target="RFC6391" />
	    and MPLS Entropy Label
	    <xref target="RFC6790" />
	    affect multipath load balancing.  Security concerns
	    reiterate
	    <xref target="RFC5920" />.
	    Security impacts would be limited to load distribution.
	  </t>
	</list>
      </t>
      <t>
	MPLS security including data plane security is discussed in
	greater detail in
	<xref target="RFC5920" />
	(MPLS/GMPLS Security Framework).
	The MPLS-TP security framework
	<xref target="RFC6941" />
	builds upon this, focusing largely on the MPLS-TP OAM additions
	and OAM channels with some attention given to using network
	management in place of control plane setup.  In both security
	framework documents MPLS is assumed to run within a "trusted
	zone", defined as being where a single service provider (SP)
	has total operational control over that part of the network.
      </t>
      <t>
	If control plane security and management plane security are
	sufficiently robust, compromise of a single network element
	may result in chaos in the data plane anywhere in the network
	through denial of service attacks, but not a Byzantine
	security failure in which other network elements are fully
	compromised.
      </t>
      <t>
	MPLS security, or lack thereof, can affect whether traffic can be
	misrouted and lost, or intercepted, or intercepted and
	reinserted (a man-in-the-middle attack) or spoofed.  End user
	applications, including control plane and management plane
	protocols used by the SP, are expected to make use of
	appropriate end-to-end authentication and where appropriate
	end-to-end encryption.
      </t>

    </section>

    <section title="Organization of References Section">

      <t>
	The References section is split into Normative and Informative
	subsections.  References that directly specify forwarding
	encapsulations or behaviors are listed as normative.
	References which describe signaling only, though normative
	with respect to signaling, are listed as informative.  They
	are informative with respect to MPLS forwarding.
      </t>

    </section>

  </middle>

  <back>

    <references title="Normative References">

      &RFC2119;
      &RFC3032;
      &RFC3209;
      &RFC3270;
      &RFC3443;
      &RFC4090;
      &RFC4182;
      &RFC4201;
      &RFC4385;
      &RFC4950;
      &RFC5082;
      &RFC5085;
      &RFC5129;
      &RFC5332;
      &RFC5586;
      &RFC5880;
      &RFC5884;
      &RFC5885;
      &RFC6374;
      &RFC6375;
      &RFC6378;
      &RFC6391;
      &RFC6427;
      &RFC6428;
      &RFC6790;

      &I-D.ietf-mpls-psc-updates;

    </references>

    <references title="Informative References">

      &RFC0791;
      &RFC2474;
      &RFC2475;
      &RFC2597;
      &RFC3031;
      &RFC3168;
      &RFC3429;
      &RFC3471;
      &RFC3550;
      &RFC3828;
      &RFC3985;
      &RFC4023;
      &RFC4110;
      &RFC4124;
      &RFC4206;
      &RFC4221;
      &RFC4340;
      &RFC4377;
      &RFC4379;
      &RFC4664;
      &RFC4817;
      &RFC4875;
      &RFC4928;
      &RFC4960;
      &RFC5036;
      &RFC5102;
      &RFC5286;
      &RFC5317;
      &RFC5462;
      &RFC5470;
      <!-- RFC5513; -->
      &RFC5640;
      &RFC5695;
      &RFC5704;
      &RFC5714;
      &RFC5715;
      &RFC5860;
      &RFC5905;
      &RFC5920;
      &RFC6291;
      &RFC6310;
      &RFC6371;
      &RFC6388;
      &RFC6424;
      &RFC6425;
      &RFC6426;
      &RFC6435;
      &RFC6438;
      &RFC6478;
      &RFC6639;
      &RFC6669;
      &RFC6670;
      &RFC6720;
      &RFC6829;
      &RFC6941;
      <!-- RFC6976; -->
      &RFC6894;
      &RFC6981;
      &RFC7023;
      &RFC7074;
      &RFC7079;

      &I-D.ietf-mpls-in-udp;
      &I-D.ietf-mpls-special-purpose-labels;
      &I-D.ietf-tictoc-1588overmpls;

      &I-D.ietf-rtgwg-remote-lfa;
      &I-D.ietf-rtgwg-mrt-frr-architecture;

      <reference anchor="ACK-compression">
        <front>
          <title>Observations and Dynamics of a Congestion Control
          Algorithm: The Effects of Two-Way Traffic</title>
          <author fullname="Zhang, L." />
          <author fullname="Shenker, S" />
          <author fullname="Clark, D. D." />
          <date year="1991" />
        </front>
	<seriesInfo name="Proc. ACM SIGCOMM, ACM Computer
			  Communications Review (CCR)"
		    value="Vol 21, No 4, 1991, pp.133-147." />
      </reference>

    </references>

  </back>
</rfc>

PAFTECH AB 2003-20262026-04-23 16:23:19