<?xml version="1.0" encoding="US-ASCII"?>
<!-- xml2rfc is available at http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!ENTITY RFC0791 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.0791.xml">
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC2474 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2474.xml">
<!ENTITY RFC2475 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2475.xml">
<!ENTITY RFC2597 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2597.xml">
<!ENTITY RFC3031 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3031.xml">
<!ENTITY RFC3032 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3032.xml">
<!ENTITY RFC3168 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3168.xml">
<!ENTITY RFC3209 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3209.xml">
<!ENTITY RFC3270 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3270.xml">
<!ENTITY RFC3429 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3429.xml">
<!ENTITY RFC3443 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3443.xml">
<!ENTITY RFC3471 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3471.xml">
<!ENTITY RFC3550 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3550.xml">
<!ENTITY RFC3828 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3828.xml">
<!ENTITY RFC3985 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3985.xml">
<!ENTITY RFC4023 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4023.xml">
<!ENTITY RFC4090 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4090.xml">
<!ENTITY RFC4110 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4110.xml">
<!ENTITY RFC4124 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4124.xml">
<!ENTITY RFC4182 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4182.xml">
<!ENTITY RFC4201 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4201.xml">
<!ENTITY RFC4206 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4206.xml">
<!ENTITY RFC4221 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4221.xml">
<!ENTITY RFC4340 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4340.xml">
<!ENTITY RFC4377 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4377.xml">
<!ENTITY RFC4379 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4379.xml">
<!ENTITY RFC4385 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4385.xml">
<!ENTITY RFC4664 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4664.xml">
<!ENTITY RFC4817 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4817.xml">
<!ENTITY RFC4875 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4875.xml">
<!ENTITY RFC4928 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4928.xml">
<!ENTITY RFC4950 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4950.xml">
<!ENTITY RFC4960 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4960.xml">
<!ENTITY RFC5036 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5036.xml">
<!ENTITY RFC5082 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5082.xml">
<!ENTITY RFC5085 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5085.xml">
<!ENTITY RFC5102 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5102.xml">
<!ENTITY RFC5129 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5129.xml">
<!ENTITY RFC5286 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5286.xml">
<!ENTITY RFC5317 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5317.xml">
<!ENTITY RFC5332 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5332.xml">
<!ENTITY RFC5462 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5462.xml">
<!ENTITY RFC5470 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5470.xml">
<!ENTITY RFC5513 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5513.xml">
<!ENTITY RFC5586 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5586.xml">
<!ENTITY RFC5640 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5640.xml">
<!ENTITY RFC5695 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5695.xml">
<!ENTITY RFC5704 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5704.xml">
<!ENTITY RFC5714 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5714.xml">
<!ENTITY RFC5715 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5715.xml">
<!ENTITY RFC5860 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5860.xml">
<!ENTITY RFC5880 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5880.xml">
<!ENTITY RFC5884 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5884.xml">
<!ENTITY RFC5885 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5885.xml">
<!ENTITY RFC5905 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5905.xml">
<!ENTITY RFC5920 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5920.xml">
<!ENTITY RFC6291 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6291.xml">
<!ENTITY RFC6310 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6310.xml">
<!ENTITY RFC6371 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6371.xml">
<!ENTITY RFC6374 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6374.xml">
<!ENTITY RFC6375 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6375.xml">
<!ENTITY RFC6378 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6378.xml">
<!ENTITY RFC6388 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6388.xml">
<!ENTITY RFC6391 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6391.xml">
<!ENTITY RFC6424 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6424.xml">
<!ENTITY RFC6425 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6425.xml">
<!ENTITY RFC6426 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6426.xml">
<!ENTITY RFC6427 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6427.xml">
<!ENTITY RFC6428 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6428.xml">
<!ENTITY RFC6435 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6435.xml">
<!ENTITY RFC6438 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6438.xml">
<!ENTITY RFC6478 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6478.xml">
<!ENTITY RFC6639 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6639.xml">
<!ENTITY RFC6669 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6669.xml">
<!ENTITY RFC6670 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6670.xml">
<!ENTITY RFC6720 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6720.xml">
<!ENTITY RFC6790 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6790.xml">
<!ENTITY RFC6829 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6829.xml">
<!ENTITY RFC6941 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6941.xml">
<!ENTITY RFC6976 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6976.xml">
<!ENTITY RFC6894 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6894.xml">
<!ENTITY RFC6981 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6981.xml">
<!ENTITY RFC7023 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.7023.xml">
<!ENTITY RFC7074 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.7074.xml">
<!ENTITY RFC7079 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.7079.xml">
<!ENTITY I-D.ietf-mpls-in-udp SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-mpls-in-udp-05">
<!ENTITY I-D.ietf-mpls-psc-updates SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-mpls-psc-updates-01">
<!ENTITY I-D.ietf-mpls-special-purpose-labels SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-mpls-special-purpose-labels-03">
<!ENTITY I-D.ietf-tictoc-1588overmpls SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-tictoc-1588overmpls-05">
<!ENTITY I-D.ietf-rtgwg-remote-lfa SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-rtgwg-remote-lfa-04">
<!ENTITY I-D.ietf-rtgwg-mrt-frr-architecture SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-rtgwg-mrt-frr-architecture-03">
]>
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<?rfc strict="yes" ?>
<?rfc toc="yes"?>
<?rfc tocdepth="4"?>
<?rfc symrefs="yes"?>
<?rfc sortrefs="yes" ?>
<?rfc compact="yes" ?>
<?rfc subcompact="no" ?>
<?rfc comments="yes"?>
<?rfc inline="yes" ?>
<rfc category="info" ipr="trust200902"
docName="draft-ietf-mpls-forwarding-09">
<front>
<title abbrev="MPLS Forwarding">
MPLS Forwarding Compliance and Performance Requirements</title>
<author role="editor"
fullname="Curtis Villamizar" initials="C." surname="Villamizar">
<organization abbrev="OCCNC">
Outer Cape Cod Network Consulting, LLC
</organization>
<address>
<email>curtis@occnc.com</email>
</address>
</author>
<author
fullname="Kireeti Kompella" initials="K." surname="Kompella">
<organization>Juniper Networks</organization>
<address>
<email>kireeti@juniper.net</email>
</address>
</author>
<author
fullname="Shane Amante" initials="S." surname="Amante">
<organization>Apple Inc.</organization>
<address>
<postal>
<street>1 Infinite Loop</street>
<city>Cupertino, California</city>
<code>95014</code>
</postal>
<email>samante@apple.com</email>
</address>
</author>
<author
fullname="Andrew Malis" initials="A.G." surname="Malis">
<organization abbrev="Huawei">
Huawei Technologies
</organization>
<address>
<email>agmalis@gmail.com</email>
</address>
</author>
<author
fullname="Carlos Pignataro" initials="C.M." surname="Pignataro">
<organization abbrev="Cisco">Cisco Systems</organization>
<address>
<postal>
<street>7200-12 Kit Creek Road</street>
<city>Research Triangle Park</city>
<code>27709</code>
<region>NC</region>
<country>US</country>
</postal>
<!--
phone: +1-919-392-7428
facsimile: +1-919-869-1438
-->
<email>cpignata@cisco.com</email>
</address>
</author>
<date year="2014" />
<area>Routing</area>
<workgroup>MPLS</workgroup>
<keyword>MPLS</keyword>
<keyword>ECMP</keyword>
<keyword>link bundling</keyword>
<keyword>multipath</keyword>
<keyword>MPLS-TP</keyword>
<keyword>forwarding</keyword>
<abstract>
<t>
This document provides guidelines for implementers regarding
MPLS forwarding and a basis for evaluations of forwarding
implementations. Guidelines cover many aspects of MPLS
forwarding. Topics are highlighted where implementers might
otherwise overlook practical requirements which are unstated
or under emphasized or are optional for conformance to RFCs
but are often considered mandatory by providers.
</t>
</abstract>
</front>
<middle>
<section title="Introduction and Document Scope">
<t>
The initial purpose of this document was to address concerns
raised on the MPLS WG mailing list about shortcomings in
implementations of MPLS forwarding. Documenting existing
misconceptions and potential pitfalls may help avoid
repeating past mistakes. The document has grown to address a
broad set of forwarding requirements.
</t>
<t>
The focus of this document is MPLS forwarding, base pseudowire
forwarding, and MPLS Operations, Administration, and
Maintenance (OAM). The use of the pseudowire control word and
sequence number is discussed. Specific pseudowire Attachment
Circuit (AC) and Native Service Processing (NSP) are out of
scope. Specific pseudowire applications, such as various
forms of Virtual Private Network (VPN), are out of scope.
</t>
<t>
MPLS support for multipath techniques is considered essential
by many service providers and is useful for other high
capacity networks. In order to obtain sufficient entropy from
MPLS traffic, service providers and others find it essential
for the MPLS implementation to interpret the MPLS payload as
IPv4 or IPv6 based on the contents of the first nibble of
payload. The use of IP addresses, the IP protocol field, and
UDP and TCP port number fields in multipath load balancing are
considered within scope. The use of any other IP protocol
fields, such as tunneling protocols carried within IP, is out
of scope.
</t>
<t>
Implementation details are a local matter and are out of
scope. Most interfaces today operate at 1 Gb/s or greater.
It is assumed that all forwarding operations are implemented
in specialized forwarding hardware rather than on a general
purpose processor. Hardware forwarding and general purpose
processor forwarding are often referred to as "fast path"
and "slow path" processing, respectively. Some recommendations are made
regarding implementing control or management plane
functionality in specialized hardware or with limited
assistance from specialized hardware. This advice is based on
expected control or management protocol loads and on the need
for denial of service (DoS) protection.
</t>
<section title="Abbreviations">
<t>
The following abbreviations are used.
<list style="hanging" hangIndent="6">
<t hangText="AC">
Attachment Circuit (<xref target="RFC3985" />)
</t>
<t hangText="ACH">
Associated Channel Header (pseudowires)
</t>
<t hangText="ACK">
Acknowledgement (TCP flag and type of TCP packet)
</t>
<t hangText="AIS">
Alarm Indication Signal (MPLS-TP OAM)
</t>
<t hangText="ATM">
Asynchronous Transfer Mode (legacy switched circuits)
</t>
<t hangText="BFD">
Bidirectional Forwarding Detection
</t>
<t hangText="BGP">
Border Gateway Protocol
</t>
<t hangText="CC-CV">
Connectivity Check and Connectivity Verification
</t>
<t hangText="CE">
Customer Edge (LDP, RSVP-TE, other protocols)
</t>
<t hangText="CPU">
Central Processing Unit (computer or microprocessor)
</t>
<t hangText="CT">
Class Type (<xref target="RFC4124" />)
</t>
<t hangText="CW">
Control Word (<xref target="RFC4385" />)
</t>
<t hangText="DCCP">
Datagram Congestion Control Protocol
</t>
<t hangText="DDoS">
Distributed Denial of Service
</t>
<t hangText="DM">
Delay Measurement (MPLS-TP OAM)
</t>
<t hangText="DSCP">
Differentiated Services Code Point
(<xref target="RFC2474" />)
</t>
<t hangText="DWDM">
Dense Wavelength Division Multiplexing
</t>
<t hangText="DoS">
Denial of Service
</t>
<t hangText="E-LSP">
EXP-Inferred-PSC LSP (<xref target="RFC3270" />)
</t>
<t hangText="EBGP">
External BGP
</t>
<t hangText="ECMP">
Equal Cost Multi-Path
</t>
<t hangText="ECN">
Explicit Congestion Notification
(<xref target="RFC3168" /> and <xref target="RFC5129" />)
</t>
<t hangText="EL">
Entropy Label (<xref target="RFC6790" />)
</t>
<t hangText="ELI">
Entropy Label Indicator (<xref target="RFC6790" />)
</t>
<t hangText="EXP">
Experimental
(field in MPLS renamed to TC
in <xref target="RFC5462" />)
</t>
<t hangText="FEC">
Forwarding Equivalence Classes (LDP), also Forward Error
Correction in other context
</t>
<t hangText="FR">
Frame Relay (legacy switched circuits)
</t>
<t hangText="FRR">
Fast Reroute (<xref target="RFC4090" />)
</t>
<t hangText="G-ACh">
Generic Associated Channel (<xref target="RFC5586" />)
</t>
<t hangText="GAL">
Generic Associated Channel Label
(<xref target="RFC5586" />)
</t>
<t hangText="GFP">
Generic Framing Procedure (used in OTN)
</t>
<t hangText="GMPLS">
Generalized MPLS (<xref target="RFC3471" />)
</t>
<t hangText="GTSM">
Generalized TTL Security Mechanism
(<xref target="RFC5082" />)
</t>
<t hangText="Gb/s">
Gigabits per second (billion bits per second)
</t>
<t hangText="IANA">
Internet Assigned Numbers Authority
</t>
<t hangText="ILM">
Incoming Label Map (<xref target="RFC3031" />)
</t>
<t hangText="IP">
Internet Protocol
</t>
<t hangText="IPVPN">
Internet Protocol VPN
</t>
<t hangText="IPv4">
Internet Protocol version 4
</t>
<t hangText="IPv6">
Internet Protocol version 6
</t>
<t hangText="L-LSP">
Label-Only-Inferred-PSC LSP (<xref target="RFC3270" />)
</t>
<t hangText="L2VPN">
Layer 2 VPN
</t>
<t hangText="LDP">
Label Distribution Protocol (<xref target="RFC5036" />)
</t>
<t hangText="LER">
Label Edge Router (<xref target="RFC3031" />)
</t>
<t hangText="LM">
Loss Measurement (MPLS-TP OAM)
</t>
<t hangText="LSP">
Label Switched Path (<xref target="RFC3031" />)
</t>
<t hangText="LSR">
Label Switching Router (<xref target="RFC3031" />)
</t>
<t hangText="MP2MP">
Multipoint to Multipoint
</t>
<t hangText="MPLS">
MultiProtocol Label Switching (<xref target="RFC3031" />)
</t>
<t hangText="MPLS-TP">
MPLS Transport Profile
(<xref target="RFC5317" />)
</t>
<t hangText="Mb/s">
Megabits per second (million bits per second)
</t>
<t hangText="NSP">
Native Service Processing (<xref target="RFC3985" />)
</t>
<t hangText="NTP">
Network Time Protocol
</t>
<t hangText="OAM">
Operations, Administration, and Maintenance
(<xref target="RFC6291" />)
</t>
<t hangText="OOB">
Out-of-band (not carried within a data channel)
</t>
<t hangText="OTN">
Optical Transport Network
</t>
<t hangText="P">
Provider router (LDP, RSVP-TE, other protocols)
</t>
<t hangText="P2MP">
Point to Multi-Point
</t>
<t hangText="PE">
Provider Edge router (LDP, RSVP-TE, other protocols)
</t>
<t hangText="PHB">
Per-Hop-Behavior (<xref target="RFC2475" />)
</t>
<t hangText="PHP">
Penultimate Hop Popping (<xref target="RFC3443" />)
</t>
<t hangText="POS">
Packet over SONET
</t>
<t hangText="PSC">
This abbreviation has multiple interpretations.
<list style="numbers">
<t>
Packet Switch Capable (<xref target="RFC3471" />)
</t>
<t>
PHB Scheduling Class (<xref target="RFC3270" />)
</t>
<t>
Protection State Coordination
(<xref target="RFC6378" />)
</t>
</list>
</t>
<t hangText="PTP">
Precision Time Protocol
</t>
<t hangText="PW">
Pseudowire
</t>
<t hangText="QoS">
Quality of Service
</t>
<t hangText="RA">
Router Alert (<xref target="RFC3032" />)
</t>
<t hangText="RDI">
Remote Defect Indication (MPLS-TP OAM)
</t>
<t hangText="RSVP-TE">
RSVP Traffic Engineering
</t>
<t hangText="RTP">
Real-Time Transport Protocol
</t>
<t hangText="SCTP">
Stream Control Transmission Protocol
</t>
<t hangText="SDH">
Synchronous Digital Hierarchy (European SONET, a form of TDM)
</t>
<t hangText="SONET">
Synchronous Optical Network (US SDH, a form of TDM)
</t>
<t hangText="T-LDP">
Targeted LDP (LDP sessions over more than one hop)
</t>
<t hangText="TC">
Traffic Class (<xref target="RFC5462" />)
</t>
<t hangText="TCP">
Transmission Control Protocol
</t>
<t hangText="TDM">
Time-Division Multiplexing (legacy encapsulations)
</t>
<t hangText="TOS">
Type of Service (see <xref target="RFC2474" />)
</t>
<t hangText="TTL">
Time-to-live (a field in IP and MPLS headers)
</t>
<t hangText="UDP">
User Datagram Protocol
</t>
<t hangText="UHP">
Ultimate Hop Popping (opposite of PHP)
</t>
<t hangText="VCCV">
Virtual Circuit Connectivity Verification
(<xref target="RFC5085" />)
</t>
<t hangText="VLAN">
Virtual Local Area Network (Ethernet)
</t>
<t hangText="VOQ">
Virtual Output Queuing (switch fabric design)
</t>
<t hangText="VPN">
Virtual Private Network
</t>
<t hangText="WG">
Working Group
</t>
</list>
</t>
</section>
<section title="Use of Requirements Language">
<t>
This document is informational. The upper case
<xref target="RFC2119" />
key words "MUST", "MUST NOT", "SHOULD", "SHOULD NOT", and
"MAY" are used in this document in the following cases.
</t>
<t>
<list style="numbers">
<t>
RFC 2119 keywords are used where requirements stated in
this document are called for in referenced RFCs. In
most cases the RFC containing the requirement is cited
within the statement using an RFC 2119 keyword.
</t>
<t>
RFC 2119 keywords are used where explicitly noted that
the keywords indicate that operator experiences indicate
a requirement, but there are no existing RFC
requirements.
</t>
</list>
</t>
<t>
Advice provided by this document may be ignored by
implementations. Similarly, implementations not claiming
conformance to specific RFCs may ignore the requirements of
those RFCs. In both cases, implementers should consider the
risk of doing so.
</t>
</section>
<section title="Apparent Misconceptions">
<t>
In early generations of forwarding silicon (which might now be
behind us), there apparently were some misconceptions about
MPLS. The following statements provide clarifications.
<list style="numbers">
<t>
There are practical reasons to have more than one or two
labels in an MPLS label stack. Under some circumstances
the label stack can become quite deep.
See <xref target="sect.basics" />.
</t>
<t>
The label stack MUST be considered to be arbitrarily
deep. Section 3.27.4. "Hierarchy: LSP Tunnels within
LSPs" of RFC3031 states "The label stack mechanism
allows LSP tunneling to nest to any depth."
<xref target="RFC3031" />
If the bottom of the label stack cannot be found, but a
sufficient number of labels exist to forward, an LSR
MUST forward the packet. An LSR MUST NOT assume the
packet is malformed unless the end of packet is found
before bottom of stack.
See <xref target="sect.basics" />.
</t>
<t>
In networks where deep label stacks are encountered,
they are not rare. Full packet rate performance is
required regardless of label stack depth, except where
multiple pop operations are required.
See <xref target="sect.basics" />.
</t>
<t>
Research has shown that long bursts of short packets,
with 40-byte or 44-byte IP payload sizes,
are quite common.
This is due to TCP ACK compression
<xref target="ACK-compression" />.
The following two sub-bullets constitute advice that
reflects very common non-negotiable requirements of providers.
Implementers may ignore this advice but should consider
the risk of doing so.
<list style="letters">
<t>
A forwarding engine SHOULD, if practical, be able to
sustain an arbitrarily long sequence of small packets
arriving at full interface rate.
</t>
<t>
If indefinite full packet rate for small packets is
not practical, a forwarding engine MUST be able to
buffer a long sequence of small packets inbound to
the on-chip decision engine and sustain full
interface rate for some reasonable average packet
rate. Absent this small on-chip buffering, QoS
agnostic packet drops can occur.
</t>
</list>
See <xref target="sect.pkt-rate" />.
</t>
<t>
The implementations and system designs MUST support
pseudowire control word (CW) if MPLS-TP is supported or if
ACH <xref target="RFC5586" /> is being used on a pseudowire.
The implementation and system design SHOULD support
pseudowire CW even if MPLS-TP and ACH
<xref target="RFC5586" />
are not used, using instead CW and VCCV Type 1
<xref target="RFC5085" />
to allow the use of multipath in the underlying network
topology without impacting the PW traffic.
<xref target="RFC7079" />
does note that there are still some deployments where
the CW is not always used. It also notes that
many service providers do enable the CW. See
<xref target="sect.pw-cw" />
for more discussion on why deployments SHOULD enable the
pseudowire CW.
</t>
</list>
</t>
<t>
The following statements provide clarification regarding
more recent requirements that are often missed.
<list style="numbers">
<t>
The implementer and system designer SHOULD support
adding a pseudowire Flow Label
<xref target="RFC6391" />. Deployments MAY enable this
feature for appropriate pseudowire types.
See <xref target="sect.fat-pw" />.
</t>
<t>
The implementer and system designer SHOULD support
adding an MPLS entropy label <xref target="RFC6790" />.
Deployments MAY enable this feature.
See <xref target="sect.entropy" />.
</t>
</list>
</t>
<t>
Non-IETF definitions of MPLS exist and these should not be
used as normative texts in place of the relevant IETF RFCs.
<xref target="RFC5704" />
documents incompatibilities between the IETF definition of
MPLS and one such alternative MPLS definition which led to
significant issues in the resulting non-IETF specification.
</t>
</section>
<section title="Target Audience">
<t>
This document is intended for multiple audiences:
implementer (implementing MPLS forwarding in silicon or in
software); systems designer (putting together an MPLS
forwarding system); deployer (running an MPLS network).
These guidelines are intended to serve the following
purposes:
</t>
<t>
<list style="numbers">
<t>
Explain what to do and what not to do when a deep label
stack is encountered. (audience: implementer)
</t>
<t>
Highlight pitfalls to look for when implementing an MPLS
forwarding chip. (audience: implementer)
</t>
<t>
Provide a checklist of features and performance
specifications to request. (audience: systems
designer, deployer)
</t>
<t>
Provide a set of tests to perform. (audience: systems
designer, deployer)
</t>
</list>
</t>
<t>
The implementer, systems designer, and deployer have a
transitive supplier customer relationship. It is in the best
interest of the supplier to review their product against their
customer's checklist and secondary customer's checklist if
applicable.
</t>
<t>
This document identifies and explains many details and
potential pitfalls of MPLS forwarding. It is likely that
the identified set of potential pitfalls will later prove
to be an incomplete set.
</t>
</section>
</section>
<section anchor="sect.issues" title="Forwarding Issues">
<t>
A brief review of forwarding issues is provided in the
subsections that follow. This section provides some
background on why some of these requirements exist. The
questions to ask of suppliers are covered in
<xref target="sect.ask" />.
Some guidelines for testing are provided in
<xref target="sect.test" />.
</t>
<section anchor="sect.basics" title="Forwarding Basics">
<t>
Basic MPLS architecture and MPLS encapsulation, and
therefore packet forwarding, are defined in <xref
target="RFC3031" /> and <xref target="RFC3032" />. RFC3031
and RFC3032 are somewhat LDP centric. RSVP-TE supports
traffic engineering (TE) and fast reroute, features that LDP
lacks. The base document for RSVP-TE based MPLS is <xref
target="RFC3209" />.
</t>
<t>
A few RFCs update RFC3032. Those with impact on forwarding
include the following.
<list style="numbers">
<t>
TTL processing is clarified in <xref target="RFC3443" />.
</t>
<t>
The use of MPLS Explicit NULL is modified in <xref
target="RFC4182" />.
</t>
<t>
Differentiated Services is supported by
<xref target="RFC3270" /> and <xref target="RFC4124" />.
The "EXP" field is renamed to "Traffic Class" in
<xref target="RFC5462" />, removing any misconception
that it was available for experimentation or could be
ignored.
</t>
<t>
ECN is supported by <xref target="RFC5129" />.
</t>
<t>
The MPLS G-ACh and GAL are defined in <xref
target="RFC5586" />.
</t>
<t>
<xref target="RFC5332" />
redefines the two data link layer codepoints for MPLS
packets.
</t>
</list>
</t>
<t>
Tunneling encapsulations carrying MPLS, such as
MPLS in IP <xref target="RFC4023" />,
MPLS in GRE <xref target="RFC4023" />,
MPLS in L2TPv3 <xref target="RFC4817" />,
or MPLS in UDP <xref target="I-D.ietf-mpls-in-udp" />,
are out of scope.
</t>
<t>
Other RFCs have implications for MPLS forwarding but do not
update RFC3032 or RFC3209, including:
<list style="numbers">
<t>
The pseudowire (PW) Associated Channel Header (ACH),
defined by <xref target="RFC5085" />, later generalized
by the MPLS G-ACh <xref target="RFC5586" />.
</t>
<t>
The entropy label indicator (ELI) and entropy label (EL)
are defined by <xref target="RFC6790" />.
</t>
</list>
</t>
<t>
A few RFCs update RFC3209. Those that are listed as
updating RFC3209 generally impact only RSVP-TE signaling.
Forwarding is modified by major extensions built upon
RFC3209.
</t>
<t>
RFCs which impact forwarding are discussed in the following
subsections.
</t>
<section anchor="sect.resv-labels"
title="MPLS Special Purpose Labels">
<t>
<xref target="RFC3032" /> specifies that label values 0-15
are special purpose labels with special meanings.
<xref target="I-D.ietf-mpls-special-purpose-labels" />
renamed these from the term "reserved labels" used in
<xref target="RFC3032" /> to "special purpose labels".
Three values
of NULL label are defined (two of which are later updated
by <xref target="RFC4182" />) and a router-alert label is
defined. The original intent was that special purpose labels,
except the NULL labels, could be sent to the routing
engine CPU rather than be processed in forwarding
hardware. Hardware support is required by new RFCs such
as those defining entropy label and OAM processed as a
result of receiving a GAL. For new special purpose labels, some
accommodation is needed for LSRs that will send the labels
to a general purpose CPU or other highly programmable
hardware. For example, ELI will only be sent to LSRs that
have signaled support for
<xref target="RFC6790" />
and high OAM packet rate must be negotiated among
endpoints.
</t>
<t>
<xref target="RFC3429" /> reserves a label for ITU-T
Y.1711; however, Y.1711 does not work with multipath and
its use is strongly discouraged.
</t>
<t>
The current list of special purpose labels can be found on the
"Multiprotocol Label Switching Architecture (MPLS) Label
Values" registry reachable at IANA's pages at
<eref target="http://www.iana.org" />.
</t>
<t>
<xref target="I-D.ietf-mpls-special-purpose-labels" />
introduces an IANA "Extended Special Purpose MPLS Label
Values" registry and makes use of the "extension" label,
label 15, to indicate that the next label is an extended
special purpose label and requires special handling. The
range of only 16 values for special purpose labels allows
a table to be used. The range of extended special purpose
labels with 20 bits available for use may have to be
handled in some other way in the unlikely event that in
the future the range of currently reserved values
256-1048575 is used. If only the standards action range,
16-239, and the experimental range, 240-255, are used,
then a table of 256 entries can be used.
</t>
<t>
Unknown special purpose labels and unknown extended
special purpose labels are handled the same. When an
unknown special purpose label is encountered or a special
purpose label not directly handled in forwarding hardware
is encountered, the packet should be sent to a general
purpose CPU by default. If this capability is supported,
there must be an option to either drop or rate limit such
packets on a per special purpose label value basis.
</t>
</section>
<section anchor="sect.qos" title="MPLS Differentiated Services">
<t>
<xref target="RFC2474" />
deprecates the IP Type of Service (TOS) and IP Precedence
(Prec) fields and replaces them with the Differentiated
Services Field more commonly known as the Differentiated
Services Code Point (DSCP) field.
<xref target="RFC2475" />
defines the Differentiated Services architecture, which in
other forums, is often called a Quality of Service (QoS)
architecture.
</t>
<t>
MPLS uses the Traffic Class (TC) field to support
Differentiated Services <xref target="RFC5462" />. There
are two primary documents describing how DSCP is mapped
into TC.
<list style="numbers">
<t>
<xref target="RFC3270" />
defines E-LSP and L-LSP. E-LSP uses a static mapping
of DSCP into TC. L-LSP uses a per LSP mapping of DSCP
into TC, with one PHB Scheduling Class (PSC) per
L-LSP. Each PSC can use multiple Per-Hop Behavior
(PHB) values. For example, the Assured Forwarding
service defines four PSCs, each with three PHBs
<xref target="RFC2597" />.
</t>
<t>
<xref target="RFC4124" />
defines assignment of a class-type (CT) to an LSP,
where a per CT static mapping of TC to PHB is used.
<xref target="RFC4124" />
provides a means to support up to eight E-LSP-like
mappings of DSCP to TC.
</t>
</list>
</t>
<t>
To meet Differentiated Services requirements specified in
<xref target="RFC3270" />, the following forwarding
requirements must be met.
An ingress LER MUST be able to select an LSP and then
apply a per LSP map of DSCP into TC. A midpoint LSR MUST
be able to apply a per LSP map of TC to PHB. The number
of mappings supported will be far less than the number of
LSP supported.
</t>
<t>
To meet Differentiated Services requirements specified in
<xref target="RFC4124" />, the following forwarding
requirements must be met. An ingress LER MUST be able to
select an LSP and then apply a per LSP map of DSCP into
TC. A midpoint LSR MUST be able to apply a per LSP to
CT map and then use Class Type (CT) to map TC to PHB.
Since there are only eight allowed values of CT, only
eight maps of TC to PHB need to be supported. The LSP
label can be used directly to find the TC to PHB mapping,
as is needed to support <xref target="RFC3270" /> L-LSP.
</t>
<t>
While support for
<xref target="RFC4124" />
and not
<xref target="RFC3270" />
would allow support for only eight mappings of TC to PHB,
it is common to support both and simply state a limit on
the number of unique TC to PHB mappings which can be
supported.
</t>
</section>
<section anchor="sect.time-sync" title="Time Synchronization">
<t>
PTP or NTP may be carried over MPLS
<xref target="I-D.ietf-tictoc-1588overmpls" />. Generally
NTP will be carried within IP with IP carried in MPLS
<xref target="RFC5905" />. Both PTP and NTP benefit from
accurate time stamping of incoming packets and the ability
to insert accurate time stamps in outgoing packets.
PTP correction which occurs when forwarding requires
updating a timestamp compensation field based on the
difference between packet arrival at an LSR and packet
transmit time at that same LSR.
</t>
<t>
Since the label stack depth may vary, hardware should
allow a timestamp to be placed in an outgoing packet at
any specified byte position. It may be necessary to
modify layer-2 checksums or frame check sequences after
insertion. PTP and NTP timestamp formats differ in such
a way as to require different implementations of the
timestamp correction.
If NTP or PTP is carried over UDP/IP or UDP/IP/MPLS, the
UDP checksum will also have to be updated.
</t>
<t>
Accurate time synchronization in addition to being
generally useful is required for MPLS-TP delay measurement
(DM) OAM. See <xref target="sect.tp-oam" />.
</t>
</section>
<section anchor="sect.early-deep"
title="Uses of Multiple Label Stack Entries">
<t>
MPLS deployments in the early part of the prior decade
(circa 2000) tended to support either LDP or RSVP-TE. LDP
was favored by some for its ability to scale to
a very large number of PE devices at the edge of the
network, without adding deployment complexity. RSVP-TE
was favored, generally in the network core, where traffic
engineering and/or fast reroute were considered important.
</t>
<t>
Both LDP and RSVP-TE are used simultaneously within major
Service Provider networks using a technique known as "LDP
over RSVP-TE Tunneling".
This technique allows service providers to carry LDP
tunnels inside RSVP-TE tunnels. This makes it possible to
take advantage of the Traffic Engineering and Fast
Re-Route on more expensive Inter-City and
Inter-Continental transport paths. The ingress RSVP-TE PE
places many LDP tunnels on a single RSVP-TE LSP and
carries them to the egress RSVP-TE PE. The LDP PEs are
situated further from the core, for example within a metro
network.
LDP over RSVP-TE
tunneling requires a minimum of two MPLS labels: one each
for LDP and RSVP-TE.
</t>
<t>
The use of MPLS FRR <xref target="RFC4090" /> might add one
more label to MPLS traffic, but only when FRR protection
is in use (active). If LDP over RSVP-TE is in use, and FRR
protection is in use, then at least three MPLS labels are
present on the label stack on the links through which the
Bypass LSP traverses. FRR is covered in
<xref target="sect.frr" />.
</t>
<t>
LDP L2VPN, LDP IPVPN, BGP L2VPN, and BGP IPVPN added
support for VPN services that are deployed by the vast
majority of service providers. These VPN services added
yet another label, bringing the label stack depth (when
FRR is active) to four.
</t>
<t>
Pseudowires and VPN are discussed in further detail in
<xref target="sect.pw" /> and
<xref target="sect.vpn" />.
</t>
<t>
MPLS hierarchy as described in
<xref target="RFC4206" />
and updated by
<xref target="RFC7074" />
can in principle add at least one additional label. MPLS
hierarchy is discussed in
<xref target="sect.hierarchy" />.
</t>
<t>
Other features such as Entropy Label (discussed in
<xref target="sect.entropy" />) and Flow Label (discussed
in <xref target="sect.fat-pw" />) can add additional
labels to the label stack.
</t>
<t>
Although theoretical scenarios can easily result in eight
or more labels, such cases are rare if they occur at all
today. For the purpose of forwarding, only the top label
needs to be examined if PHP is used, a few more if UHP is
used (see <xref target="sect.tp-uhp" />). For deep label
stacks, quite a few labels may have to be examined for the
purpose of load balancing across parallel links (see
<xref target="sect.multipath" />), however this depth can
be bounded by a provider through use of Entropy Label.
</t>
<t>
Other creative use of MPLS within the IETF, such as the
use of MPLS label stack in source routing, may result in
label stacks that are considerably deeper than those
encountered today.
</t>
</section>
<section anchor="sect.link-bundle" title="MPLS Link Bundling">
<t>
MPLS Link Bundling was the first RFC to address the need for
multiple parallel links between nodes <xref target="RFC4201"
/>. MPLS Link Bundling is notable in that it tried not to
change MPLS forwarding, except in specifying the "All-Ones"
component link. MPLS Link Bundling is seldom if ever
deployed. Instead multipath techniques described in <xref
target="sect.multipath" /> are used.
</t>
</section>
<section anchor="sect.hierarchy" title="MPLS Hierarchy">
<t>
MPLS hierarchy is defined in <xref target="RFC4206" /> and
updated by <xref target="RFC7074" />.
Although RFC4206 is considered part of GMPLS, the Packet
Switching Capable (PSC) portion of the MPLS hierarchy is
applicable to MPLS and may be supported in an otherwise
GMPLS free implementation. The MPLS PSC hierarchy remains
the most likely means of providing further scaling in an
RSVP-TE MPLS network, particularly where the network is
designed to provide RSVP-TE connectivity to the edges.
This is the case for envisioned MPLS-TP networks. The use
of the MPLS PSC hierarchy can add at least one additional
label to a label stack, though it is likely that only one
layer of PSC will be used in the near future.
</t>
</section>
<section anchor="sect.frr" title="MPLS Fast Reroute (FRR)">
<t>
Fast reroute is defined by <xref target="RFC4090" />. Two
significantly different methods are defined in RFC4090,
the "One-to-One Backup" method which uses the "Detour LSP"
and the "Facility Backup" which uses a "bypass tunnel".
These are commonly referred to as the detour and bypass
methods respectively.
</t>
<t>
The detour method makes use of a presignaled LSP.
Hardware assistance is needed for detour FRR only if
necessary to accomplish local repair of a large number of
LSP within the 10s of milliseconds target. For each
affected LSP a swap operation must be reprogrammed or
otherwise switched over. The use of detour FRR doubles
the number of LSP terminating at any given hop and will
increase the number of LSP within a network by a factor
dependent on the average detour path length.
</t>
<t>
The bypass method makes use of a tunnel that is unused
when no fault exists but may carry many LSP when a local
repair is required. There is no presignaling indicating
which working LSP will be diverted into any specific
bypass LSP.
If interface label space is used the bypass LSP MUST
extend one hop beyond the merge point, except if the merge
point is the egress and PHP is used.
If the bypass LSP are not extended in this way, then
the merge LSR (egress LSR of the bypass LSP)
MUST use platform label space (as defined in
<xref target="RFC3031" />) so that an LSP working path on
any given interface can be backed up using a bypass LSP
terminating on any other interface. Hardware assistance
is needed if necessary to accomplish local repair of a
large number of LSP within the 10s of milliseconds target.
For each affected LSP a swap operation must be
reprogrammed or otherwise switched over with an additional
push of the bypass LSP label. The use of
platform label space impacts the size of the LSR ILM for
LSR with a very large number of interfaces.
</t>
<t>
IP/LDP Fast Reroute (IP/LDP FRR)
<xref target="RFC5714" />
is also applicable in MPLS networks.
ECMP and Loop-Free
Alternates (LFA)
<xref target="RFC5286" />
are well established IP/LDP FRR techniques and were the
first methods to be widely deployed.
Work on IP/LDP FRR is ongoing within the IETF RTGWG.
Two topics actively discussed in RTGWG are
microloops and partial coverage of the established
techniques in some network topologies.
<xref target="RFC5715" />
covers the topic of IP/LDP Fast Reroute microloops and
microloops prevention.
RTGWG has developed additional IP/LDP FRR techniques to
handle coverage concerns. RTGWG is extending LFA through
the use of remote LFA
<xref target="I-D.ietf-rtgwg-remote-lfa" />.
Other techniques that require new forwarding paths to be
established are also under consideration, including the
IPFRR "not-via" technique defined in
<xref target="RFC6981" />
and maximally redundant trees (MRT)
<xref target="I-D.ietf-rtgwg-mrt-frr-architecture" />.
ECMP, LFA (but not remote LFA) and MRT swap the top label
to an alternate MPLS label. The other methods operate in a
similar manner to RFC 4090 facility backup and push an
additional label.
IP/LDP FRR methods which push more than one label have
been suggested but are in early discussion.
</t>
</section>
<section anchor="sect.pw" title="Pseudowire Encapsulation">
<t>
The pseudowire (PW) architecture is defined in
<xref target="RFC3985" />.
A pseudowire, when carried over MPLS, adds one or more
additional label entries to the MPLS label stack.
A PW Control Word is defined in
<xref target="RFC4385" />
with motivation for defining the control word in
<xref target="RFC4928" />.
The PW Associated Channel defined in <xref
target="RFC4385" /> is used for OAM in <xref
target="RFC5085" />.
The PW Flow Label is defined in
<xref target="RFC6391" />
and is discussed further in this document in
<xref target="sect.fat-pw" />.
</t>
<t>
There are numerous pseudowire encapsulations, supporting
emulation of services such as Frame Relay, ATM, Ethernet,
TDM, and SONET/SDH over packet switched networks (PSNs)
using IP or MPLS.
</t>
<t>
The pseudowire encapsulation is out of scope for this
document. Pseudowire impact on MPLS forwarding at
midpoint LSR is within scope. The impact on ingress MPLS
push and egress MPLS UHP pop are within scope. While
pseudowire encapsulation is out of scope, some advice is
given on sequence number support.
</t>
<section anchor="sect.pw-seq" title="Pseudowire Sequence Number">
<t>
Pseudowire (PW) sequence number support is most
important for PW payload types with a high expectation
of lossless and/or in-order delivery. Identifying lost
PW packets and the exact amount of lost payload is critical
for PW services which maintain bit timing, such as Time
Division Multiplexing (TDM) services since these
services MUST compensate lost payload on a bit-for-bit
basis.
</t>
<t>
With PW services which maintain bit timing, packets that
have been received out of order also MUST be identified
and MAY be either re-ordered or dropped. Resequencing
requires, in addition to sequence numbering, a "reorder
buffer" in the egress PE, and ability to reorder is
limited by the depth of this buffer. The down side of
maintaining a large reorder buffer is added end-to-end
service delay.
</t>
<t>
For PW services which maintain bit timing or any other
service where jitter must be bounded, a jitter buffer is
always necessary. The jitter buffer is needed
regardless of whether reordering is done. In order to
be effective, a reorder buffer must often be larger than
a jitter buffer needs to be, creating a tradeoff between
reducing loss and minimizing delay.
</t>
<t>
PW services which are not timing critical bit streams in
nature are cell oriented or frame oriented. Though
resequencing support may be beneficial to PW cell and
frame oriented payloads such as ATM, FR and Ethernet,
this support is desirable but not required.
Requirements to handle out of order packets at all vary
among services and deployments. For example for
Ethernet PW, occasional (very rare) reordering is
usually acceptable. If the Ethernet PW is carrying
MPLS-TP, then this reordering may be acceptable.
</t>
<t>
Reducing jitter is best done by an end-system, given
that the tradeoff of loss vs delay varies among
services. For example with interactive real time
services low delay is preferred, while with
non-interactive (one way) real time services low loss is
preferred. The same end-site may be receiving both
types of traffic. Regardless of this, bounded jitter is
sometimes a requirement for specific deployments.
</t>
<t>
Packet reordering should be rare except in a small number
of circumstances, most of which are due to network
design or equipment design errors:
<list style="numbers">
<t>
The most common case is where reordering is rare,
occurring only when a network or equipment
fault forces traffic on a new path with different
delay. The packet loss that accompanies a network or
equipment fault is generally more disruptive than
any reordering which may occur.
</t>
<t>
A path change can be caused by reasons other than a
network or equipment fault, such as administrative
routing change. This may result in packet
reordering but generally without any packet loss.
</t>
<t>
If the edge is not using pseudowire control word
(CW) and the core is using multipath, reordering
will be far more common. If this is occurring,
using CW on the edge will solve the problem.
Without CW, resequencing is not possible since the
sequence number is contained in the CW.
</t>
<t>
Another avoidable case is where some core equipment
has multipath and for some reason insists on
periodically installing a new random number as the
multipath hash seed. If supporting MPLS-TP,
equipment MUST provide a means to disable periodic
hash reseeding and deployments MUST disable periodic
hash reseeding. Operator experience dictates that
even if not supporting MPLS-TP, equipment SHOULD
provide a means to disable periodic hash reseeding
and deployments SHOULD disable periodic hash
reseeding.
</t>
</list>
</t>
<t>
In provider networks which use multipath techniques and
which may occasionally rebalance traffic or which may
change PW paths occasionally for other reasons,
reordering may be far more common than loss. Where
reordering is more common than loss, resequencing
packets is beneficial, rather than dropping packets at
egress when out of order arrival occurs. Resequencing is
most important for PW payload types with a high
expectation of lossless delivery since in such cases out
of order delivery within the network results in PW loss.
</t>
</section>
</section>
<section anchor="sect.vpn" title="Layer-2 and Layer-3 VPN">
<t>
Layer-2 VPN
<xref target="RFC4664" />
and Layer-3 VPN
<xref target="RFC4110" />
add one or more label entries to the MPLS label stack. VPN
encapsulations are out of scope for this document. Their
impact on forwarding at midpoint LSR is within scope.
</t>
<t>
Any of these services may be used on an MPLS entropy label
enabled ingress and egress (see
<xref target="sect.entropy" />
for discussion of entropy label) which would add an
additional two labels to the MPLS label stack. The need to
provide a useful entropy label value impacts the
requirements of the VPN ingress LER but is out of
scope for this document.
</t>
</section>
</section>
<section anchor="sect.mcast" title="MPLS Multicast">
<t>
MPLS Multicast encapsulation is clarified in
<xref target="RFC5332" />.
MPLS Multicast may be signaled using RSVP-TE
<xref target="RFC4875" />
or LDP
<xref target="RFC6388" />.
</t>
<t>
<xref target="RFC4875" />
defines a root initiated RSVP-TE LSP setup rather than leaf
initiated join used in IP multicast.
<xref target="RFC6388" />
defines a leaf initiated LDP setup.
Both
<xref target="RFC4875" />
and
<xref target="RFC6388" />
define point to multipoint (P2MP) LSP setup.
<xref target="RFC6388" />
also defines multipoint to multipoint (MP2MP) LSP setup.
</t>
<t>
The P2MP LSP have a single source. An LSR may be a leaf
node, an intermediate node, or a "bud" node. A bud serves
as both a leaf and intermediate. At a leaf an MPLS pop is
performed. The payload may be an IP Multicast packet that
requires further replication. At an intermediate node an
MPLS swap operation is performed. The bud requires that
both a pop operation and a swap operation be performed for
the same incoming packet.
</t>
<t>
One strategy to support P2MP functionality is to pop at the
LSR interface serving as ingress to the P2MP traffic and
then optionally push labels at each LSR interface serving as
egress to the P2MP traffic at that same LSR. A given LSR
egress chip may support multiple egress interfaces, each of
which requires a copy, but each with a different set of
added labels and layer-2 encapsulation. Some physical
interfaces may have multiple sub-interfaces (such as
Ethernet VLAN or channelized interfaces) each requiring a
copy.
</t>
<t>
If packet replication is performed at LSR ingress, then the
ingress interface performance may suffer. If the packet
replication is performed within an LSR switching fabric and
at LSR egress, congestion of egress interfaces cannot make
use of backpressure to ingress interfaces using techniques
such as virtual output queuing (VOQ). If buffering is
primarily supported at egress, then the need for
backpressure is minimized. There may be no good solution
for high volumes of multicast traffic if VOQ is used.
</t>
<t>
Careful consideration should be given to the performance
characteristics of high fanout multicast for equipment that
is intended to be used in such a role.
</t>
<t>
MP2MP LSP differ in that any branch may provide an input,
including a leaf. Packets must be replicated onto all other
branches. This forwarding is often implemented as multiple
P2MP forwarding trees, one for each potential input
interface at a given LSR.
</t>
</section>
<section anchor="sect.pkt-rate" title="Packet Rates">
<t>
While average packet size of Internet traffic may be large,
long sequences of small packets have both been predicted in
theory and observed in practice. Traffic compression and
TCP ACK compression can conspire to create long sequences of
packets of 40-44 bytes in payload length. If carried over
Ethernet, the 64 byte minimum payload applies, yielding a
packet rate of approximately 150 Mpps (million packets per
second) for the duration of the burst on a nominal 100 Gb/s
link. The peak rate for other encapsulations can be as high
as 250 Mpps (for example IP or MPLS encapsulated using GFP
over OTN ODU4).
</t>
<t>
It is possible that the packet rate achieved by a specific
implementation is acceptable for a minimum payload size,
such as 64 byte (64B) payload for Ethernet, but the achieved
rate declines to an unacceptable level for other packet
sizes, such as 65B payload. There are other packet rates of
interest besides TCP ACK. For example, a TCP ACK carried
over an Ethernet PW over MPLS over Ethernet may occupy 82B
or 82B plus an increment of 4B if additional MPLS labels are
present.
</t>
<t>
A graph of packet rate vs. packet size often displays a
sawtooth. The sawtooth is commonly due to a memory
bottleneck and memory widths, sometimes internal cache, but
often a very wide external buffer memory interface. In some
cases it may be due to a fabric transfer width. A fine
packing, rounding up to the nearest 8B or 16B will result in
a fine sawtooth with small degradation for 65B, and even
less for 82B packets. A coarse packing, rounding up to 64B
can yield a sharper drop in performance for 65B packets, or
perhaps more important, a larger drop for 82B packets.
</t>
<t>
The loss of some TCP ACK packets is not the primary concern
when such a burst occurs. When a burst occurs, any other
packets, regardless of packet length and packet QoS are
dropped once on-chip input buffers prior to the decision
engine are exceeded. Buffers in front of the packet
decision engine are often very small or non-existent (less
than one packet of buffer) causing significant QoS agnostic
packet drop.
</t>
<t>
Internet service providers and content providers at one time
specified full rate forwarding with 40 byte payload packets
as a requirement. Today, this requirement often can be
waived if the provider can be convinced that when long
sequences of short packets occur, no packets will be dropped.
</t>
<t>
Many equipment suppliers have pointed out that the extra
cost in designing hardware capable of processing the minimum
size packets at full line rate is significant for very high
speed interfaces. If hardware is not capable of processing
the minimum size packets at full line rate, then that
hardware MUST be capable of handling large bursts of small
packets, a condition which is often observed. This level of
performance is necessary to meet Differentiated Services
<xref target="RFC2475" />
requirements since, without it, packets are lost prior to
inspection of the IP DSCP field
<xref target="RFC2474" />
or MPLS TC field <xref target="RFC5462" />.
</t>
<t>
With adequate on-chip buffers before the packet decision
engine, an LSR can absorb a long sequence of short packets.
Even if the output is slowed to the point where light
congestion occurs, the packets, having cleared the decision
process, can make use of larger VOQ or output side buffers
and be dealt with according to configured QoS treatment,
rather than dropped completely at random.
</t>
<t>
These on-chip buffers need not contribute significant delay
since they are only used when the packet decision engine is
unable to keep up, not in response to congestion, plus these
buffers are quite small. For example, an on-chip buffer
capable of handling 4K packets of 64 bytes in length, or
256KB, corresponds to 200 usec on a 10 Gb/s link and 20 usec
on a 100 Gb/s link. If the packet decision engine is
capable of handling packets at 90% of the full rate for
small packets, then the maximum added delay is 20 usec and
2 usec respectively, and this delay only applies if a 4K
burst of short packets occurs. When no burst of short
packets is being processed, no delay is added. These
buffers are only needed on high speed interfaces where it is
difficult to process small packets at full line rate.
</t>
<t>
Packet rate requirements apply regardless of which network
tier equipment is deployed in. Whether deployed in the
network core or near the network edges, one of the two
conditions MUST be met if Differentiated Services
requirements are to be met:
<list style="numbers">
<t>
Packets must be processed at full line rate with minimum
sized packets. -OR-
</t>
<t>
Packets must be processed at a rate well under generally
accepted average packet sizes, with sufficient buffering
prior to the packet decision engine to accommodate long
bursts of small packets.
</t>
</list>
</t>
</section>
<section anchor="sect.multipath" title="MPLS Multipath Techniques">
<t>
In any large provider, service providers and content
providers, hash based multipath techniques are used in the
core and in the edge. In many of these providers hash based
multipath is also used in the larger metro networks.
</t>
<t>
The Differentiated Services requirements for good reasons
dictate that packets within a common microflow SHOULD NOT be
reordered <xref target="RFC2474" />. Service providers generally impose
stronger requirements, commonly requiring that packets
within a microflow MUST NOT be reordered except in rare
circumstances such as load balancing across multiple links
or path change for load balancing or path change for other
reason.
</t>
<t>
The most common multipath techniques are ECMP applied at
the IP forwarding level, Ethernet LAG with inspection of the
IP payload, and multipath on links carrying both IP and
MPLS, where the IP header is inspected below the MPLS label
stack. In most core networks, the vast majority of traffic
is MPLS encapsulated.
</t>
<t>
In order to support an adequately balanced load distribution
across multiple links, IP header information must be used.
Common practice today is to reinspect the IP headers at each
LSR and use the label stack and IP header information in a
hash performed at each LSR. Further details are provided in
<xref target="sect.mp-hash" />.
</t>
<t>
The use of this technique is so ubiquitous in provider
networks that lack of support for multipath makes any
product unsuitable for use in large core networks. This
will continue to be the case in the near future, even as
deployment of MPLS entropy label begins to relax the core
LSR multipath performance requirements given the existing
deployed base of edge equipment without the ability to add
an entropy label.
</t>
<t>
A generation of edge equipment supporting the ability to add
an MPLS entropy label is needed before the performance
requirements for core LSR can be relaxed. However, it is
likely that two generations of deployment in the future will
allow core LSR to support full packet rate only when a
relatively small number of MPLS labels need to be inspected
before hashing. For now, don't count on it.
</t>
<t>
Common practice today is to reinspect the packet at each LSR
and use information from the packet combined with a hash seed
that is selected by each LSR. Where flow labels or entropy
labels are used, a hash seed must be used when creating
these labels.
</t>
<section anchor="sect.pw-cw" title="Pseudowire Control Word">
<t>
Within the core of a network some form of multipath is
almost certain to be used. Multipath techniques deployed
today are likely to be looking beneath the label stack for
an opportunity to hash on IP addresses.
</t>
<t>
A pseudowire encapsulated at a network edge must have a
means to prevent reordering within the core if the
pseudowire will be crossing a network core, or any part of
a network topology where multipath is used
(see <xref target="RFC4385" />
and <xref target="RFC4928" />).
</t>
<t>
Not supporting the ability to encapsulate a pseudowire
with a control word may lock a product out from
consideration. A pseudowire capability without control
word support might be sufficient for applications that
are strictly both intra-metro and low bandwidth. However
a provider with other applications will very likely not
tolerate having equipment which can only support a subset
of their pseudowire needs.
</t>
</section>
<section anchor="sect.large-uflow" title="Large Microflows">
<t>
Where multipath makes use of a simple hash and simple load
balance such as modulo or other fixed allocation (see
<xref target="sect.multipath" />) and large microflows are
present that each consume 10% of the capacity of a
component link of a potentially congested composite link,
one such microflow can upset the traffic balance and more
than one can in effect reduce the effective capacity of
the entire composite link by more than 10%.
</t>
<t>
When even a very small number of large microflows are
present, there is a significant probability that more
than one of these large microflows could fall on the same
component link. If the traffic contribution from large
microflows is small, the probability for three or more
large microflows on the same component link drops
significantly. Therefore in a network where a significant
number of parallel 10 Gb/s links exists, even a 1 Gb/s
pseudowire or other large microflow that could not
otherwise be subdivided into smaller flows should carry a
flow label or entropy label if possible.
</t>
<t>
Active management of the hash space to better accommodate
large microflows has been implemented and deployed in the
past, however such techniques are out of scope for this
document.
</t>
</section>
<section anchor="sect.fat-pw" title="Pseudowire Flow Label">
<t>
Unlike a pseudowire control word, a pseudowire flow label
<xref target="RFC6391" />, is required only for relatively
large capacity pseudowires. There are many cases where a
pseudowire flow label makes sense. Any service such as a
VPN which carries IP traffic within a pseudowire can make
use of a pseudowire flow label.
</t>
<t>
Any pseudowire carried over MPLS which makes use of the
pseudowire control word and does not carry a
flow label is in effect a single microflow (in
<xref target="RFC2475" /> terms) and may result in the
types of problems described in
<xref target="sect.large-uflow" />.
</t>
</section>
<section anchor="sect.entropy" title="MPLS Entropy Label">
<t>
The MPLS entropy label simplifies flow group
identification <xref target="RFC6790" /> at midpoint LSRs.
Prior to the MPLS entropy label midpoint LSRs needed to
inspect the entire label stack and often the IP headers to
provide an adequate distribution of traffic when using
multipath techniques (see <xref target="sect.mp-hash" />).
With the use of MPLS entropy label, a hash can be
performed closer to network edges, placed in the label
stack, and used by midpoint LSRs without fully reinspecting
the label stack and inspecting the payload.
</t>
<t>
The MPLS entropy label is capable of avoiding full label
stack and payload inspection within the core where
performance levels are most difficult to achieve (see
<xref target="sect.pkt-rate" />).
The label stack inspection can be terminated as soon as the
first entropy label is encountered, which is generally after a
small number of labels are inspected.
</t>
<t>
In order to provide these benefits in the core, LSR closer
to the edge must be capable of adding an entropy label.
This support may not be required in the access tier, the
tier closest to the customer, but is likely to be required
in the edge or the border to the network core. LSR peering
with external networks will also need to be able to add an
entropy label on incoming traffic.
</t>
</section>
<section anchor="sect.mp-hash"
title="Fields Used for Multipath Load Balance">
<t>
The most common multipath techniques are based on a hash
over a set of fields. Regardless of whether a hash is
used or some other method is used, there is a limited
set of fields which can safely be used for multipath.
</t>
<section anchor="sect.label-hash" title="MPLS Fields in Multipath">
<t>
If the "outer" or "first" layer of encapsulation is
MPLS, then label stack entries are used in the hash.
Within a finite amount of time (and for small packets
arriving at high speed that time can be quite limited)
only a finite number of label entries can be inspected.
Pipelined or parallel architectures improve this, but
the limit is still finite.
</t>
<t>
The following guidelines are provided for use of MPLS
fields in multipath load balancing.
<list style="numbers">
<t>
Only the 20 bit label field SHOULD be used. The TTL
field SHOULD NOT be used. The S bit MUST NOT be
used. The TC field (formerly EXP) MUST NOT be used.
See text following this list for reasons.
</t>
<t>
If an ELI label is found, then if the LSR supports
entropy label, the EL label field in the next label
entry (the EL) SHOULD be used and label entries
below that label SHOULD NOT be used and the MPLS
payload SHOULD NOT be used.
See below this list for reasons.
</t>
<t>
Special purpose labels (label values 0-15) MUST NOT
be used. Extended special purpose labels (any label
following label 15) MUST NOT be used. In
particular, GAL and RA MUST NOT be used so that OAM
traffic follows the same path as payload packets
with the same label stack.
</t>
<t>
If a new special purpose label or extended special
purpose label is defined which requires special load
balance processing, then, as is the case for the ELI
label, a special action may be needed rather than
skipping the special purpose label or extended
special purpose label.
</t>
<t>
The most entropy is generally found in the label
stack entries near the bottom of the label stack
(innermost label, closest to S=1 bit). If the
entire label stack cannot be used (or entire stack
up to an EL), then it is better to use as many
labels as possible closest to the bottom of stack.
</t>
<t>
If no ELI is encountered, and the first nibble of
payload contains a 4 (IPv4) or 6 (IPv6), an
implementation SHOULD support the ability to
interpret the payload as IPv4 or IPv6 and extract
and use appropriate fields from the IP headers.
This feature is considered a non-negotiable requirement by
many service providers. If supported, there MUST be
a way to disable it (if, for example, PW without CW
are used). This ability to disable this feature is
considered a non-negotiable requirement by many service
providers. Therefore an implementation has a very
strong incentive to support both options.
</t>
<t>
A label which is popped at egress (UHP pop) SHOULD
NOT be used. A label which is popped at the
penultimate hop (PHP pop) SHOULD be used.
</t>
</list>
</t>
<t>
Apparently some chips have made use of the TC (formerly
EXP) bits as a source of entropy. This is very harmful
since it will reorder Assured Forwarding (AF) traffic
<xref target="RFC2597" />
when a subset does not conform to the configured rates
and is remarked but not dropped at a prior LSR. Traffic
which uses MPLS ECN
<xref target="RFC5129" />
can also be reordered if TC is used for entropy.
Therefore, as stated in the guidelines above, the TC
field (formerly EXP) MUST NOT be used in multipath load
balancing as it violates Differentiated Services Ordered
Aggregate (OA) requirements in these two instances.
</t>
<t>
Use of the MPLS label entry S bit would result in
putting OAM traffic on a different path if the addition
of a GAL at the bottom of stack removed the S bit from
the prior label.
</t>
<t>
If an ELI label is found, then if the LSR supports
entropy label, the EL label field in the next label
entry (the EL) SHOULD be used and the search for
additional entropy within the packet SHOULD be
terminated. Failure to terminate the search will impact
client MPLS-TP LSP carried within server MPLS LSP. A
network operator has the option to use administrative
attributes as a means to identify LSR which do not
terminate the entropy search at the first EL.
Administrative attributes are defined in
<xref target="RFC3209" />. Some configuration is
required to support this.
</t>
<t>
If the label removed by a PHP pop is not used, then for
any PW for which CW is used, there is no basis for
multipath load split. In some networks it is infeasible
to put all PW traffic on one component link. Any PW
which does not use CW will be improperly split
regardless of whether the label removed by a PHP pop is
used. Therefore the PHP pop label SHOULD be used as
recommended above.
</t>
</section>
<section anchor="sect.ip-hash" title="IP Fields in Multipath">
<t>
Inspecting the IP payload provides the most entropy in
provider networks. The practice of looking past the
bottom of stack label for an IP payload is well accepted
and documented in
<xref target="RFC4928" />
and in other RFCs.
</t>
<t>
Where IP is mentioned in the document, both IPv4 and
IPv6 apply. All LSRs MUST fully support IPv6.
<!-- or face the wrath of Shane -->
</t>
<t>
When information in the IP header is used, the following
guidelines apply:
<list style="numbers">
<t>
Both the IP source address and IP destination
address SHOULD be used. There MAY be an option to
reverse the order of these addresses, improving the
ability to provide symmetric paths in some cases.
Many service providers require that both addresses
be used.
</t>
<t>
Implementations SHOULD allow inspection of the IP
protocol field and use of the UDP or TCP port
numbers. For many service providers this feature is
considered mandatory, particularly for enterprise,
data center, or edge equipment. If this feature is
provided, it SHOULD be possible to disable use of
TCP and UDP ports. Many service providers consider
it a non-negotiable requirement that use of UDP and TCP ports
can be disabled. Therefore there is a strong
incentive for implementations to provide both
options.
</t>
<t>
Equipment suppliers MUST NOT make assumptions that
because the IP version field is equal to 4 (an IPv4
packet) that the IP protocol will either be TCP (IP
protocol 6) or UDP (IP protocol 17) and blindly
fetch the data at the offset where the TCP or UDP
ports would be found. With IPv6, TCP and UDP port
numbers are not at fixed offsets. With IPv4 packets
carrying IP options, TCP and UDP port numbers are
not at fixed offsets.
</t>
<t>
The IPv6 header flow field SHOULD be used. This is
the explicit purpose of the IPv6 flow field, however
observed flow fields rarely contain a non-zero
value. Some uses of the flow field have been
defined such as <xref target="RFC6438" />. In the
absence of MPLS encapsulation, the IPv6 flow field
can serve a role equivalent to entropy label.
</t>
<t>
Support for other protocols that share a common
Layer-4 header such as
RTP <xref target="RFC3550" />,
UDP-Lite <xref target="RFC3828" />,
SCTP <xref target="RFC4960" /> and
DCCP <xref target="RFC4340" />
SHOULD be provided, particularly for edge or access
equipment where additional entropy may be needed.
Equipment SHOULD also use RTP, UDP-Lite, SCTP and
DCCP headers when creating an entropy label.
</t>
<t>
<!-- request to break this up into sub-bullets -->
The following IP header fields should not or must
not be used:
<list style="letters">
<t>
Similar to avoiding TC in MPLS, the IP DSCP and
ECN bits MUST NOT be used.
</t>
<t>
The IPv4 TTL or IPv6 Hop Limit SHOULD NOT be
used.
</t>
<t>
Note that the IP TOS field was deprecated
(<xref target="RFC0791" /> was updated by
<xref target="RFC2474" />).
No part of the IP DSCP field can be used
(formerly IP PREC and IP TOS bits).
</t>
</list>
</t>
<t>
Some IP encapsulations support tunneling, such as
IP-in-IP, GRE, L2TPv3, and IPSEC. These provide a
greater source of entropy which some provider
networks carrying large amounts of tunneled traffic
may need, for example as used in
<xref target="RFC5640" />
for GRE and L2TPv3. The use of tunneling header
information is out of scope for this document.
</t>
</list>
</t>
<t>
This document makes the following recommendations.
These recommendations are not required to claim
compliance to any existing RFC; therefore, implementers
are free to ignore them, but due to service provider
requirements should consider the risk of doing so.
The use of IP addresses MUST be supported and TCP and
UDP ports (conditional on the protocol field and
properly located) MUST be supported. The ability to
disable use of UDP and TCP ports MUST be available.
</t>
<t>
Though potentially very useful in some networks, it is
uncommon to support using payloads of tunneling
protocols carried over IP. Though the use of tunneling
protocol header information is out of scope for this
document, it is not discouraged.
</t>
</section>
<section anchor="sect.fl-gen" title="Fields Used in Flow Label">
<t>
The ingress to a pseudowire (PW) can extract information
from the payload being encapsulated to create a flow
label. <xref target="RFC6391" /> references IP carried
in Ethernet as an example. The Native Service
Processing (NSP) function defined in
<xref target="RFC3985" /> differs with pseudowire type.
It is in the NSP function where information for a
specific type of PW can be extracted for use in a flow
label. Which fields to use for any given PW NSP is out
of scope for this document.
</t>
</section>
<section anchor="sect.el-gen" title="Fields Used in Entropy Label">
<t>
An entropy label is added at the ingress to an LSP. The
payload being encapsulated is most often MPLS, a PW, or
IP. The payload type is identified by the layer-2
encapsulation (Ethernet, GFP, POS, etc).
</t>
<t>
If the payload is MPLS, then the information used to
create an entropy label is the same information used for
local load balancing (see
<xref target="sect.label-hash" />). This information
MUST be extracted for use in generating an entropy label
even if the LSR local egress interface is not a
multipath.
</t>
<t>
Of the non-MPLS payload types, only payloads that are
forwarded are of interest. For example, ARP is not
forwarded and CLNP (used only for ISIS) is not
forwarded.
</t>
<t>
The non-MPLS payload types of greatest interest are IPv4
and IPv6. The guidelines in
<xref target="sect.ip-hash" />
apply to fields used to create an entropy label.
</t>
<t>
The IP tunneling protocols mentioned in
<xref target="sect.ip-hash" />
may be more applicable to generation of an entropy label
at edge or access where deep packet inspection is
practical due to lower interface speeds than in the core
where deep packet inspection may be impractical.
</t>
</section>
</section>
</section>
<section anchor="sect.tp-uhp" title="MPLS-TP and UHP">
<t>
MPLS-TP introduces forwarding demands that will be extremely
difficult to meet in a core network. Most troublesome is
the requirement for Ultimate Hop Popping (UHP, the opposite
of Penultimate Hop Popping or PHP). Using UHP opens the
possibility of one or more MPLS pop operations plus an MPLS
swap operation for each packet. The potential for multiple
lookups and multiple counter instances per packet exists.
</t>
<t>
As networks grow and tunneling of LDP LSPs into RSVP-TE LSPs
is used, and/or RSVP-TE hierarchy is used, the requirement to
perform one or two or more MPLS pop operations plus an MPLS
swap operation (and possibly a push or two) increases. If
MPLS-TP LM (loss measurement) OAM is enabled at each layer,
then a packet and byte count MUST be maintained for each pop
and swap operation so as to offer OAM for each layer.
</t>
</section>
<section anchor="sect.oam-gtsm" title="Local Delivery of Packets">
<t>
There are a number of situations in which packets are
destined to a local address or where a return packet must be
generated. There is a need to mitigate the potential for
outage as a result of either attacks on network
infrastructure, or in some cases unintentional
misconfiguration resulting in processor overload. Some
hardware assistance is needed for all traffic destined to
the general purpose CPU that is used in MPLS control
protocol processing or network management protocol
processing and in most cases to other general purpose CPUs
residing on an LSR. This is due to the ease of overwhelming
such a processor with traffic arriving on LSR high speed
interfaces, whether the traffic is malicious or not.
</t>
<t>
Denial of service (DoS) protection is an area requiring hardware
support that is often overlooked or inadequately considered.
Hardware assist is also needed for OAM, particularly the
more demanding MPLS-TP OAM.
</t>
<section anchor="sect.gtsm" title="DoS Protection">
<t>
Modern equipment supports a number of control plane and
management plane protocols. Generally no single means of
protecting network equipment from denial of service (DoS)
attacks is sufficient, particularly for high speed
interfaces. This problem is not specific to MPLS, but is
a topic that cannot be ignored when implementing or
evaluating MPLS implementations.
</t>
<t>
Two types of protections are often cited as primary means
of protecting against attacks of all kinds.
<list style="hanging" hangIndent="4">
<t hangText="Isolated Control/Management Traffic">
<vspace blankLines="0" />
Control and Management traffic can be carried
out-of-band (OOB), meaning not intermixed with
payload. For MPLS, use of G-ACh and GAL to carry
control and management traffic provides a means of
isolation from potentially malicious payload. Used
alone, the compromise of a single node, including a
small computer at a network operations center, could
compromise an entire network. Implementations which
send all G-ACh/GAL traffic directly to a routing
engine CPU are subject to DoS attack as a result of
such a compromise.
</t>
<t hangText="Cryptographic Authentication">
<vspace blankLines="0" />
Cryptographic authentication can very effectively
prevent malicious injection of control or management
traffic. Cryptographic authentication can in some
circumstances be subject to DoS attack by overwhelming
the capacity of the decryption with a high volume of
malicious traffic. For very low speed interfaces,
cryptographic authentication can be performed by the
general purpose CPU used as a routing engine. For all
other cases, cryptographic hardware may be needed.
For very high speed interfaces, even cryptographic
hardware can be overwhelmed.
</t>
</list>
</t>
<t>
Some control and management protocols are often carried
with payload traffic. This is commonly the case with BGP,
T-LDP, and SNMP. It is often the case with RSVP-TE.
Even when carried over G-ACh/GAL additional measures can
reduce the potential for a minor breach to be leveraged to
a full network attack.
</t>
<t>
Some of the additional protections are supported by
hardware packet filtering.
<list style="hanging" hangIndent="4">
<t hangText="GTSM">
<vspace blankLines="0" />
<xref target="RFC5082" />
defines a mechanism that uses the IPv4 TTL or IPv6 Hop
Limit fields to ensure control traffic that can only
originate from an immediate neighbor is not forged and
originating from a distant source. GTSM can be
applied to many control protocols which are routable,
for example LDP <xref target="RFC6720" />.
</t>
<t hangText="IP Filtering">
<vspace blankLines="0" />
At the very minimum, packet filtering plus
classification and use of multiple queues supporting
rate limiting is needed for traffic that could
potentially be sent to a general purpose CPU used as a
routing engine. The first level of filtering only
allows connections to be initiated from specific IP
prefixes to specific destination ports and then
preferably passes traffic directly to a cryptographic
engine and/or rate limits. The second level of
filtering passes connected traffic, such as TCP
connections having received at least one authenticated
SYN or having been locally initiated. The second
level of filtering only passes traffic to specific
address and port pairs to be checked for cryptographic
authentication.
</t>
</list>
</t>
<t>
The cryptographic authentication is generally the last
resort in DoS attack mitigation. If a packet must be
first sent to a general purpose CPU, then sent to a
cryptographic engine, a DoS attack is possible on high
speed interfaces. Only where hardware can fully process a
cryptographic authentication without intervention from a
general purpose CPU to find the authentication field and
to identify the portion of packet to run the cryptographic
algorithm over is cryptographic authentication beneficial
in protecting against DoS attacks.
</t>
<t>
For chips supporting multiple 100 Gb/s interfaces, only a
very large number of parallel cryptographic engines can
provide the processing capacity to handle a large scale
DoS or distributed DoS (DDoS) attack. For many forwarding
chips this much processing power requires significant chip
real estate and power, and therefore reduces system space
and power density. For this reason, cryptographic
authentication is not considered a viable first line of
defense.
</t>
<t>
For some networks the first line of defense is some means
of supporting OOB control and management traffic. In the
past this OOB channel might make use of overhead bits in
SONET or OTN or a dedicated DWDM wavelength. G-ACh and
GAL provide an alternative OOB mechanism which is
independent of underlying layers. In other networks,
including most IP/MPLS networks, perimeter filtering
serves a similar purpose, though less effective without
extreme vigilance.
</t>
<t>
A second line of defense is filtering, including GTSM.
For protocols such as EBGP, GTSM and other filtering is
often the first line of defense. Cryptographic
authentication is usually the last line of defense and
insufficient by itself to mitigate DoS or DDoS attacks.
</t>
</section>
<section anchor="sect.oam" title="MPLS OAM">
<t>
<xref target="RFC4377" />
defines requirements for MPLS OAM that predate MPLS-TP.
<xref target="RFC4379" />
defines what is commonly referred to as LSP Ping and LSP
Traceroute.
<xref target="RFC4379" />
is updated by
<xref target="RFC6424" />
supporting MPLS tunnels and stitched LSP and P2MP LSP.
<xref target="RFC4379" />
is updated by
<xref target="RFC6425" />
supporting P2MP LSP.
<xref target="RFC4379" />
is updated by
<xref target="RFC6426" />
to support MPLS-TP connectivity verification (CV) and route
tracing.
</t>
<t>
<xref target="RFC4950" />
extends the ICMP format to support TTL expiration that may
occur when using IP traceroute within an MPLS tunnel. The
ICMP message generation can be implemented in forwarding
hardware, but if sent to a general purpose CPU must be
rate limited to avoid a potential denial of service (DoS)
attack.
</t>
<t>
<xref target="RFC5880" />
defines Bidirectional Forwarding Detection (BFD), a
protocol intended to detect faults in the bidirectional
path between two forwarding engines.
<xref target="RFC5884" />
and
<xref target="RFC5885" />
define BFD for MPLS.
BFD can provide failure detection on any kind of path
between systems, including direct physical links, virtual
circuits, tunnels, MPLS Label Switched Paths (LSPs),
multihop routed paths, and unidirectional links as long as
there is some return path.
</t>
<t>
The processing requirements for BFD are less than for LSP
Ping, making BFD somewhat better suited for relatively
high rate proactive monitoring. BFD does not verify that
the data plane matches the control plane, where LSP Ping
does. LSP Ping is somewhat better suited for on-demand
monitoring including relatively low rate periodic
verification of data plane and as a diagnostic tool.
</t>
<t>
Hardware assistance is often provided for BFD response
where BFD setup or parameter change is not involved and
may be necessary for relatively high rate proactive
monitoring. If both BFD and LSP Ping are recognized in
filtering prior to passing traffic to a general purpose
CPU, appropriate DoS protection can be applied (see <xref
target="sect.gtsm" />). Failure to recognize BFD and LSP
Ping and at least rate limit creates the potential for
misconfiguration to cause outages rather than cause errors
in the misconfigured OAM.
</t>
</section>
<section anchor="sect.pw-oam" title="Pseudowire OAM">
<t>
Pseudowire OAM makes use of the control channel provided
by Virtual Circuit Connectivity Verification (VCCV)
<xref target="RFC5085" />.
VCCV makes use of the Pseudowire Control Word.
BFD support over VCCV is defined by
<xref target="RFC5885" />.
<xref target="RFC5885" />
is updated by
<xref target="RFC6478" />
in support of static pseudowires.
<xref target="RFC4379" />
is updated by
<xref target="RFC6829" />
supporting LSP Ping for Pseudowire FEC advertised over IPv6.
</t>
<t>
G-ACh/GAL (defined in <xref target="RFC5586" />) is the
preferred MPLS-TP OAM control channel and applies to any
MPLS-TP end points, including Pseudowire.
See <xref target="sect.tp-oam" /> for an overview of
MPLS-TP OAM.
</t>
</section>
<section anchor="sect.tp-oam" title="MPLS-TP OAM">
<t>
<xref target="RFC6669" />
summarizes the MPLS-TP OAM toolset, the set of protocols
supporting the MPLS-TP OAM requirements specified in
<xref target="RFC5860" />
and supported by the MPLS-TP OAM framework defined in
<xref target="RFC6371" />.
</t>
<t>
The MPLS-TP OAM toolset includes:
<list style="hanging" hangIndent="4">
<t hangText="CC-CV">
<vspace blankLines="0" />
<xref target="RFC6428" />
defines BFD extensions to support proactive
Connectivity Check and Connectivity Verification
(CC-CV) applications.
<xref target="RFC6426" />
provides LSP ping extensions that are used to
implement on-demand connectivity verification.
</t>
<t hangText="RDI">
<vspace blankLines="0" />
Remote Defect Indication (RDI) is triggered by
failure of proactive CC-CV, which is BFD based. For
fast RDI initiation, RDI SHOULD be initiated and
handled by hardware if BFD is handled in forwarding
hardware.
<xref target="RFC6428" />
provides an extension for BFD that includes the RDI
indication in the BFD format and a specification of
how this indication is to be used.
</t>
<t hangText="Route Tracing">
<vspace blankLines="0" />
<xref target="RFC6426" />
specifies that the LSP ping enhancements for MPLS-TP
on-demand connectivity verification include
information on the use of LSP ping for route tracing
of an MPLS-TP path.
</t>
<t hangText="Alarm Reporting">
<vspace blankLines="0" />
<xref target="RFC6427" />
describes the details of a new protocol supporting
Alarm Indication Signal (AIS), Link Down Indication,
and fault management. Failure to support this
functionality in forwarding hardware can potentially
result in failure to meet protection recovery time
requirements and is therefore strongly recommended.
</t>
<t hangText="Lock Instruct">
<vspace blankLines="0" />
Lock instruct is initiated on-demand and therefore
need not be implemented in forwarding hardware.
<xref target="RFC6435" />
defines a lock instruct protocol.
</t>
<t hangText="Lock Reporting">
<vspace blankLines="0" />
<xref target="RFC6427" />
covers lock reporting. Lock reporting need not be
implemented in forwarding hardware.
</t>
<t hangText="Diagnostic">
<vspace blankLines="0" />
<xref target="RFC6435" />
defines protocol support for loopback. Loopback
initiation is on-demand and therefore need not be
implemented in forwarding hardware. Loopback of
packet traffic SHOULD be implemented in forwarding
hardware on high speed interfaces.
</t>
<t hangText="Packet Loss and Delay Measurement">
<vspace blankLines="0" />
<xref target="RFC6374" />
and
<xref target="RFC6375" />
define a protocol and profile for packet loss
measurement (LM) and delay measurement (DM). LM
requires a very accurate capture and insertion of
packet and byte counters when a packet is transmitted
and capture of packet and byte counters when a packet
is received. This capture and insertion MUST be
implemented in forwarding hardware for LM OAM if high
accuracy is needed. DM requires very accurate capture and
insertion of a timestamp on transmission and capture
of timestamp when a packet is received. This
timestamp capture and insertion MUST be implemented in
forwarding hardware for DM OAM if high accuracy is
needed.
</t>
</list>
</t>
<t>
See <xref target="sect.oam" /> for discussion of hardware
support necessary for BFD and LSP Ping.
</t>
<t>
CC-CV and alarm reporting are tied to protection and
therefore SHOULD be supported in forwarding hardware in
order to provide protection for a large number of affected
LSP within target response intervals. Since CC-CV is
supported by BFD, for MPLS-TP providing hardware
assistance for BFD processing helps ensure that protection
recovery time requirements can be met even for faults
affecting a large number of LSP.
</t>
<t>
MPLS-TP Protection State Coordination (PSC) is defined by
<xref target="RFC6378" />
and updated by
<xref target="I-D.ietf-mpls-psc-updates" />,
correcting some errors in
<xref target="RFC6378" />.
</t>
</section>
<section anchor="sect.oam-iwk"
title="MPLS OAM and Layer-2 OAM Interworking">
<t>
<xref target="RFC6670" />
provides the reasons for selecting a single MPLS-TP OAM
solution and examines the consequences were ITU-T to
develop a second OAM solution that is based on Ethernet
encodings and mechanisms.
</t>
<t>
<xref target="RFC6310" /> and
<xref target="RFC7023" />
specify the mapping of defect states between many types
of hardware Attachment Circuits (ACs) and associated
Pseudowires (PWs). This functionality SHOULD be supported
in forwarding hardware.
</t>
<t>
It is beneficial if an MPLS OAM implementation can
interwork with the underlying server layer and provide a
means to interwork with a client layer. For example,
<xref target="RFC6427" />
specifies an inter-layer propagation of AIS and LDI from
MPLS server layer to client MPLS layers. Where the server
layer is a Layer-2, such as Ethernet, PPP over SONET/SDH,
or GFP over OTN, interwork among layers is also
beneficial. For high speed interfaces, supporting this
interworking in forwarding hardware helps ensure that
protection based on this interworking can meet recovery
time requirements even for faults affecting a large number
of LSP.
</t>
</section>
<section anchor="sect.oam-hdwr"
title="Extent of OAM Support by Hardware">
<t>
Where certain requirements must be met, such as relatively
high CC-CV rates and a large number of interfaces, or
strict protection recovery time requirements and a
moderate number of affected LSP, some OAM functionality
must be supported by forwarding hardware. In other cases,
such as highly accurate LM and DM OAM or strict protection
recovery time requirements with a large number of affected
LSP, OAM functionality must be entirely implemented in
forwarding hardware.
</t>
<t>
Where possible, implementation in forwarding hardware
should be in programmable hardware such that if standards
are later changed or extended these changes are likely to
be accommodated with hardware reprogramming rather than
replacement.
</t>
<t>
For some functionality there is a strong case for an
implementation in dedicated forwarding hardware. Examples
include packet and byte counters needed for LM OAM as well
as needed for management protocols. Similarly the capture
and insertion of packet and byte counts or timestamps
needed for transmitted LM or DM or time synchronization
packets MUST be implemented in forwarding hardware if high
accuracy is required.
</t>
<t>
For some functions there is a strong case to provide
limited support in forwarding hardware, but an external
general purpose processor may be used if performance
criteria can be met. For example origination of RDI
triggered by CC-CV, response to RDI, and Protection State
Coordination (PSC) functionality
may be supported by hardware, but expansion to a large
number of client LSP and transmission of AIS or RDI to the
client LSP may occur in a general purpose processor. Some
forwarding hardware supports one or more on-chip general
purpose processors which may be well suited for such a
role.
<xref target="I-D.ietf-mpls-psc-updates" />, being a very
recent document that affects a protection state machine
that requires hardware support, underscores the importance
of having a degree of programmability in forwarding hardware.
</t>
<t>
The customer (system supplier or provider) should not
dictate design, but should independently validate target
functionality and performance. However, it is not
uncommon for service providers and system implementers to
insist on reviewing design details (under NDA) due to past
experiences with suppliers and to reject suppliers who are
unwilling to provide details.
</t>
</section>
<section title="Support for IPFIX in Hardware">
<t>
The IPFIX architecture is defined by
<xref target="RFC5470" />.
IPFIX supports per flow statistics. IPFIX information
elements (IEs) are defined in
<xref target="RFC5102" />
and include IEs for MPLS.
</t>
<t>
The forwarding chips used in core routers are not
optimized for high touch applications like IPFIX. Often
support for IPFIX in core routers is limited to optional
IPFIX metering, which involves a 1-in-N packet sampling,
limited filtering support, and redirection to either an
internal CPU or an external interface. The CPU or device
at the other end of the external interface then implements
the full IPFIX filtering and IPFIX collector
functionality.
</t>
<t>
LSR which are intended to be deployed further from the
core may support lower capacity interfaces but support
higher touch applications on the forwarding hardware and
may provide dedicated hardware to support a greater subset
of IPFIX functionality before handing off to a general
purpose CPU. In some cases, far from the core the entire
IPFIX functionality up to and including the collector may
be implemented in hardware and firmware in the forwarding
silicon. It is also worth noting that at lower speeds a
general purpose CPU may become adequate to implement
IPFIX, particularly if metering is used.
</t>
</section>
</section>
<section anchor="sect.no-of-flows" title="Number and Size of Flows">
<t>
Service provider networks may carry up to hundreds of
millions of flows on 10 Gb/s links. Most flows are very
short lived, many under a second. A subset of the flows are
low capacity and somewhat long lived. When Internet traffic
dominates capacity a very small subset of flows are high
capacity and/or very long lived.
</t>
<t>
Two types of limitations with regard to number and size of
flows have been observed.
<list style="numbers">
<t>
Some hardware cannot handle some high capacity flows
because of internal paths which are limited, such as per
packet backplane paths or paths internal or external to
chips such as buffer memory paths. Such designs can
handle aggregates of smaller flows. Some hardware with
acknowledged limitations has been successfully deployed
but may be increasingly problematic if the capacity of
large microflows in deployed networks continues to grow.
</t>
<t>
Some hardware approaches cannot handle a large number of
flows, or a large number of large flows due to
attempting to count per flow, rather than deal with
aggregates of flows. Hash techniques scale with regard
to number of flows due to a fixed hash size with many
flows falling into the same hash bucket. Techniques
that identify individual flows have been implemented but
have never been successfully deployed for Internet traffic.
</t>
</list>
</t>
</section>
</section>
<section anchor="sect.ask"
title="Questions for Suppliers">
<t>
The following questions should be asked of a supplier. These
questions are grouped into broad categories. The questions
themselves are intended to be open-ended questions to the
supplier. The tests in <xref target="sect.test" /> are
intended to verify whether the supplier disclosed any
compliance or performance limitations completely and
accurately.
</t>
<section title="Basic Compliance">
<t>
<list counter="q" hangIndent="4" style="format Q#%d">
<t>
Can the implementation forward packets with an
arbitrarily large stack depth?
What limitations exist, and under what circumstances
do further limitations come into play (such as high
packet rate or specific features enabled or specific
types of packet processing)?
See <xref target="sect.basics" />.
</t>
<t>
Is the entire set of basic MPLS functionality
described in <xref target="sect.basics" /> supported?
</t>
<t>
Is the set of MPLS special purpose labels handled
correctly and with adequate performance? Are extended
special purpose labels handled correctly and with
adequate performance?
See <xref target="sect.resv-labels" />.
</t>
<t>
Are mappings of label value and TC to PHB handled
correctly, including RFC3270 L-LSP mappings and
RFC4124 CT mappings to PHB?
See <xref target="sect.qos" />.
</t>
<t>
Is time synchronization adequately supported in
forwarding hardware?
<list style="letters">
<t>
Are both PTP and NTP formats supported?
</t>
<t>
Is the accuracy of timestamp insertion and
incoming stamping sufficient?
</t>
</list>
See <xref target="sect.time-sync" />.
</t>
<t>
Is link bundling supported?
<list style="letters">
<t>
Can LSP be pinned to specific components?
</t>
<t>
Is the "all-ones" component link supported?
</t>
</list>
See <xref target="sect.link-bundle" />.
</t>
<t>
Is MPLS hierarchy supported?
<list style="letters">
<t>
Are both PHP and UHP supported? What limitations
exist on the number of pop operations with UHP?
</t>
<t>
Are the pipe, short-pipe, and uniform models
supported? Are TTL and TC values updated
correctly at egress where applicable?
</t>
</list>
See <xref target="sect.hierarchy" /> regarding MPLS
hierarchy. See <xref target="RFC3443" /> regarding
PHP, UHP, and pipe, short-pipe, and uniform models.
</t>
<t>
Is FRR supported?
<list style="letters">
<t>
Are both "One-to-One Backup" and "Facility Backup"
supported?
</t>
<t>
What forms of IPFRR/LDPFRR are supported?
</t>
<t>
How quickly does protection recovery occur?
</t>
<t>
Does protection recovery speed increase when a fault
affects a large number of protected LSP, and if so
by how much?
</t>
</list>
See <xref target="sect.frr" />.
</t>
<t>
Are pseudowire sequence numbers handled correctly?
See <xref target="sect.pw-seq" />.
</t>
<t>
Is VPN LER functionality handled correctly and without
performance issues?
See <xref target="sect.vpn" />.
</t>
<t>
Is MPLS multicast (P2MP and MP2MP) handled correctly?
<list style="letters">
<t>
Are packets dropped on uncongested outputs if some
outputs are congested?
</t>
<t>
Is performance limited in high fanout situations?
</t>
</list>
See <xref target="sect.mcast" />.
</t>
</list>
</t>
</section>
<section title="Basic Performance">
<t>
<list counter="q" hangIndent="4" style="format Q#%d">
<t>
Can very small packets be forwarded at full line rate
on all interfaces indefinitely?
What limitations exist, and under what circumstances
do further limitations come into play (such as
specific features enabled or specific types of packet
processing)?
</t>
<t>
Customers must decide whether to relax the prior
requirement and to what extent. If the answer to the
prior question indicates that limitations exist, then:
<list style="letters">
<t>
What is the smallest packet size where full line
rate forwarding can be supported?
</t>
<t>
What is the longest burst of full rate small
packets that can be supported?
</t>
</list>
Specific circumstances (such as specific features
enabled or specific types of packet processing) often
impact these rates and burst sizes.
</t>
<t>
How many pop operations can be supported along with a
swap operation at full line rate while maintaining
per LSP packet and byte counts for each pop and swap?
This requirement is particularly relevant for MPLS-TP.
</t>
<t>
How many label push operations can be supported?
While this limitation is rarely an issue, it applies
to both PHP and UHP, unlike the pop limit which
applies to UHP.
</t>
<t>
For a worst case where all packets arrive on one LSP,
what is the counter overflow time? Are any means
provided to avoid polling all counters at short
intervals? This applies to both MPLS and MPLS-TP.
</t>
</list>
</t>
</section>
<section title="Multipath Capabilities and Performance">
<t>
Multipath capabilities and performance do not apply to
MPLS-TP but apply to MPLS and apply if MPLS-TP is carried
in MPLS.
<list counter="q" hangIndent="4" style="format Q#%d">
<t>
How are large microflows accommodated? Is there
active management of the hash space mapping to output
ports? See <xref target="sect.large-uflow" />.
</t>
<t>
How many MPLS labels can be included in a hash based
on the MPLS label stack?
</t>
<t>
Is packet rate performance decreased beyond some
number of labels?
</t>
<t>
Can the IP header and payload information below the
MPLS stack be used in the hash? If so, which IP
fields, payload types and payload fields are
supported?
</t>
<t>
At what maximum MPLS label stack depth can Bottom of
Stack and an IP header appear without impacting packet
rate performance?
</t>
<t>
Are special purpose labels excluded from the label stack
hash? Are extended special purpose labels excluded from the
label stack hash?
See <xref target="sect.label-hash" />.
</t>
<t>
How is multipath performance affected by high capacity
flows or an extremely large number of flows, or by
very short lived flows?
See <xref target="sect.no-of-flows" />.
</t>
</list>
</t>
</section>
<section title="Pseudowire Capabilities and Performance">
<t>
<list counter="q" hangIndent="4" style="format Q#%d">
<t>
Is the pseudowire control word supported?
</t>
<t>
What is the maximum rate of pseudowire encapsulation
and decapsulation? Apply the same questions as in
Basic Performance for any packet based pseudowire such
as IP VPN or Ethernet.
</t>
<t>
Does inclusion of a pseudowire control word impact
performance?
</t>
<t>
Are flow labels supported?
</t>
<t>
If so, what fields are hashed on for the flow label
for different types of pseudowires?
</t>
<t>
Does inclusion of a flow label impact performance?
</t>
</list>
</t>
</section>
<section title="Entropy Label Support and Performance">
<t>
<list counter="q" hangIndent="4" style="format Q#%d">
<t>
Can an entropy label be added when acting as an
ingress LER and can it be removed when acting as an
egress LER?
</t>
<t>
If so, what fields are hashed on for the entropy label?
</t>
<t>
Does adding or removing an entropy label impact packet
rate performance?
</t>
<t>
Can an entropy label be detected in the label stack,
used in the hash, and properly terminate the search
for further information to hash on?
</t>
<t>
Does using an entropy label have any negative impact
on performance? It should have no impact or a
positive impact.
</t>
</list>
</t>
</section>
<section anchor="sect.q-dos" title="DoS Protection">
<t>
<list counter="q" hangIndent="4" style="format Q#%d">
<t>
For each control and management plane protocol in use,
what measures are taken to provide DoS attack
hardening?
</t>
<t>
Have DoS attack tests been performed?
</t>
<t>
Can compromise of an internal computer on a management
subnet be leveraged for any form of attack including
DoS attack?
</t>
</list>
</t>
</section>
<section title="OAM Capabilities and Performance">
<t>
<list counter="q" hangIndent="4" style="format Q#%d">
<t>
What OAM proactive and on-demand mechanisms are
supported?
</t>
<t>
What performance limits exist under high proactive
monitoring rates?
</t>
<t>
Can excessively high proactive monitoring rates impact
control plane performance or cause control plane
instability?
</t>
<t>
Ask the prior questions for each of the following.
<list style="letters">
<t>MPLS OAM</t>
<t>Pseudowire OAM</t>
<t>MPLS-TP OAM</t>
<t>Layer-2 OAM Interworking</t>
</list>
See <xref target="sect.oam-gtsm" />.
</t>
</list>
</t>
</section>
</section>
<section anchor="sect.test"
title="Forwarding Compliance and Performance Testing">
<t>
Packet rate performance testing of equipment supporting a large number
of 10 Gb/s or 100 Gb/s links is not possible using desktop
computers or workstations. The use of high end workstations
as a source of test traffic was barely viable 20 years ago,
but is no longer at all viable. Though custom microcode has
been used on specialized router forwarding cards to serve the
purpose of generating test traffic and measuring it, for the
most part performance testing will require specialized test
equipment. There are multiple sources of suitable equipment.
</t>
<t>
The set of tests listed here does not correspond one-to-one to
the set of questions in <xref target="sect.ask" />. The same
categorization is used and these tests largely serve to
validate answers provided to the prior questions, and can
also provide answers where a supplier is unwilling to disclose
compliance or performance.
</t>
<t>
Performance testing is the domain of the IETF Benchmark
Methodology Working Group (BMWG). Below are brief
descriptions of conformance and performance tests. Some very
basic tests are specified in <xref target="RFC5695" /> which
partially cover only the basic performance test T#3.
</t>
<t>
The following tests should be performed by the systems
designer, or deployer, or performed by the supplier on their
behalf if it is not practical for the potential customer to
perform the tests directly. These tests are grouped into
broad categories.
</t>
<t>
The tests in
<xref target="q-bc" />
should be repeated under various conditions to retest basic
performance when critical capabilities are enabled. Complete
repetition of the performance tests enabling each capability
and combinations of capabilities would be very time intensive,
therefore a reduced set of performance tests can be used to
gauge the impact of enabling specific capabilities.
</t>
<section anchor="q-bc" title="Basic Compliance">
<t>
<list counter="t" hangIndent="4" style="format T#%d">
<t>
Test forwarding at a high rate for packets with
varying number of label entries. While packets with
more than a dozen label entries are unlikely to be
used in any practical scenario today, it is useful to
know if limitations exist.
</t>
<t>
For each of the questions listed under "Basic
Compliance" in <xref target="sect.ask" />, verify the
claimed compliance. For any functionality considered
critical to a deployment, where applicable performance
using each capability under load should be verified in
addition to basic compliance.
</t>
</list>
</t>
</section>
<section title="Basic Performance">
<t>
<list counter="t" hangIndent="4" style="format T#%d">
<t>
Test packet forwarding at full line rate with small
packets. See <xref target="RFC5695" />. The most
likely case to fail is the smallest packet size. Also
test with packet sizes in four byte increments ranging
from payload sizes of 40 to 128 bytes.
</t>
<t>
If the prior tests did not succeed for all packet
sizes, then perform the following tests.
<list style="letters">
<t>
Increase the packet size by 4 bytes until a size
is found that can be forwarded at full rate.
</t>
<t>
Inject bursts of consecutive small packets into a
stream of larger packets. Allow some time for
recovery between bursts. Increase the number of
packets in the burst until packets are dropped.
</t>
</list>
</t>
<t>
Send test traffic where a swap operation is required.
Also set up multiple LSP carried over other LSP where
the device under test (DUT) is the egress of these
LSP. Create test packets such that the swap operation
is performed after pop operations, increasing the
number of pop operations until forwarding of small
packets at full line rate can no longer be supported.
Also check to see how many pop operations can be
supported before the full set of counters can no
longer be maintained. This requirement is
particularly relevant for MPLS-TP.
</t>
<t>
Send all traffic on one LSP and see if the counters
become inaccurate. Often counters on silicon are much
smaller than the 64 bit packet and byte counters in
various IETF MIBs. System developers should consider what
counter polling rate is necessary to maintain accurate
counters and whether those polling rates are
practical.
Relevant MIBs for MPLS are discussed in
<xref target="RFC4221" /> and
<xref target="RFC6639" />.
</t>
<t>
<xref target="RFC6894" />
provides a good basis for MPLS FRR testing. Similar
testing should be performed to determine restoration
times, however this testing is far more difficult to
perform due to the need for a simulated test topology
that is capable of simulating the signaling used in
restoration. The simulated topology should be
comparable with the target deployment in the number of
nodes and links and in resource usage flooding and setup
delays. Some commercial test equipment can support this
type of testing.
</t>
</list>
</t>
</section>
<section anchor="q-multipath"
title="Multipath Capabilities and Performance">
<t>
Multipath capabilities do not apply to MPLS-TP but apply
to MPLS and apply if MPLS-TP is carried in MPLS.
<list counter="t" hangIndent="4" style="format T#%d">
<t>
Send traffic at a rate well exceeding the capacity of
a single multipath component link, and where entropy
exists only below the top of stack. If only the top
label is used this test will fail immediately.
</t>
<t>
Move the labels with entropy down in the stack until
either the full forwarding rate can no longer be
supported or most or all packets try to use the same
component link.
</t>
<t>
Repeat the two tests above with the entropy contained
in IP headers or IP payload fields below the label
stack rather than in the label stack. Test with the
set of IP headers or IP payload fields considered
relevant to the deployment or to the target market.
</t>
<t>
Determine whether traffic that contains a pseudowire
control word is interpreted as IP traffic.
Information in the payload MUST NOT be used in the
load balancing if the first nibble of the packet is
not 4 or 6 (IPv4 or IPv6).
</t>
<t>
Determine whether special purpose labels and extended
special purpose labels are excluded from the label stack
hash. They MUST be excluded.
</t>
<t>
Perform testing in the presence of combinations of:
<list style="letters">
<t>
Very large microflows.
</t>
<t>
Relatively short lived high capacity flows.
</t>
<t>
Extremely large numbers of flows.
</t>
<t>
Very short lived small flows.
</t>
</list>
</t>
</list>
</t>
</section>
<section title="Pseudowire Capabilities and Performance">
<t>
<list counter="t" hangIndent="4" style="format T#%d">
<t>
Ensure that pseudowire can be set up with a pseudowire
label and pseudowire control word added at ingress and
the pseudowire label and pseudowire control word
removed at egress.
</t>
<t>
For pseudowire that contains variable length payload
packets, repeat performance tests listed under "Basic
Performance" for pseudowire ingress and egress
functions.
</t>
<t>
Repeat pseudowire performance tests with and without
a pseudowire control word.
</t>
<t>
Determine whether pseudowire can be set up with a
pseudowire label, flow label, and pseudowire control
word added at ingress and the pseudowire label, flow
label, and pseudowire control word removed at egress.
</t>
<t>
Determine which payload fields are used to create the
flow label and whether the set of fields and algorithm
provide sufficient entropy for load balancing.
</t>
<t>
Repeat pseudowire performance tests with flow labels
included.
</t>
</list>
</t>
</section>
<section title="Entropy Label Support and Performance">
<t>
<list counter="t" hangIndent="4" style="format T#%d">
<t>
Determine whether entropy labels can be added at
ingress and removed at egress.
</t>
<t>
Determine which fields are used to create an entropy
label. Labels further down in the stack, including
entropy labels further down and IP headers or IP
payload fields where applicable should be used.
Determine whether the set of fields and algorithm
provide sufficient entropy for load balancing.
</t>
<t>
Repeat performance tests under "Basic Performance"
when entropy labels are used, where ingress or egress
is the device under test (DUT).
</t>
<t>
Determine whether an ELI is detected when acting as a
midpoint LSR and whether the search for further
information on which to base the load balancing is
used. Information below the entropy label SHOULD NOT
be used.
</t>
<t>
Ensure that the entropy label indicator and entropy
label (ELI and EL) are removed from the label stack
during UHP and PHP operations.
</t>
<t>
Ensure that operations on the TC field when adding and
removing an entropy label are correctly carried out. If
TC is changed during a swap operation, the ability to
transfer that change MUST be provided. The ability to
suppress the transfer of TC MUST also be provided. See
"pipe", "short pipe", and "uniform" models in
<xref target="RFC3443" />.
</t>
<t>
Repeat performance tests for a midpoint LSR with entropy
labels found at various label stack depths.
</t>
</list>
</t>
</section>
<section anchor="sect.dos-attack" title="DoS Protection">
<t>
<list counter="t" hangIndent="4" style="format T#%d">
<t>
Actively attack LSR under high protocol churn load and
determine control plane performance impact or
successful DoS under test conditions. Specifically
test for the following.
<list style="letters">
<t>
TCP SYN attack against control plane and
management plane protocols using TCP, including
CLI access (typically SSH protected login),
NETCONF, etc.
</t>
<t>
High traffic volume attack against control plane
and management plane protocols not using TCP.
</t>
<t>
Attacks which can be performed from a compromised
management subnet computer, but not one with
authentication keys.
</t>
<t>
Attacks which can be performed from a compromised
peer within the control plane (internal domain and
external domain).
<!-- where does KARP work on key dist stand? -->
Assume that per peering keys and per router ID
keys rather than network wide keys are in use.
</t>
</list>
See <xref target="sect.gtsm" />.
</t>
</list>
</t>
</section>
<section title="OAM Capabilities and Performance">
<t>
<list counter="t" hangIndent="4" style="format T#%d">
<t>
Determine maximum sustainable rates of BFD traffic.
If BFD requires CPU intervention, determine both
maximum rates and CPU loading when multiple interfaces
are active.
</t>
<t>
Verify LSP Ping and LSP Traceroute capability.
</t>
<t>
Determine maximum rates of MPLS-TP CC-CV traffic. If
CC-CV requires CPU intervention, determine both
maximum rates and CPU loading when multiple interfaces
are active.
</t>
<t>
Determine MPLS-TP DM precision.
</t>
<t>
Determine MPLS-TP LM accuracy.
</t>
<t>
Verify MPLS-TP AIS/RDI and Protection State Coordination
(PSC) functionality,
protection speed, and AIS/RDI notification speed when
a large number of Management Entities (ME) must be
notified with AIS/RDI.
</t>
</list>
</t>
</section>
</section>
<section anchor="sect.ack" title="Acknowledgements">
<t>
Numerous very useful comments have been received in private
email. Some of these contributions are acknowledged here,
approximately in chronological order.
</t>
<t>
Paul Doolan provided a brief review resulting in a number of
clarifications, most notably regarding on-chip vs. system
buffering, 100 Gb/s link speed assumptions in the 150 Mpps
figure, and handling of large microflows. Pablo Frank
reminded us of the sawtooth effect in PPS vs. packet size
graphs, prompting the addition of a few paragraphs on this.
Comments from Lou Berger at IETF-85 prompted the addition of
<xref target="sect.no-of-flows" />.
</t>
<t>
Valuable comments were received on the BMWG mailing list. Jay
Karthik pointed out testing methodology hints that after
discussion were deemed out of scope and were removed but may
benefit later work in BMWG.
</t>
<t>
Nabil Bitar pointed out the need to cover QoS (Differentiated
Services), MPLS multicast (P2MP and MP2MP), and MPLS-TP OAM.
Nabil also provided a number of clarifications to the
questions and tests in <xref target="sect.ask" /> and
<xref target="sect.test" />.
</t>
<t>
Mark Szczesniak provided a thorough review and a number of
useful comments and suggestions that improved the document.
</t>
<t>
Gregory Mirsky and Thomas Beckhaus provided useful comments
during the MPLS RT review.
</t>
<t>
Tal Mizrahi provided comments that prompted clarifications
regarding timestamp processing, local delivery of packets, and
the need for hardware assistance in processing OAM traffic.
</t>
<t>
Alexander (Sasha) Vainshtein pointed out errors in
<xref target="sect.pw-seq" />
and suggested new text which after lengthy discussion resulted
in restating the summarization of requirements from PWE3 RFCs
and more clearly stating the benefits and drawbacks of packet
resequencing based on PW sequence number.
</t>
<t>
Loa Andersson provided useful comments and corrections prior to
WGLC. Adrian Farrel provided useful comments and corrections
as part of the AD review.
</t>
<t>
Discussion with Steve Kent during SecDir review resulted in
expansion of <xref target="sect.security" />, briefly
summarizing security considerations related to forwarding in
normative references. Tom Petch pointed out some editorial
errors in private email plus an important math error. Al
Morton during OpsDir review prompted clarification in the
target audience section, suggested more clear wording in
places, and found numerous editorial errors.
</t>
<t>
Discussion with Stewart Bryant and Alia Atlas as part of IESG
review resulted in coverage of IPFIX and improvements to
document coverage of MPLS FRR, and IP/LDP FRR, plus some
corrections to the text elsewhere.
</t>
</section>
<section anchor="sect.iana" title="IANA Considerations">
<t>
This memo includes no request to IANA.
</t>
</section>
<section anchor="sect.security" title="Security Considerations">
<t>
This document reviews forwarding behavior specified elsewhere
and points out compliance and performance requirements. As
such it introduces no new security requirements or concerns.
</t>
<t>
Discussion of hardware support and other equipment hardening
against DoS attack can be found in
<xref target="sect.gtsm" />.
<xref target="sect.q-dos" />
provides a list of questions regarding DoS to be asked of
suppliers.
<xref target="sect.dos-attack" />
suggests types of testing that can provide some assurance of
the effectiveness of supplier DoS hardening claims.
</t>
<t>
Knowledge of potential performance shortcomings may serve to
help new implementations avoid pitfalls. It is unlikely that
such knowledge could be the basis of new denial of service as
these pitfalls are already widely known in the service
provider community and among leading equipment suppliers. In
practice extreme data and packet rate are needed to affect
existing equipment and to affect networks that may be still
vulnerable due to failure to implement adequate protection.
The extreme data and packet rates make this type of denial of
service unlikely and make undetectable denial of service of
this type impossible.
</t>
<t>
The set of normative references each contain security
considerations. A brief summarization of MPLS security
considerations applicable to forwarding follows:
</t>
<t>
<list style="numbers">
<t>
MPLS encapsulation does not support an authentication
extension. This is reflected in the security section of
<xref target="RFC3032" />.
Documents which clarify MPLS header fields such as TTL
<xref target="RFC3443" />,
the explicit null label
<xref target="RFC4182" />,
renaming EXP to TC
<xref target="RFC5462" />,
ECN for MPLS
<xref target="RFC5129" />, and
MPLS Ethernet encapsulation
<xref target="RFC5332" />
make no changes to security considerations in
<xref target="RFC3032" />.
</t>
<t>
Some cited RFCs are related to Diffserv forwarding.
<xref target="RFC3270" />
refers to MPLS and Diffserv security.
<xref target="RFC2474" />
mentions theft of service and denial of service due to
mismarking.
<xref target="RFC2474" />
mentions IPsec interaction, but with MPLS, not being
carried by IP, this type of interaction in
<xref target="RFC2474" />
is not relevant.
</t>
<t>
<xref target="RFC3209" />
is cited here due only to make-before-break forwarding
requirements. This is related to resource sharing and the
theft of service and denial of service concerns in
<xref target="RFC2474" />
apply.
</t>
<t>
<xref target="RFC4090" />
defines FRR which provides protection but does not add
security concerns. <xref target="RFC4201" /> defines link bundling but
raises no additional security concerns.
</t>
<t>
Various OAM control channels are defined in
<xref target="RFC4385" />
(PW CW),
<xref target="RFC5085" />
(VCCV),
<xref target="RFC5586" />
(G-Ach and GAL).
These documents describe potential abuse of these OAM
control channels.
</t>
<t>
<xref target="RFC4950" />
defines ICMP extensions when MPLS TTL expires and payload
is IP. This provides MPLS header information which is of
no use to an IP attacker, but sending this information can
be suppressed through configuration.
</t>
<t>
GTSM
<xref target="RFC5082" />
provides a means to improve protection against high
traffic volume spoofing as a form of DoS attack.
</t>
<t>
BFD
<xref target="RFC5880" />
<xref target="RFC5884" />
<xref target="RFC5885" />
provides a form of OAM used in MPLS and MPLS-TP. The
security considerations related to the OAM control channel
are relevant. The BFD payload supports authentication,
unlike the MPLS encapsulation or the MPLS or PW control
channel encapsulation that BFD is carried in. Where an IP return
OAM path is used IPsec is suggested as a means of securing
the return path.
</t>
<t>
Other forms of OAM are supported by
<xref target="RFC6374" />
<xref target="RFC6375" />
(Loss and Delay Measurement),
<xref target="RFC6428" />
(Connectivity Check/Verification based on BFD), and
<xref target="RFC6427" />
(Fault Management). The security considerations related
to the OAM control channel are relevant. IP return paths,
where used, can be secured with IPsec.
</t>
<t>
Linear protection is defined by
<xref target="RFC6378" />
and updated by
<xref target="I-D.ietf-mpls-psc-updates" />.
Security concerns related to MPLS encapsulation and OAM
control channels apply. Security concerns reiterate
<xref target="RFC5920" />
as applied to protection switching.
</t>
<t>
The PW Flow Label
<xref target="RFC6391" />
and MPLS Entropy Label
<xref target="RFC6790" />
affect multipath load balancing. Security concerns
reiterate
<xref target="RFC5920" />.
Security impacts would be limited to load distribution.
</t>
</list>
</t>
<t>
MPLS security including data plane security is discussed in
greater detail in
<xref target="RFC5920" />
(MPLS/GMPLS Security Framework).
The MPLS-TP security framework
<xref target="RFC6941" />
builds upon this, focusing largely on the MPLS-TP OAM additions
and OAM channels with some attention given to using network
management in place of control plane setup. In both security
framework documents MPLS is assumed to run within a "trusted
zone", defined as being where a single service provider (SP)
has total operational control over that part of the network.
</t>
<t>
If control plane security and management plane security are
sufficiently robust, compromise of a single network element
may result in chaos in the data plane anywhere in the network
through denial of service attacks, but not a Byzantine
security failure in which other network elements are fully
compromised.
</t>
<t>
MPLS security, or lack thereof, can affect whether traffic can be
misrouted and lost, or intercepted, or intercepted and
reinserted (a man-in-the-middle attack) or spoofed. End user
applications, including control plane and management plane
protocols used by the SP, are expected to make use of
appropriate end-to-end authentication and where appropriate
end-to-end encryption.
</t>
</section>
<section title="Organization of References Section">
<t>
The References section is split into Normative and Informative
subsections. References that directly specify forwarding
encapsulations or behaviors are listed as normative.
References which describe signaling only, though normative
with respect to signaling, are listed as informative. They
are informative with respect to MPLS forwarding.
</t>
</section>
</middle>
<back>
<references title="Normative References">
&RFC2119;
&RFC3032;
&RFC3209;
&RFC3270;
&RFC3443;
&RFC4090;
&RFC4182;
&RFC4201;
&RFC4385;
&RFC4950;
&RFC5082;
&RFC5085;
&RFC5129;
&RFC5332;
&RFC5586;
&RFC5880;
&RFC5884;
&RFC5885;
&RFC6374;
&RFC6375;
&RFC6378;
&RFC6391;
&RFC6427;
&RFC6428;
&RFC6790;
&I-D.ietf-mpls-psc-updates;
</references>
<references title="Informative References">
&RFC0791;
&RFC2474;
&RFC2475;
&RFC2597;
&RFC3031;
&RFC3168;
&RFC3429;
&RFC3471;
&RFC3550;
&RFC3828;
&RFC3985;
&RFC4023;
&RFC4110;
&RFC4124;
&RFC4206;
&RFC4221;
&RFC4340;
&RFC4377;
&RFC4379;
&RFC4664;
&RFC4817;
&RFC4875;
&RFC4928;
&RFC4960;
&RFC5036;
&RFC5102;
&RFC5286;
&RFC5317;
&RFC5462;
&RFC5470;
<!-- RFC5513; -->
&RFC5640;
&RFC5695;
&RFC5704;
&RFC5714;
&RFC5715;
&RFC5860;
&RFC5905;
&RFC5920;
&RFC6291;
&RFC6310;
&RFC6371;
&RFC6388;
&RFC6424;
&RFC6425;
&RFC6426;
&RFC6435;
&RFC6438;
&RFC6478;
&RFC6639;
&RFC6669;
&RFC6670;
&RFC6720;
&RFC6829;
&RFC6941;
<!-- RFC6976; -->
&RFC6894;
&RFC6981;
&RFC7023;
&RFC7074;
&RFC7079;
&I-D.ietf-mpls-in-udp;
&I-D.ietf-mpls-special-purpose-labels;
&I-D.ietf-tictoc-1588overmpls;
&I-D.ietf-rtgwg-remote-lfa;
&I-D.ietf-rtgwg-mrt-frr-architecture;
<reference anchor="ACK-compression">
<front>
<title>Observations and Dynamics of a Congestion Control
Algorithm: The Effects of Two-Way Traffic</title>
<author fullname="Zhang, L." />
<author fullname="Shenker, S." />
<author fullname="Clark, D. D." />
<date year="1991" />
</front>
<seriesInfo name="Proc. ACM SIGCOMM, ACM Computer
Communications Review (CCR)"
value="Vol 21, No 4, 1991, pp.133-147." />
</reference>
</references>
</back>
</rfc>
| PAFTECH AB 2003-2026 | 2026-04-23 16:23:19 |