<?xml version="1.0" encoding="US-ASCII"?>
<!-- xml2rfc is available at http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [

  <!-- citation reference entities
       from http://xml.resource.org/public/rfc/bibxml -->

  <!ENTITY RFC1717 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.1717.xml">
  <!ENTITY RFC2475 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2475.xml">
  <!ENTITY RFC2615 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2615.xml">
  <!ENTITY RFC2702 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2702.xml">
  <!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
  <!ENTITY RFC2991 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2991.xml">
  <!ENTITY RFC2992 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2992.xml">
  <!ENTITY RFC3031 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3031.xml">
  <!ENTITY RFC3468 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3468.xml">
  <!ENTITY RFC3809 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3809.xml">
  <!ENTITY RFC3260 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3260.xml">
  <!ENTITY RFC4031 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4031.xml">
  <!ENTITY RFC4201 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4201.xml">
  <!ENTITY RFC4301 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4301.xml">
  <!ENTITY RFC4385 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4385.xml">
  <!ENTITY RFC4665 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4665.xml">
  <!ENTITY RFC4928 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4928.xml">
  <!ENTITY RFC5254 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5254.xml">

<!ENTITY I-D.ietf-pwe3-fat-pw SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-pwe3-fat-pw-03.xml">

  ]>

<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<?rfc strict="yes" ?>
<?rfc toc="yes"?>
<?rfc tocdepth="4"?>
<?rfc symrefs="yes"?>
<?rfc sortrefs="yes" ?>
<?rfc compact="yes" ?>
<?rfc subcompact="no" ?>
<?rfc comments="yes"?>
<?rfc inline="yes" ?>

<!-- we have lost some comments and some changes based on email message:
     Message-ID:
     201004110429.o3B4TEmM096179@harbor.orleans.occnc.com
     793F49BA1FC821409F99F10862A0E4DB06827EA1@FHDP1LUMXCV14.us.one.verizon.com
  -->

<rfc category="info" ipr="trust200902"
     docName="draft-ietf-rtgwg-cl-requirement-01">

  <front>
    <title abbrev="Composite Link Requirements">
      Requirements for MPLS Over a Composite Link</title>

    <author role="editor"
	    fullname="Curtis Villamizar" initials="C." surname="Villamizar">
      <organization>Infinera Corporation</organization>
      <address>
        <postal>
          <street>169 W. Java Drive</street>
          <city>Sunnyvale, CA</city>
	  <code>94089</code>
        </postal>
	<email>cvillamizar@infinera.com</email>
      </address>
    </author>

    <author role="editor"
	    fullname="Dave McDysan" initials="D." surname="McDysan">
      <organization>Verizon</organization>
      <address>
        <postal>
          <street>22001 Loudoun County PKWY</street>
          <city>Ashburn, VA</city>
	  <code>20147</code>
        </postal>
        <email>dave.mcdysan@verizon.com</email>
      </address>
    </author>

    <author
	    fullname="So Ning" initials="S." surname="Ning">
      <organization>Verizon</organization>
      <address>
        <postal>
          <street>2400 N. Glenville Ave.</street>
          <city>Richardson, TX</city>
	  <code>75082</code>
        </postal>
        <phone>+1 972-729-7905</phone>
        <email>ning.so@verizonbusiness.com</email>
      </address>
    </author>

    <author
	    fullname="Andrew Malis" initials="A." surname="Malis">
      <organization>Verizon</organization>
      <address>
        <postal>
          <street>117 West St.</street>
          <city>Waltham, MA</city>
	  <code>02451</code>
        </postal>
        <phone>+1 781-466-2362</phone>
        <email>andrew.g.malis@verizon.com</email>
      </address>
    </author>

    <author
	    fullname="Lucy Yong" initials="L." surname="Yong">
      <organization>Huawei USA</organization>
      <address>
        <postal>
          <street>1700 Alma Dr. Suite 500</street>
          <city>Plano, TX</city>
	  <code>75075</code>
        </postal>
        <phone>+1 469-229-5387</phone>
        <email>lucyyong@huawei.com</email>
      </address>
    </author>



<!--
    <author
	    fullname="Frederic Jounay" initials="F." surname="Jounay">
      <organization>France Telecom</organization>
      <address>
        <postal>
          <street>2, avenue Pierre-Marzin</street>
	  <code>22307</code>
          <city>Lannion Cedex</city>
	  <country>France</country>
        </postal>
        <email>frederic.jounay@orange-ftgroup.com</email>
      </address>
    </author>

    <author
	    fullname="Yuji Kamite" initials="Y." surname="Kamite">
      <organization>NTT Communications Corporation</organization>
      <address>
        <postal>
          <street>Granpark Tower</street>
          <street>3-4-1 Shibaura, Minato-ku</street>
          <city>Tokyo</city>
	  <code>108-8118</code>
	  <country>Japan</country>
        </postal>
        <email>y.kamite@ntt.com</email>
      </address>
    </author>
 -->

    <date month="July" year="2010" />

    <!-- Meta-data Declarations -->

    <area>Routing</area>
    <workgroup>RTGWG</workgroup>

    <keyword>MPLS</keyword>
    <keyword>composite link</keyword>
    <keyword>link aggregation</keyword>
    <keyword>ECMP</keyword>
    <keyword>link bundling</keyword>
    <keyword>delay metric</keyword>

    <abstract>
      <t>
	There is often a need to provide large aggregates of bandwidth
	that is best provided using parallel links between routers or
	MPLS LSR.  In core networks there is often no alternative
	since the aggregate capacities of core networks today far
	exceed the capacity of a single physical link or single packet
	processing element. Furthermore, links may be composed of
	network elements operating across multiple layers.
      </t>
      <t>
	The presence of parallel links, potentially comprised of
	multiple layers, has resulted in additional requirements.
	Certain services may benefit from being restricted to a subset
	of the set of composite link component links or a specific
	component link, where component link characteristics, such as
	latency, differ.  Certain services require that LSP be treated
	as atomic and avoid reordering.  Other services will continue
	to require only that reordering not occur within a microflow as
	is current practice.
      </t>
      <t>
	Current practice related to multipath is described briefly in
	an appendix.
      </t>
    </abstract>
  </front>

  <middle>
    <section title="Introduction">
      <t>
	The purpose of this document is to describe why network
	operators require certain functions in order to solve certain
	business problems (<xref target="assumptions" />). The intent
	is to first describe why things need to be done in terms of
	functional requirements that are as independent as possible of
	protocol specifications (<xref target="FR" />). For certain
	functional requirements this document describes a set of
	derived protocol requirements (<xref target="DR" />).  Three
	appendices provide supporting details as a summary of
	existing/prior operator approaches, a summary of implementation
	techniques and relevant protocol standards, and a summary of G.800
	terminology used to define the concept of a composite link
	(<xref target="multipath-bcp" />).
      </t>

      <section title="Requirements Language">
        <t>
	  The key words "MUST", "MUST NOT", "REQUIRED", "SHALL",
          "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY",
          and "OPTIONAL" in this document are to be interpreted as
          described in <xref target="RFC2119">RFC 2119</xref>.
	</t>
      </section>
    </section>

    <section anchor="assumptions" title="Assumptions">
      <t>
	The services supported include L3VPN, L2VPN (VPWS and VPLS),
	Internet traffic encapsulated by at least one MPLS label, and
	dynamically signaled MPLS-TP LSPs and pseudowires. The MPLS
	LSPs supporting these services may be pt-pt, pt-mpt, or
	mpt-mpt.
      </t>
      <t>
	The location in a network where these requirements apply is a
	Label Edge Router (LER) or a Label Switch Router (LSR) as
	defined in <xref target="RFC3031">RFC 3031</xref>.
      </t>
      <t>
	The IP DSCP cannot be used for flow identification since L3VPN
	requires Diffserv transparency (see <xref target="RFC4031">RFC
	4031 5.5.2</xref>), and in general network operators do not
	rely on the DSCP of Internet packets.
<!-- DM: I recall that a comment from the list proposing deletion of
     "negative" requirements.  It is duplicated in Appendix A. -->
      </t>
    </section>

    <section anchor="def" title="Definitions">
      <t>
	<list hangIndent="4" style="hanging">
	  <t hangText="Composite Link:">
	    <vspace blankLines="0" />
	    <xref target="ITU-T.G.800">Section 6.9.2 of
	    ITU-T-G.800</xref> defines composite link as summarized in
	    Appendix
	    <xref target="G.800-Definitions"></xref>. The following
	    definitions map the ITU-T G.800 terminology into IETF
	    terminology which is used in this document.
	    <list hangIndent="4" style="hanging">
	      <t hangText="Multiple parallel links:">
		Multiple parallel component links between an
		LER/LSR and another LER/LSR.
	      </t>
	      <t hangText="Multi-layer Component Link:">
		A component link that is formed by other network
		elements at other layers.
	      </t>
	    </list>
	  </t>
	  <t hangText="Component Link:">
	    A physical link (e.g., Lambda, Ethernet PHY, SONET/SDH,
  	    OTN, etc.) with packet transport capability, or a logical
  	    link (e.g., MPLS LSP, Ethernet VLAN, MPLS-TP LSP, etc.)
	  </t>
	  <t hangText="Flow:">
	    A sequence of packets that must be transferred on one component
	    link.
<!-- should we add "in order to maintain packet order"? DM: Reordering
     is either not allowed or allowed subject to a specified frequency
     in the requirements and stating only the first case as a
     definition would be inconsistent.-->
	  </t>
	  <t hangText="Flow identification:">
	    The label stack and other information that uniquely
	    identifies a flow.  Other information in flow
	    identification may include an IP header, PW control word,
	    Ethernet MAC address, etc.  Note that an LSP may contain
	    one or more Flows or an LSP may be equivalent to a Flow.
	    Flow identification is used to locally select a component
	    link, or a path through the network toward the
	    destination.
	  </t>
	</list>
      </t>
    </section>

    <section anchor="FR" title="Network Operator Functional Requirements">
      <t>
	The Functional Requirements in this section are grouped in
	subsections starting with the highest priority.
      </t>
      <section anchor="it-works"
	       title="Availability, Stability and Transient Response">
	<t>
	  Limiting the period of unavailability in response to
	  failures or transient events is extremely important as well
	  as maintaining stability. The transient period between some
	  service disrupting event and the convergence of the routing
	  and/or signaling protocols MUST occur within a time frame
	  specified by SLA objectives. The timeframes range from rapid
	  restoration, on the order of 100 ms or less (e.g., for
	  VPWS), to several minutes (e.g., for L3VPN) and may differ
	  among the set of customers within a single service.
	  <list counter="fr" hangIndent="4" style="format FR#%d">
	    <t>
	      The solution SHALL provide a means to summarize routing
	      advertisements regarding the characteristics of a
	      composite link such that the routing protocol
	      converges within the timeframe needed to meet the SLA
	      objective.
	    </t>
	    <t>
	      The solution SHALL provide a means for aggregating
	      signaling such that in response to a failure in the
	      worst case cross section of the network that MPLS LSPs
	      are restored within the timeframe needed to meet the SLA
	      objective.
	    </t>
<!-- foo/bar notation is unclear - suggest we reword DM: OK Now?-->
	    <t>
	      The solution SHALL provide a means to select a path for a flow
	      across a network that contains a number of paths
	      comprised of pairs of nodes connected by composite links
	      in such a way as to automatically distribute the load
	      over the network nodes connected by composite links
	      while meeting all of the other mandatory requirements
	      stated above. The solution SHOULD work in a manner
	      similar to that when the characteristics of the
	      individual component links are advertised.
<!-- Is there any mechanism in mind to coordinate balancing across
     multiple nodes within a site or is this science fiction?
     DM: Reworded. We do have customer requirements like this.
  -->
	    </t>
	    <t>
	      If extensions to existing protocols are specified and/or
	      new protocols are defined, then the solution SHOULD
	      provide a means for a network operator to migrate an
	      existing deployment in a minimally disruptive manner.
	    </t>
	    <t>
	      Any automatic LSP routing and/or load balancing
	      solutions MUST NOT oscillate such that performance
	      observed by users changes such that an SLA is
	      violated. Since oscillation may cause reordering, there
	      MUST be means to control the frequency of changing the
	      component link over which a flow is placed.
	    </t>
<!-- need to mention minimized reordering somewhere DM: OK Now?-->
	    <t>
	      Management and diagnostic protocols MUST be able to
	      operate over composite links.
	    </t>
<!-- if you mean MPLS-TP OAM, then please say so DM: This came from
     NTT, I think scope is broader.-->
	  </list>
	</t>
      </section>
      <section anchor="layering"
	       title="Component Links Provided by Lower Layer
	       Networks">
	<t>
	  Case 3 as defined in <xref target="ITU-T.G.800" /> involves
	  a component link supporting an MPLS layer network over
	  another lower layer network (e.g., circuit switched or
	  another MPLS network (e.g., MPLS-TP)). The lower layer
	  network may change the latency (and/or other performance
	  parameters) seen by the MPLS layer network. Network
	  Operators have SLAs of which some components are based on
	  performance parameters. Currently, there is no protocol for
	  the lower layer network to inform the higher layer network
	  of a change in a performance parameter. Communication of the
	  latency performance parameter is a very important
	  requirement. Communication of other performance parameters
	  (e.g., delay variation) is desirable.
          <list counter="fr" hangIndent="4" style="format FR#%d">
	    <t>
	      In order to support network SLAs and provide acceptable
	      user experience, the solution SHALL specify a protocol
	      means to allow a lower layer server network to
	      communicate latency to the higher layer client network.
	    </t>
	    <t>
	      The precision of latency reporting SHOULD be at least 10%
	      of the one way latency for latency of 1 ms or more.
	    </t>
	    <t>
	      The solution SHALL provide a means to limit the latency
	      on a per LSP basis between nodes within a network to
	      meet an SLA target when the path between these nodes
	      contains one or more pairs of nodes connected via a
	      composite link.  <vspace blankLines="1" /> The SLAs
	      differ across the services, and some services have
	      different SLAs for different QoS classes, for example,
	      one QoS class may have a much larger latency bound than
	      another. Overload can occur which would violate an SLA
	      parameter (e.g., loss), and some remedy to handle this
	      case is required for a composite link.
	    </t>
	    <t>
	      If the total demand offered by traffic flows exceeds the
	      capacity of the composite link, the solution SHOULD define
	      a means to cause the LSPs for some traffic flows to move
	      to some other point in the network that is not
	      congested. These "preempted LSPs" may not be restored if
	      there is no uncongested path in the network.
	    </t>
	  </list>
	</t>
      </section>
      <section anchor="multipath-diff"
	       title="Parallel Component Links with Different Characteristics">
	<t>
	  Corresponding to Case 1 of <xref target="ITU-T.G.800" />, as
	  one means to provide high availability, network operators
	  deploy a topology in the MPLS network using lower layer
	  networks that have a certain degree of diversity at the
	  lower layer(s).
	  Many techniques have been developed to balance the
	  distribution of flows across component links that connect
	  the same pair of nodes (See <xref target="multipath-lag"
	  />).  When the path for a flow can be chosen from a set of
	  candidate nodes connected via composite links, other
	  techniques have been developed (See
	  <xref target="multipath-mp" />).
<!-- The following sections break the requirements into three cases
     determined by the connectivity of the component links: a) same
     pair of nodes or sites, b) same pair of nodes only, c) component
     links connecting multiple pairs of nodes in a pair of sites. -->
<!-- The set of case a, b, c above doesn't make sense.  Case a seems
     to be the superset of case b and c.  If that is the intent, then
     the text needs to be clear about it.  DM: That was the idea, case
     a applies to both b and c to reduce amount of text. Rewrote this
     however.  -->
	  <list counter="fr" hangIndent="4" style="format FR#%d">
            <t>
	      The solution SHALL measure traffic on a labeled traffic
	      flow and dynamically select the component link on which
	      to place this flow in order to balance the load so that
	      no component link in the composite link between a pair
	      of nodes is overloaded.
	    </t>
	    <t>
	      When a traffic flow is moved from one component link to
	      another in the same composite link between a set of
	      nodes (or sites), it MUST be done so in a minimally
	      disruptive manner.  <vspace blankLines="1" /> When a
	      flow is moved from a current link to a target link with
	      different latency, reordering can occur if the target
	      link latency is less than that of the current or
	      clumping can occur if target link latency is greater
	      than that of the current. Therefore, some flows (e.g.,
	      timing distribution, PW circuit emulation) are quite
	      sensitive to these effects, which may be specified in an
	      SLA or are needed to meet a user experience objective
	      (e.g. jitter buffer under/overrun).
	    </t>
<!-- There are a few errors in the above paragraph.  New delay greater
     than old delay results in a gap.  New delay less than old delay
     results in reorder.  It is not practical to put playback buffers
     in the network core. DM: Good catch. OK Now?  -->
	    <t>
	      The solution SHALL provide a means to identify flows
	      whose rearrangement frequency needs to be bounded by a
	      configured value.
	    </t>
	    <t>
	      The solution SHALL provide a means that communicates
	      whether the flows within an LSP can be split across
	      multiple component links. The solution SHOULD provide a
	      means to indicate the flow identification field(s) which
	      can be used along the flow path to perform this function.
<!-- does not parse - reword.  makes sense but grammar error. DM: OK Now?-->
	    </t>
	    <t>
	      The solution SHALL provide a means to indicate that a
	      traffic flow shall select a component link with the
	      minimum latency value.
	    </t>
	    <t>
	      The solution SHALL provide a means to indicate that a
	      traffic flow shall select a component link with a
	      maximum acceptable latency value as specified by
	      protocol.
<!-- or a target latency? DM: Same as Max acceptable from Ning?-->
	    </t>
	    <t>
	      The solution SHALL provide a means to indicate that a
	      traffic flow shall select a component link with a
	      maximum acceptable delay variation value as specified by
	      protocol.
	    </t>
	    <t>
	      The solution SHALL provide a local means for a node to
	      automatically distribute flows across the component
	      links in the composite link that connects to the other
	      node such that SLA objectives are met.
	    </t>
	    <t>
	      The solution SHALL provide a means to distribute flows
	      from a single LSP across multiple component links to
	      handle at least the case where the traffic carried in an
	      LSP exceeds that of any component link in the composite
	      link.
	    </t>
	  </list>
	</t>
      </section>
    </section>

    <section anchor="DR" title="Derived Requirements">
      <t>
	This section takes the next step and derives high-level
	requirements on protocol specification from the functional
	requirements.
	<list counter="dr" hangIndent="4" style="format DR#%d">
	  <t>
	    The solution SHOULD attempt to extend existing protocols
	    wherever possible, developing a new protocol only if this
	    adds a significant set of capabilities.
	    <vspace blankLines="1" /> The vast majority of network
	    operators have provisioned L3VPN services over LDP. Many
	    have deployed L2VPN services over LDP as well. TE
	    extensions to IGP and RSVP-TE are viewed as being overly
	    complex by some operators.
<!-- This is not worded as a requirement. -->
	  </t>
	  <t>
	    A solution SHOULD extend LDP capabilities to meet
	    functional requirements (without using TE methods as
	    decided in <xref target="RFC3468" />).
	  </t>
	  <t>
	    Coexistence of LDP and RSVP-TE signaled LSPs MUST be
	    supported on a composite link. Other functional
	    requirements should be supported as independently of
	    signaling protocol as possible.
	  </t>
	  <t>
	    When the nodes connected via a composite link are in the
	    same MPLS network topology, the solution MAY define
	    extensions to the IGP.
	  </t>
	  <t>
	    When the nodes connected via a composite link are in
	    different MPLS network topologies, the solution SHALL NOT
	    rely on extensions to the IGP.
	  </t>
<!-- Terminology of "same network" is not precise.  For example, by
     some definitions all nodes on the Internet are on the same
     network.  Maybe "same network domain" or "same topology" would be
     better.  -->
	  <t>
	    When a worst case failure scenario occurs, the resulting
	    number of links advertised in the IGP causes IGP
	    convergence to occur, causing a period of unavailability
	    as perceived by users.  The convergence time of the
	    solution MUST meet the SLA objective for the duration of
	    unavailability.
<!-- The sentence needs rewording. DM: Better now?-->
	  </t>
	  <t>
	    The Solution SHALL summarize the characteristics of the
	    component links as a composite link IGP advertisement that
	    results in convergence time better than that of
	    advertising the individual component links.  This summary
	    SHALL be designed so that it represents the range of
	    capabilities of the individual component links such that
	    functional requirements are met, and also minimizes the
	    frequency of advertisement updates which may cause IGP
	    convergence to occur. Examples of advertisement update
	    triggering events to be considered include: LSP
	    establishment/release, changes in component link
	    characteristics (e.g., latency, up/down state), and/or
	    bandwidth utilization.
	  </t>
	  <t>
	    When a worst case failure scenario occurs, the resulting
	    number of links advertised in the IGP causes IGP
	    convergence to occur, causing a period of unavailability
	    as perceived by users.  The convergence time of the
	    solution MUST meet the SLA objective for the duration of
	    unavailability.
	  </t>
	  <t>
	    When a worst case failure scenario occurs, the number of
	    RSVP-TE LSPs to be resignaled will cause a period of
	    unavailability as perceived by users. The resignaling time
	    of the solution MUST meet the SLA objective for the
	    duration of unavailability. The resignaling time of the
	    solution MUST NOT increase significantly as compared with
	    current methods.
<!-- Same here.  Some rewording is needed. DM: Better now?-->
	  </t>
	</list>
      </t>
    </section>

    <section anchor="Acknowledgements" title="Acknowledgements">
      <t>
	Frederic Jounay of France Telecom and Yuji Kamite of NTT
	Communications Corporation co-authored a version of this
	document.
      </t>
      <t>
	A rewrite of this document occurred after the IETF77 meeting.
	Dimitri Papadimitriou, Lou Berger, Tony Li, the WG chairs John
	Scudder and Alex Zinin, and others provided valuable guidance
	prior to and at the IETF77 RTGWG meeting.
      </t>
      <t>
	Tony Li and John Drake have made numerous valuable comments on
	the RTGWG mailing list that are reflected in versions
	following the IETF77 meeting.
      </t>
    </section>

    <!-- Possibly a 'Contributors' section ... -->

    <section anchor="IANA" title="IANA Considerations">
      <t>This memo includes no request to IANA.</t>
    </section>

    <section anchor="Security" title="Security Considerations">
      <t>
	This document specifies a set of requirements.  The
	requirements themselves do not pose a security threat.  If
	these requirements are met using MPLS signaling as commonly
	practiced today with authenticated but unencrypted OSPF-TE,
	ISIS-TE, and RSVP-TE or LDP, then the requirement to provide
	additional information in this communication presents
	additional information that could conceivably be gathered in a
	man-in-the-middle confidentiality breach.  Such an attack
	would require a capability to monitor this signaling either
	through a provider breach or access to provider physical
	transmission infrastructure.  A provider breach already poses
	a threat of numerous types of attacks which are of far more
	serious consequence.  Encryption of the signaling can prevent
	or render more difficult any confidentiality breach that
	otherwise might occur by means of access to provider physical
	transmission infrastructure.
      </t>
    </section>
  </middle>

  <back>

    <references title="Normative References">

      &RFC2119;

    </references>

    <references title="Informative References">

      &RFC2702;

      &RFC3031;

      &RFC3468;

      &RFC3809;

      &RFC4031;

      &RFC4665;

      &RFC5254;

      <reference anchor="ITU-T.G.800"
                 target="http://www.itu.int/rec/T-REC-G/recommendation.asp?parent=T-REC-G.800">
        <front>
          <title>Unified functional architecture of transport
          networks</title>

          <author>
            <organization>ITU-T</organization>
          </author>

          <date year="2007" />
        </front>
      </reference>

    </references>

    <references title="Appendix References">

      <!-- add diffserv framework -->

      &RFC1717;

      &RFC2475;

      &RFC2615;

      &RFC2991;

      &RFC2992;

      &RFC3260;

      &RFC4201;

      &RFC4301;

      &RFC4385;

      &RFC4928;

      &I-D.ietf-pwe3-fat-pw;

      <reference anchor="ITU-T.Y.1541"
                 target="http://www.itu.int/rec/T-REC-Y.1541/en">
        <front>
          <title>Network performance objectives for IP-based services</title>

          <author>
            <organization>ITU-T</organization>
          </author>

          <date year="2006" />
        </front>
      </reference>

      <reference anchor="IEEE-802.1AX"
                 target="http://standards.ieee.org/getieee802/download/802.1AX-2008.pdf">
        <front>
          <title>IEEE Std 802.1AX-2008 IEEE Standard for
	    Local and Metropolitan Area Networks - Link Aggregation</title>

          <author>
            <organization>IEEE Standards Association</organization>
          </author>

          <date year="2008" />
        </front>
      </reference>

    </references>

    <section anchor="story-time"
	     title="More Details on Existing Network Operator
	     Practices and Protocol Usage">
      <t>
	Network operators have SLAs for services that are comprised of
	numerical values for performance measures, principally
	availability, latency, delay variation.  See
	<xref target="ITU-T.Y.1541" />, <xref target="RFC3809">RFC
	3809, Section 4.9</xref> for examples of the form of such
	SLAs. Note that the numerical values of Y.1541 span multiple
	networks and may be looser than network operator SLAs.
	Applications and acceptable user experience have a
	relationship to these performance parameters.
      </t>
      <t>
	Consider latency as an example. In some cases, minimizing
	latency relates directly to the best customer experience
	(e.g., in TCP closer is faster). In other cases, user
	experience is relatively insensitive to latency, up to a
	specific limit at which point user perception of quality
	degrades significantly (e.g., interactive human voice and
	multimedia conferencing). A number of SLAs have a bound on
	point-point latency, and as long as this bound is met, the SLA
	is met -- decreasing the latency is not necessary. In some
	SLAs, if the specified latency is not met, the user considers
	the service as unavailable. An unprotected LSP can be manually
	provisioned on a set of links to meet this type of SLA, but this
	lowers availability since an alternate route that meets the
	latency SLA cannot be determined.
      </t>
      <t>
	Historically, when an IP/MPLS network was operated over a
	lower layer circuit switched network (e.g., SONET rings), a
	change in latency caused by the lower layer network (e.g., due
	to a maintenance action or failure) was not known to the
	MPLS network. This resulted in latency affecting end user
	experience, sometimes violating SLAs or resulting in user
	complaints.
      </t>
      <t>
	A response to this problem was to provision IP/MPLS networks
	over unprotected circuits and set the metric and/or TE-metric
	proportional to latency. This resulted in traffic being
	directed over the least latency path, even if this was not
	needed to meet an SLA or meet user experience objectives. This
	results in reduced flexibility and increased cost for network
	operators. Using lower layer networks to provide restoration
	and grooming is expected to be more efficient, but the
	inability to communicate performance parameters, in particular
	latency, from the lower layer network to the higher layer
	network is an important problem to be solved before this can
	be done.
      </t>
      <t>
	Latency SLAs for pt-pt services are often tied closely to
	geographic locations, while latency for multipoint services may
	be based upon a worst case within a region.
      </t>
      <t>
	The presence of only three Traffic Class (TC) bits (previously
	known as EXP bits) in the MPLS shim header is limiting when a
	network operator needs to support QoS classes for multiple
	services (e.g., L2VPN VPWS, VPLS, L3VPN and Internet), each of
	which has a set of QoS classes that need to be supported. In
	some cases one bit is used to indicate conformance to some
	ingress traffic classification, leaving only two bits for
	indicating the service QoS classes. The approach that has been
	taken is to aggregate these QoS classes into similar sets on
	LER-LSR and LSR-LSR links.
      </t>
      <t>
	Labeled LSPs and the use of link layer encapsulation have
	been standardized in order to provide a means to meet
	these needs.
      </t>
      <t>
	The IP DSCP cannot be used for flow identification since
	<xref target="RFC4301">RFC 4301 Section 5.5</xref> requires
	Diffserv transparency, and in general network operators do not
	rely on the DSCP of Internet packets.
      </t>
      <t>
	Pushing a label onto Internet packets when they are carried
	along with L2/L3VPN packets on the same link or lower layer
	network provides a means to distinguish between the QoS classes
	for these packets.
      </t>
      <t>
	Operating an MPLS-TE network involves a different paradigm
	from operating an IGP metric-based LDP signaled MPLS
	network. The mpt-pt LDP signaled MPLS LSPs occur
	automatically, and balancing across parallel links occurs if
	the IGP metrics are set "equally" (with equality a locally
	definable relation).
      </t>
      <t>
	Traffic is typically comprised of a few large (some very
	large) flows and many small flows. In some cases, separate
	LSPs are established for very large flows. This can occur even
	if the IP header information is inspected by a router, for
	example an IPsec tunnel that carries a large amount of
	traffic. An important example of large flows is that of
    a L2/L3 VPN customer who has an access line bandwidth comparable to a
    client-client composite link bandwidth -- there could be flows that are
    on the order of the access line bandwidth.
      </t>
    </section>

    <section anchor="multipath-bcp"
	     title="Existing Multipath Standards and Techniques">
      <t>
	Today the requirement to handle large aggregations of traffic,
	much larger than a single component link, can be handled by a
	number of techniques which we will collectively call
	multipath.  Multipath applied to parallel links between the
	same set of nodes includes Ethernet Link Aggregation
	<xref target="IEEE-802.1AX" />,
	<xref target="RFC4201">link bundling</xref>, or other
	aggregation techniques some of which may be vendor specific.
	Multipath applied to diverse paths rather than parallel links
	includes Equal Cost MultiPath (ECMP) as applied to OSPF, ISIS,
	or even BGP, and equal cost LSP, as described
	in <xref target="multipath-mp" />.  Various multipath techniques
	have strengths and weaknesses.
      </t>
      <t>
	The term composite link is more general than terms such as
	link aggregate which is generally considered to be specific to
	Ethernet and its use here is consistent with the broad
	definition in <xref target="ITU-T.G.800" />.  The term
	multipath excludes inverse multiplexing and refers to
	techniques which only solve the problem of large aggregations
	of traffic, without addressing the other requirements outlined
	in this document.
      </t>
      <section anchor="multipath-common"
	       title="Common Multipath Load Splitting Techniques">
	<t>
	  Identical load balancing techniques are used for multipath
	  both over parallel links and over diverse paths.
	</t>
	<t>
	  Large aggregates of IP traffic do not provide explicit
	  signaling to indicate the expected traffic loads.  Large
	  aggregates of MPLS traffic are carried in MPLS tunnels
	  supported by MPLS LSP.  LSP which are signaled using RSVP-TE
	  extensions do provide explicit signaling which includes the
	  expected traffic load for the aggregate.  LSP which are
	  signaled using LDP do not provide an expected traffic load.
	</t>
	<t>
	  MPLS LSP may contain other MPLS LSP arranged hierarchically.
	  When an MPLS LSR serves as a midpoint LSR in an LSP carrying
	  other LSP as payload, there is no signaling associated with
	  these inner LSP.  Therefore even when using RSVP-TE
	  signaling there may be insufficient information provided by
	  signaling to adequately distribute load across a composite
	  link.
	</t>
	<t>
	  Generally a set of label stack entries that is unique across
	  the ordered set of label numbers can safely be assumed to
	  contain a group of flows.  The reordering of traffic can
	  therefore be considered to be acceptable unless reordering
	  occurs within traffic containing a common unique set of
	  label stack entries.  Existing load splitting techniques
	  take advantage of this property in addition to looking
	  beyond the bottom of the label stack and determining if the
	  payload is IPv4 or IPv6 to load balance traffic accordingly.
	</t>
	<t>
	  MPLS-TP OAM violates the assumption that it is safe to
	  reorder traffic within an LSP.  If MPLS-TP OAM is to be
	  accommodated, then existing multipath techniques must be
	  modified.  Such modifications are outside the scope of this
	  document.
	</t>
	<t>
	  For example a large aggregate of IP traffic may be
	  subdivided into a large number of groups of flows using a
	  hash on the IP source and destination addresses.  This is as
	  described in <xref target="RFC2475" /> and clarified in
	  <xref target="RFC3260" />.  For MPLS traffic carrying IP, a
	  similar hash can be performed on the set of labels in the
	  label stack.  These techniques are both examples of means to
	  subdivide traffic into groups of flows for the purpose of
	  load balancing traffic across aggregated link capacity.  The
	  means of identifying a flow should not be confused with the
	  definition of a flow.
	</t>
	<t>
	  Discussion of whether a hash based approach provides a
	  sufficiently even load balance using any particular hashing
	  algorithm or method of distributing traffic across a set of
	  component links is outside of the scope of this document.
	</t>
	<t>
	  The current load balancing techniques are referenced in
	  <xref target="RFC4385" /> and <xref target="RFC4928" />.
	  The use of three hash based approaches is described in
	  <xref target="RFC2991" /> and <xref target="RFC2992" />.  A
	  mechanism to identify flows within PW is described in
	  <xref target="I-D.ietf-pwe3-fat-pw" />.  The use of hash
	  based approaches is mentioned as an example of an existing
	  set of techniques to distribute traffic over a set of
	  component links.  Other techniques are not precluded.
	</t>
      </section>
      <section anchor="multipath-active"
	       title="Simple and Adaptive Load Balancing Multipath">
	<t>
	  Simple multipath generally relies on the mathematical
	  probability that given a very large number of small
	  microflows, these microflows will tend to be distributed
	  evenly across a hash space.  A common simple multipath
	  implementation assumes that all members (component links)
	  are of equal capacity and performs a modulo operation across
	  the hashed value.  An alternate simple multipath technique
	  uses a table generally with a power of two size, and
	  distributes the table entries proportionally among members
	  according to the capacity of each member.
	  </t>
	  <t>
	  Simple load balancing works well if there are a very large
	  number of small microflows (i.e., microflow rate is much
	  less than component link capacity).  However, the case where
	  there are even a few large microflows is not handled well by
	  simple load balancing.
	  </t>
	<t>
	  An adaptive multipath technique is one where the traffic
	  bound to each member (component link) is measured and the
	  load split is adjusted accordingly.  As long as the
	  adjustment is done within a single network element, then no
	  protocol extensions are required and there are no
	  interoperability issues.
	</t>
	<t>
	  Note that if the load balancing algorithm and/or its
	  parameters are adjusted, then packets in some flows may be
	  delivered out of sequence.
	</t>

      </section>
      <section anchor="multipath-lag"
	       title="Traffic Split over Parallel Links">
	<t>
	  The load splitting techniques defined in
	  <xref target="multipath-common" /> and
	  <xref target="multipath-active" /> are both used in
	  splitting traffic over parallel links between the same pair
	  of nodes.  The best known technique, though far from being
	  the first, is
	  <xref target="IEEE-802.1AX">Ethernet Link
	  Aggregation</xref>.  This same technique had been applied
	  much earlier using OSPF or ISIS Equal Cost MultiPath (ECMP)
	  over parallel links between the same
	  nodes.  <xref target="RFC1717"> Multilink PPP</xref> uses a
	  technique that provides inverse multiplexing, however a
	  number of vendors had provided proprietary extensions to
	  <xref target="RFC2615">PPP over SONET/SDH</xref> that
	  predated Ethernet Link Aggregation but are no longer used.
	</t>
	<t>
	  <xref target="RFC4201">Link bundling</xref> provides yet
	  another means of handling parallel LSP.  RFC4201 explicitly
	  allows a special value of all ones to indicate a split across
	  all members of the bundle.
	</t>
      </section>
      <section anchor="multipath-mp"
	       title="Traffic Split over Multiple Paths">
	<t>
	  OSPF or ISIS Equal Cost MultiPath (ECMP) is a well known
	  form of traffic split over multiple paths that may traverse
	  intermediate nodes.  ECMP is often incorrectly equated to
	  only this case, and multipath over multiple diverse paths is
	  often incorrectly equated to ECMP.
	</t>
	<t>
	  Many implementations are able to create more than one LSP
	  between a pair of nodes, where these LSP are routed
	  diversely to better make use of available capacity.  The
	  load on these LSP can be distributed proportionally to the
	  reserved bandwidth of the LSP.  These multiple LSP may be
	  advertised as a single PSC FA and any LSP making use of the
	  FA may be split over these multiple LSP.
	</t>
	<t>
	  <xref target="RFC4201">Link bundling</xref> component links
	  may themselves be LSP.  When this technique is used, any LSP
	  which specifies the link bundle may be split across the
	  multiple paths of the LSP that comprise the bundle.
	</t>
      </section>
    </section>

    <section anchor="G.800-Definitions"
	     title="ITU-T G.800 Composite Link Definitions and Terminology">
      <t>
	<list hangIndent="4" style="hanging">
	  <t hangText="Composite Link:">
	    <vspace blankLines="0" />
	    <xref target="ITU-T.G.800">Section 6.9.2 of
	    ITU-T-G.800</xref> defines composite link in terms of
	    three cases, of which the following two are relevant (the
	    one describing inverse (TDM) multiplexing does not
	    apply). Note that these case definitions are taken
	    verbatim from section 6.9, "Layer Relationships".
	    <list hangIndent="4" style="hanging">
	      <t hangText="Case 1:">
		"Multiple parallel links between the same subnetworks
		can be bundled together into a single composite
		link. Each component of the composite link is
		independent in the sense that each component link is
		supported by a separate server layer trail. The
		composite link conveys communication information using
		different server layer trails thus the sequence of
		symbols crossing this link may not be preserved. This
		is illustrated in Figure 14."
	      </t>
	      <t hangText="Case 3:">
		"A link can also be constructed by a concatenation of
		component links and configured channel forwarding
		relationships. The forwarding relationships must have
		a 1:1 correspondence to the link connections that will
		be provided by the client link. In this case, it is
		not possible to fully infer the status of the link by
		observing the server layer trails visible at the ends
		of the link. This is illustrated in Figure 16."
	      </t>
	    </list>
	  </t>
	  <t hangText="Subnetwork:">
	    A set of one or more nodes (i.e., LER or LSR) and links.
	    As a special case it can represent a site comprised of
	    multiple nodes.
<!-- this should be listed as a special case of a subnet DM: OK? -->
	  </t>
	  <t hangText="Forwarding Relationship:">
	    Configured forwarding between ports on a subnetwork. It
	    may be connectionless (e.g., IP, not considered in this
	    draft), or connection oriented (e.g., MPLS signaled or
	    configured).
<!-- conflict with prior statement that limits scope to MPLS with a CP
     DM: OK now?-->
	  </t>
	  <t hangText="Component Link:">
	    A topological relationship between subnetworks (i.e., a
	    connection between nodes), which may be a wavelength,
	    circuit, virtual circuit or an MPLS LSP.
<!-- do we really mean subnetwork here or site?  DM: Site is special
     case of subnet.  If subnet, ECMP? DM: Question unclear. -->
	  </t>
	</list>
      </t>
    </section>
  </back>
</rfc>

PAFTECH AB 2003-20262026-04-24 03:15:56