One document matched: draft-ietf-rtgwg-cl-framework-04.xml
<?xml version="1.0" encoding="US-ASCII"?>
<!-- xml2rfc is available at http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC2475 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2475.xml">
<!ENTITY RFC2702 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2702.xml">
<!ENTITY RFC2991 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2991.xml">
<!ENTITY RFC2992 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2992.xml">
<!ENTITY RFC3209 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3209.xml">
<!ENTITY RFC3260 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3260.xml">
<!ENTITY RFC3468 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3468.xml">
<!ENTITY RFC3471 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3471.xml">
<!ENTITY RFC3630 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3630.xml">
<!ENTITY RFC3945 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3945.xml">
<!ENTITY RFC3985 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3985.xml">
<!ENTITY RFC4201 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4201.xml">
<!ENTITY RFC4206 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4206.xml">
<!ENTITY RFC4385 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4385.xml">
<!ENTITY RFC4448 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4448.xml">
<!ENTITY RFC4655 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4655.xml">
<!ENTITY RFC4928 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4928.xml">
<!ENTITY RFC5036 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5036.xml">
<!ENTITY RFC5151 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5151.xml">
<!ENTITY RFC5152 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5152.xml">
<!ENTITY RFC5305 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5305.xml">
<!ENTITY RFC5316 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5316.xml">
<!ENTITY RFC5392 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5392.xml">
<!ENTITY RFC5420 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5420.xml">
<!ENTITY RFC5441 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5441.xml">
<!ENTITY RFC5586 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5586.xml">
<!ENTITY RFC5712 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5712.xml">
<!ENTITY RFC5786 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5786.xml">
<!ENTITY RFC5920 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5920.xml">
<!ENTITY RFC5921 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5921.xml">
<!ENTITY RFC6107 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6107.xml">
<!ENTITY RFC6374 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6374.xml">
<!ENTITY RFC6391 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6391.xml">
<!ENTITY RFC6790 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6790.xml">
<!ENTITY RFC6941 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6941.xml">
<!ENTITY I-D.ietf-rtgwg-cl-requirement SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-rtgwg-cl-requirement-11">
<!ENTITY I-D.ietf-rtgwg-cl-use-cases SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-rtgwg-cl-use-cases-04">
<!ENTITY I-D.ospf-cc-stlv SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ospf-cc-stlv-00">
<!ENTITY I-D.kompella-mpls-rsvp-ecmp SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-kompella-mpls-rsvp-ecmp-03">
<!ENTITY I-D.ietf-mpls-multipath-use SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-mpls-multipath-use-00">
<!ENTITY I-D.villamizar-mpls-multipath-extn SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-villamizar-mpls-multipath-extn-00">
<!ENTITY I-D.ietf-ospf-te-metric-extensions SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-ospf-te-metric-extensions-04">
<!ENTITY I-D.previdi-isis-te-metric-extensions SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-previdi-isis-te-metric-extensions-03">
<!ENTITY I-D.atlas-mpls-te-express-path SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-atlas-mpls-te-express-path-02">
]>
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<?rfc strict="yes" ?>
<?rfc toc="yes"?>
<?rfc tocdepth="4"?>
<?rfc symrefs="yes"?>
<?rfc sortrefs="yes" ?>
<?rfc compact="yes" ?>
<?rfc subcompact="no" ?>
<?rfc comments="yes"?>
<?rfc inline="yes" ?>
<rfc category="info" ipr="trust200902"
docName="draft-ietf-rtgwg-cl-framework-04">
<front>
<title abbrev="Advanced Multipath Framework">
Advanced Multipath Framework in MPLS</title>
<author
fullname="So Ning" initials="S." surname="Ning">
<organization>Tata Communications</organization>
<address>
<email>ning.so@tatacommunications.com</email>
</address>
</author>
<author
fullname="Dave McDysan" initials="D." surname="McDysan">
<organization>Verizon</organization>
<address>
<postal>
<street>22001 Loudoun County PKWY</street>
<city>Ashburn, VA</city>
<code>20147</code>
<country>USA</country>
</postal>
<email>dave.mcdysan@verizon.com</email>
</address>
</author>
<author
fullname="Eric Osborne" initials="E." surname="Osborne">
<organization>Cisco</organization>
<address>
<email>eosborne@cisco.com</email>
</address>
</author>
<author
fullname="Lucy Yong" initials="L." surname="Yong">
<organization>Huawei USA</organization>
<address>
<postal>
<street>5340 Legacy Dr.</street>
<city>Plano, TX</city>
<code>75025</code>
<country>USA</country>
</postal>
<phone>+1 469-277-5837</phone>
<email>lucy.yong@huawei.com</email>
</address>
</author>
<author
fullname="Curtis Villamizar" initials="C." surname="Villamizar">
<organization>Outer Cape Cod Network Consulting</organization>
<address>
<email>curtis@occnc.com</email>
</address>
</author>
<date year="2013" />
<area>Routing</area>
<workgroup>RTGWG</workgroup>
<keyword>MPLS</keyword>
<keyword>Advanced Multipath</keyword>
<keyword>composite link</keyword>
<keyword>link aggregation</keyword>
<keyword>ECMP</keyword>
<keyword>link bundling</keyword>
<keyword>multipath</keyword>
<keyword>MPLS-TP</keyword>
<abstract>
<t>
This document specifies a framework for support of Advanced
Multipath in MPLS networks. As defined in this framework, an
Advanced Multipath consists of a group of homogeneous or
non-homogeneous links that have the same forwarding adjacency (FA)
and can be considered as a single TE link or an IP link when
advertised into IGP routing.
</t>
</abstract>
</front>
<middle>
<section title="Introduction">
<t>
Advanced Multipath functional requirements are specified in
<xref target="I-D.ietf-rtgwg-cl-requirement" />. Advanced
Multipath use cases are described in
<xref target="I-D.ietf-rtgwg-cl-use-cases" />. This document
specifies a framework to meet these requirements.
</t>
<t>
This document describes an Advanced Multipath framework in the
context of MPLS networks using an IGP-TE and RSVP-TE MPLS
control plane with GMPLS extensions
<xref target="RFC3209" />
<xref target="RFC3630" />
<xref target="RFC3945" />
<xref target="RFC5305" />.
</t>
<t>
Specific protocol solutions are outside the scope of this
document; however, a framework for the extension of existing
protocols is provided. Backwards compatibility is best
achieved by extending existing protocols where practical
rather than inventing new protocols. The focus is on
examining where existing protocol mechanisms fall short with
respect to <xref target="I-D.ietf-rtgwg-cl-requirement" /> and
on the types of extensions that will be required to accommodate
functionality that is called for in
<xref target="I-D.ietf-rtgwg-cl-requirement" />.
</t>
<section title="Background">
<t>
Classic multipath, including Ethernet Link Aggregation, has
been widely used in today's MPLS networks <xref
target="RFC4385" /><xref target="RFC4928" />. Classic
multipath using non-Ethernet links is often advertised
using MPLS Link bundling. A link bundle <xref
target="RFC4201" /> bundles a group of homogeneous links as
a TE link to make IGP-TE information exchange and RSVP-TE
signaling more scalable. An Advanced Multipath allows
bundling non-homogeneous links together as a single logical
link.
</t>
<t>
An Advanced Multipath is a single logical link in an MPLS network
that contains multiple parallel component links between two
MPLS LSR. Unlike a link bundle <xref target="RFC4201" />,
the component links in an Advanced Multipath can have different
properties such as cost, capacity, delay, or jitter.
</t>
</section>
<section title="Architecture Summary">
<t>
Networks aggregate information, both in the control plane
and in the data plane, as a means to achieve scalability. A
tradeoff exists between the needs of scalability and the
needs to identify differing path and link characteristics
and differing requirements among flows contained within
further aggregated traffic flows. These tradeoffs are
discussed in detail in <xref target="sect.tradeoffs" />.
</t>
<t>
Some aspects of Advanced Multipath requirements present
challenges for which multiple solutions may exist. In
<xref target="sect.challenges" /> various challenges and
potential approaches are discussed.
</t>
<t>
A subset of the functionality called for in
<xref target="I-D.ietf-rtgwg-cl-requirement" />
is available through MPLS Link Bundling
<xref target="RFC4201" />.
Link bundling and other existing standards applicable to
Advanced Multipath are covered in
<xref target="sect.existing" />.
</t>
<t>
The most straightforward means of supporting Advanced
Multipath requirements is to extend MPLS protocols and
protocol semantics and in particular to extend link
bundling. Extensions which have already been proposed in
other documents which are applicable to Advanced Multipath
are discussed in <xref target="sect.proposed" />.
</t>
<t>
A goal of most new protocol work within IETF is to reuse
existing protocol encapsulations and mechanisms where they
meet requirements and extend existing mechanisms. This
approach minimizes additional complexity while meeting
requirements and tends to preserve backwards compatibility
to the extent it is practical to do so. These goals are
considered in proposing a framework for further protocol
extensions and mechanisms in
<xref target="sect.needed-extn" />.
</t>
</section>
<section title="Conventions used in this document">
<t>
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL",
"SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY",
and "OPTIONAL" in this document are to be interpreted as
described in <xref target="RFC2119">RFC 2119</xref>.
</t>
</section>
<section title="Terminology">
<t>
Terminology defined in
<xref target="I-D.ietf-rtgwg-cl-requirement" />
is used in this document. The additional terms defined in
<xref target="I-D.ietf-rtgwg-cl-use-cases" />
are also used.
</t>
<t>
The abbreviation IGP-TE is used as a shorthand indicating
either OSPF-TE <xref target="RFC3630" />
or ISIS-TE <xref target="RFC5305" />.
</t>
</section>
<section title="Document Issues">
<t>
This subsection exists solely for the purpose of focusing
the RTGWG meeting and mailing list discussions on areas
within this document that need attention in order for the
document to achieve the level of quality necessary to
advance the document through the IETF process. This
subsection will be removed before working group last call.
</t>
<t>
The following issues need to be resolved.
<list style="numbers">
<t>
The feasibility of symmetric paths for all flows is
questionable. The only case where this is practical is
where LSP are smaller than component links and where
classic link bundling (not using the all-ones component)
is used. Perhaps the emphasis on this (mis)feature
should be reduced in the requirements document. See
<xref target="sect.path-symmetry" />.
</t>
<t>
There is a tradeoff between supporting delay optimized
routing and avoiding oscillation. This may be
sufficiently covered, but a careful review by others and
comments would be beneficial.
</t>
<t>
Any measurement of jitter (delay variation) that is used
in route decision is likely to cause oscillation.
Trying to optimize a path to reduce jitter may be a
fool's errand. How do we say this in the draft, or does
the existing text cover it adequately?
</t>
<t>
RTGWG needs to consider the possibility of using
multi-topology IGP extensions in IP and LDP routing where
the topologies reflect differing requirements (see
<xref target="sect.ldp-limitations" />). This idea is
similar to TOS routing, which has been discussed for
decades but has never been deployed. One possible
outcome of discussion would be to declare TOS routing
out of scope in the requirements document.
</t>
<t>
The following referenced drafts have expired:
<list style="letters">
<t><xref target="I-D.ospf-cc-stlv" /></t>
<t><xref target="I-D.villamizar-mpls-multipath-extn" /></t>
</list>
A replacement for <xref target="I-D.ospf-cc-stlv" /> is
expected to be submitted.
<xref target="I-D.villamizar-mpls-multipath-extn" />
is expected to emerge in a simplified form, removing
extensions for which existing workarounds are considered
adequate based on feedback at a prior IETF.
</t>
<t>
Clarification of what we intend to do with Multi-Domain
Advanced Multipath is needed in <xref target="r.multi-domain" />.
</t>
<t>
The following topics in the requirements document are
not addressed. Since they are explicitly mentioned in
the requirements document some mention of how they are
supported is needed in this document.
<list style="letters">
<t>
Migration (incremental deployment) may not be
adequately covered in <xref target="sect.compat" />.
It might also be necessary to say more here on
performance, scalability, and stability as they
relate to migration. Comments on this from
co-authors or the WG?
<!-- This might be a topic for r.bundle, r.metric -->
</t>
<t>
We may need a performance section in this document
to specifically address #DR6 (fast convergence), and
#DR7 (fast worst case failure convergence). We do
already have scalability discussion and make a
recommendation for a separate document. At the very
least the performance section would have to say "no
worse than before, except where there was no
alternative to make it very slightly worse" (in a
bit more detail than that). It might also be
helpful to better define the nature of the
performance criteria implied by #DR6 and #DR7.
<!-- need r.stability ? - or embed in other docs? -->
</t>
</list>
</t>
</list>
</t>
<t>
The above list has been in this document for the better part
of a year with very little discussion (or none) of the above
issues on the RTGWG mailing list.
</t>
</section>
</section>
<section title="Advanced Multipath Key Characteristics">
<t>
<xref target="I-D.ietf-rtgwg-cl-requirement" /> defines
external behavior of Advanced Multipath. The overall framework
approach involves extending existing protocols in a backwards
compatible manner and reusing ongoing work elsewhere in IETF
where applicable, defining new protocols or semantics only
where necessary. Given the requirements, and this approach of
extending MPLS, Advanced Multipath key characteristics can be
described in greater detail than given requirements alone.
</t>
<section anchor="sect.flow-id"
title="Flow Identification">
<t>
Traffic mapping to component links is a data plane
operation. Control over how the mapping is done may be
directly dictated or constrained by the control plane or by
the management plane. When unconstrained by the control
plane or management plane, distribution of traffic is
entirely a local matter. Regardless of constraints or lack
of constraints, the traffic distribution is required to keep
packets belonging to individual flows in sequence and meet
QoS criteria specified per LSP by either signaling or
management
<xref target="RFC2475" />
<xref target="RFC3260" />.
</t>
<t>
Key objectives of the traffic distribution are to not
overload any component link, and to be able to perform
local recovery when a subset of component links fails.
</t>
<t>
The network operator may have other objectives such as
placing a bidirectional flow or LSP on the same component
link in both directions, bounding delay and/or jitter,
Advanced Multipath energy saving, etc.
These new requirements are described in
<xref target="I-D.ietf-rtgwg-cl-requirement" />.
</t>
<t>
Examples of means to identify a flow may in principle include:
<list style="numbers">
<t>
an LSP identified by an MPLS label,
</t>
<!--
a sub-LSP *xref target="I-D.kompella-mpls-rsvp-ecmp" /*
identified by an MPLS label,
-->
<t>
a pseudowire (PW) <xref target="RFC3985" /> identified
by an MPLS PW label,
</t>
<t>
a flow or group of flows within a pseudowire (PW)
<xref target="RFC6391" /> identified by an MPLS flow label,
</t>
<t>
a flow or flow group in an LSP
<xref target="RFC6790" />
identified by an MPLS entropy label,
</t>
<t>
all traffic between a pair of IP hosts, identified by an
IP source and destination pair,
</t>
<t>
a specific connection between a pair of IP hosts,
identified by an IP source and destination pair, protocol,
and protocol port pair,
</t>
<t>
a layer-2 conversation within a pseudowire (PW), where
the identification is PW payload type specific, such as
Ethernet MAC addresses and VLAN tags within an Ethernet
PW <xref target="RFC4448" />. This is feasible but not
practical (see below).
</t>
</list>
</t>
<t>
Although in principle a layer-2 conversation within a
pseudowire (PW) may be identified by PW payload type
specific information, in practice this is impractical at LSP
midpoints when PW are carried. The PW ingress may provide
equivalent information in a PW flow label <xref
target="RFC6391" />. Therefore, in practice, item #7 above
is covered by <xref target="RFC6391" /> and may be dropped
from the list.
</t>
<section title="Flow Identification Granularity">
<t>
An LSR must at least be capable of identifying flows based
on MPLS labels. Most MPLS LSP do not require that traffic
carried by the LSP is carried in order. MPLS-TP is a
recent exception. If it is assumed that no LSP require
strict packet ordering of the LSP itself (only of flows
within the LSP), then the entire label stack can be used as
flow identification. If some LSP may require strict packet
ordering but those LSP cannot be distinguished from others,
then only the top label can be used as a flow identifier.
If only the top label is used (for example, as specified by
<xref target="RFC4201" /> when the "all-ones" component
described in <xref target="RFC4201" /> is not used), then
there may not be adequate flow granularity to accomplish
well balanced traffic distribution and it will not be
possible to carry LSP that are larger than any individual
component link.
</t>
<t>
The number of flows can be extremely large. This may be the
case when the entire label stack is used and is always the
case when IP addresses are used in provider networks
carrying Internet traffic. Current practices for native IP
load balancing at the time of writing were documented in
<xref target="RFC2991" /> and <xref target="RFC2992" />.
These practices, as described, make use of IP addresses.
</t>
<t>
The common practices described in <xref target="RFC2991" />
and <xref target="RFC2992" /> were extended to include the
MPLS label stack and the common practice of looking at IP
addresses within the MPLS payload. These extended practices
require that pseudowires use a PWE3 Control Word and are
described in <xref target="RFC4385" /> and
<xref target="RFC4928" />. Additional detail on current
multipath practices can be found in the appendices of
<xref target="I-D.ietf-rtgwg-cl-use-cases" />.
</t>
<t>
Using only the top label supports too coarse a traffic
balance. Prior to MPLS Entropy Label
<xref target="RFC6790" />
using the full label stack was also too coarse. Using the
full label stack and IP addresses as flow identification
provides a sufficiently fine traffic balance, but is capable
of identifying such a high number of distinct flows, that a
technique of grouping flows, such as hashing on the flow
identification criteria, becomes essential to reduce the
stored state, and is an essential scaling technique. Other
means of grouping flows may be possible.
</t>
</section>
<section title="Flow Identification Summary">
<t>
In summary:
<list style="numbers">
<t>
Load balancing using only the MPLS label stack provides
too coarse a granularity of load balance.
</t>
<t>
Tracking every flow is not scalable due to the extremely
large number of flows in provider networks.
</t>
<t>
Existing techniques, IP source and destination hash in
particular, have proven in over two decades of
experience to be an excellent way of identifying groups
of flows.
</t>
<t>
If a better way to identify groups of flows is
discovered, then that method can be used.
</t>
<t>
IP address hashing is not required, but use of this
technique is strongly encouraged given the technique's
long history of successful deployment.
</t>
</list>
</t>
</section>
<section title="Flow Identification Using Entropy Label">
<t>
MPLS Entropy Label
<xref target="RFC6790" />
provides a means of making use of the entropy from
information that would require deeper packet inspection,
such as inspection of IP addresses, and putting that
entropy in the form of a hashed value into the label
stack. Midpoint LSR that understand the Entropy Label
Indicator can make use of only label stack information but
still obtain a fine load balance granularity.
</t>
</section>
</section>
<section anchor="sect.control-plane"
title="Advanced Multipath in Control Plane">
<t>
An Advanced Multipath is advertised as a single logical interface
between two connected routers, which forms a forwarding
adjacency (FA) between the routers. The FA is advertised as
a TE-link in a link state IGP, using either OSPF-TE or
ISIS-TE. The IGP-TE advertised interface parameters for the
Advanced Multipath can be preconfigured by the network operator
or be derived from its component links. Advanced Multipath
advertisement requirements are specified in <xref
target="I-D.ietf-rtgwg-cl-requirement" />.
</t>
<t>
In IGP-TE, an Advanced Multipath is advertised as a single TE
link between two connected routers. This is similar to a
link bundle <xref target="RFC4201" />. Link bundle applies
to a set of homogeneous component links. Advanced Multipath
allows homogeneous and non-homogeneous component links. Due
to the similarity, and for backwards compatibility,
extending link bundling is viewed as both simple and as the
best approach.
</t>
<t>
In order for a route computation engine to calculate a
proper path for a LSP, it is necessary for Advanced Multipath to
advertise the summarized available bandwidth as well as the
maximum bandwidth that can be made available for a single flow
(or a single LSP where no finer flow identification is
available). If an Advanced Multipath contains some
non-homogeneous component links, the Advanced Multipath also
should advertise the summarized bandwidth and the maximum
bandwidth for a single flow for each homogeneous component
link group.
</t>
<t>
Both LDP <xref target="RFC5036" /> and RSVP-TE <xref
target="RFC3209" /> can be used to signal a LSP over an
Advanced Multipath. LDP cannot be extended to support traffic
engineering capabilities <xref target="RFC3468" />.
</t>
<t>
When an LSP is signaled using RSVP-TE, the LSP MUST be
placed on the component link that meets the LSP criteria
indicated in the signaling message.
</t>
<t>
When an LSP is signaled using LDP, the LSP MUST be placed on
the component link that meets the LSP criteria, if such a
component link is available. LDP does not support traffic
engineering capabilities, imposing restrictions on LDP use
of Advanced Multipath. See <xref target="sect.ldp-limitations"
/> for further details.
</t>
<t>
If the Advanced Multipath solution is based on extensions to
IGP-TE and RSVP-TE, then in order to meet requirements
defined in
<xref target="I-D.ietf-rtgwg-cl-requirement" />,
the following derived requirements MUST be met.
<list style="numbers">
<t>
An Advanced Multipath MAY contain non-homogeneous component
links. The route computing engine MAY select one group
of component links for a LSP. The route computing
engine MUST accommodate service objectives for a given
LSP when selecting a group of component links for a LSP.
</t>
<t>
The routing protocol MUST make a grouping of component
links available in the TE-LSDB, such that within each
group all of the component links have similar
characteristics (the component links are homogeneous
within a group).
</t>
<t>
The route computation used in RSVP-TE MUST be extended
to include only the capacity of groups within an
Advanced Multipath which meet LSP criteria.
</t>
<t>
The signaling protocol MUST be able to indicate either
the criteria, or which groups may be used.
</t>
<t>
An Advanced Multipath MUST place each LSP on a component link
or group which meets or exceeds the LSP criteria.
</t>
</list>
</t>
<t>
Advanced Multipath capacity is aggregated capacity. LSP
capacity MAY be larger than individual component link
capacity. Any aggregated LSP can determine a bound on the
largest microflow that could be carried and this constraint
can be handled as follows.
</t>
<t>
<list style="numbers">
<t>
If no information is available through signaling,
management plane, or configuration, the largest
microflow is bound by one of the following:
<list style="letters">
<t>
the largest single LSP if most traffic is RSVP-TE
signaled and further aggregated,
</t>
<t>
the largest pseudowire if most traffic is carrying
pseudowire payloads that are aggregated within
RSVP-TE LSP,
</t>
<t>
or the largest interface or component link capacity
carrying IP or LDP if a large amount of IP or LDP
traffic is contained within the aggregate.
</t>
</list>
If a very large amount of traffic being aggregated is IP
or LDP, then the largest microflow is bound by the
largest component link on which IP traffic can arrive.
For example, if an LSR is acting as an LER and IP and
LDP traffic is arriving on 10 Gb/s edge interfaces, then
no microflow larger than 10 Gb/s will be present on the
RSVP-TE LSP that aggregate traffic across the core, even
if the core interfaces are 100 Gb/s interfaces.
</t>
<t>
The prior conditions provide a bound on the largest
microflow when no signaling extensions indicate a
bound. If an LSP is aggregating smaller LSP for which
the largest expected microflow carried by the smaller
LSP is signaled, then the largest microflow expected in
the containing LSP (the aggregate) is the maximum of the
largest expected microflow for any contained LSP. For
example, RSVP-TE LSP may be large but aggregate traffic
for which the source or sink are all 1 Gb/s or smaller
interfaces (such as in mobile applications in which cell
site backhauls are no larger than 1 Gb/s). If this
information is carried in the LSP originated at the cell
sites, then further aggregates across a core may make
use of this information.
</t>
<t>
The IGP must provide the bounds on the largest microflow
that an Advanced Multipath can accommodate, which is the
maximum capacity on a component link that can be made
available by moving other traffic. This information is
needed by the ingress LER for path determination.
</t>
<t>
A means to signal an LSP whose capacity is larger than
individual component link capacity is needed <xref
target="I-D.ietf-rtgwg-cl-requirement" /> and also
signal the largest microflow expected to be contained in
the LSP. If a bound on the largest microflow is not
signaled, there is no means to determine if an LSP which
is larger than any component link can be subdivided into
flows and therefore should be accepted by admission
control.
</t>
</list>
</t>
<t>
When a bidirectional LSP request is signaled over an
Advanced Multipath, if the request indicates that the LSP must
be placed on the same component link, the routers of the
Advanced Multipath MUST place the LSP traffic in both directions
on the same component link. This is particularly challenging
for aggregated capacity which makes use of the label stack
for traffic distribution. The two requirements are mutually
exclusive for any one LSP. No one LSP may be both larger
than any individual component link and require symmetrical
paths for every flow. Both requirements can be accommodated
by the same Advanced Multipath for different LSP, with any one
LSP requiring no more than one of these two features.
</t>
<t>
Individual component links may fail independently. Upon
component link failure, an Advanced Multipath MUST support a
minimally disruptive local repair, preempting any LSP which
can no longer be supported. Available capacity in other
component links MUST be used to carry impacted traffic.
The available bandwidth after failure MUST be advertised
immediately to avoid looped crankback.
</t>
<t>
When an Advanced Multipath is not able to transport all flows, it
preempts some flows based upon holding priority and informs
the control plane of these preempted flows. To minimize
impact on traffic, the Advanced Multipath MUST support soft
preemption <xref target="RFC5712" />. The network operator
SHOULD enable soft preemption. This action ensures the
remaining traffic is transported properly. FR#10 requires
that the traffic be restored. FR#12 requires that any
change be minimally disruptive. These two requirements are
interpreted to include preemption among the types of changes
that must be minimally disruptive.
</t>
</section>
<section anchor="sect.data-plane"
title="Advanced Multipath in Data Plane">
<t>
The data plane must identify groups of flows. Flow
identification is covered in <xref target="sect.flow-id" />.
Having identified groups of flows the groups must be placed
on individual component links. This step following flow
group identification is called traffic distribution or
traffic placement. The two steps together are known as
traffic balancing or load balancing.
</t>
<t>
Traffic distribution may be determined by or constrained by
control plane or management plane. Traffic distribution may
be changed due to component link status change, subject to
constraints imposed by either the management plane or
control plane. The distribution function is local to the
routers to which an Advanced Multipath belongs and its
implementation is not specified here.
</t>
<t>
When performing traffic placement, an Advanced Multipath does not
differentiate multicast traffic vs. unicast traffic.
</t>
<t>
In order to maintain scalability, existing data plane
forwarding retains state associated with the top label only.
Using UHP (UHP is the absence of the more common PHP), zero
or more labels may be POPed and packet and byte counters
incremented prior to processing what becomes the top label
after the POP operations are completed. Flow group
identification may be a parallel step in the forwarding
process. Data plane forwarding makes use of the top label
to select an Advanced Multipath, or a group of components within
an Advanced Multipath or for the case where an LSP is pinned (see
<xref target="RFC4201" />), a specific component link. For
those LSP for which the LSP selects only the Advanced Multipath
or a group of components within an Advanced Multipath, the load
balancing makes use of the set of component links selected
based on the top label, and makes use of the flow group
identification to select among that group.
</t>
<t>
The simplest traffic placement technique uses a modulo
operation after computing a hash. This technique has
significant disadvantages. The most common traffic
placement technique uses a flow group identification as
an index into a table. The table provides an indirection.
The number of bits of hash is constrained to keep table size
small. While this is not the best technique, it is the most
common. Better techniques exist but they are outside the
scope of this document and some are considered proprietary.
</t>
<t>
Requirements to limit frequency of load balancing can be
adhered to by keeping track of when a flow group was last
moved and imposing a minimum period before that flow group
can be moved again. This is straightforward for a table
approach. For other approaches it may be less
straightforward.
</t>
</section>
</section>
<section anchor="sect.tradeoffs"
title="Architecture Tradeoffs">
<t>
Scalability and stability are critical considerations in
protocol design where protocols may be used in a large network
such as today's service provider networks. Advanced Multipath is
applicable to networks which are large enough to require that
traffic be split over multiple paths. Scalability is a major
consideration for networks that reach a capacity large enough
to require Advanced Multipath.
</t>
<t>
Some of the requirements of Advanced Multipath could potentially
have a negative impact on scalability. This section is about
architectural tradeoffs, many motivated by the need to
maintain scalability and stability, a need which is reflected
in <xref target="I-D.ietf-rtgwg-cl-requirement" />,
specifically in DR#6 and DR#7.
</t>
<section anchor="sect.scalability"
title="Scalability Motivations">
<t>
In the interest of scalability, information is aggregated in
situations where information about a large amount of network
capacity or a large amount of network demand is
adequate to meet requirements. Routing information is
aggregated to reduce the amount of information exchange
related to routing and to simplify route computation (see
<xref target="sect.routing-tradeoff" />).
</t>
<t>
In an MPLS network large routing changes can occur when a
single fault occurs. For example, a single fault may impact
a very large number of LSP traversing a given link. As new
LSP are signaled to avoid the fault, resources are consumed
elsewhere, and routing protocol announcements must flood the
resource changes. If protection is in place, there is less
urgency to converging quickly. If multiple faults occur
that are not covered by shared risk groups (SRG), then some
protection may fail, adding urgency to converging quickly
even where protection is deployed.
</t>
<t>
Reducing the amount of information allows the exchange of
information during a large routing change to be accomplished
more quickly and simplifies route computation. Simplifying
route computation improves convergence time after very
significant network faults which cannot be handled by
preprovisioned or precomputed protection mechanisms.
Aggregating smaller LSP into larger LSP is a means to reduce
path computation load and reduce RSVP-TE signaling (see
<xref target="sect.signaling-tradeoff" />).
</t>
<t>
Neglecting scaling issues can result in performance issues,
such as slow convergence. Neglecting scaling in some cases
can result in networks which perform so poorly as to become
unstable.
</t>
</section>
<section anchor="sect.routing-tradeoff"
title="Reducing Routing Information and Exchange">
<t>
Link bundling provides a means of aggregating control plane
information. Even where the all-ones component link
supported by link bundling is not used, the amount of
control information is reduced by the number of component
links in a bundle.
</t>
<t>
Fully deaggregating link bundle information would negate
this benefit. If there is a need to deaggregate, such as to
distinguish between groups of links within specified ranges
of delay, then no more deaggregation than is necessary
should be done.
</t>
<t>
For example, in supporting the requirement for heterogeneous
component links, it makes little sense to fully deaggregate
link bundles when adding support for groups of component
links with common attributes within a link bundle can
maintain most of the benefit of aggregation while adequately
supporting the requirement to support heterogeneous
component links.
</t>
<t>
Routing information exchange is also reduced by making
sensible choices regarding the amount of change to link
parameters that require link readvertisement. For example,
if delay measurements include queuing delay, then a much
more coarse granularity of delay measurement would be called
for than if the delay does not include queuing and is
dominated by geographic delay (speed of light delay).
</t>
</section>
<section anchor="sect.signaling-tradeoff"
title="Reducing Signaling Load">
<t>
Aggregating traffic into very large hierarchical LSP in the
core very substantially reduces the number of LSP that need
to be signaled and the number of path computations any given
LSR will be required to perform when a network fault occurs.
</t>
<t>
In the extreme, applying MPLS to a very large network
without hierarchy could exceed the 20 bit label space. For
example, a network with 4,000 nodes, with 2,000 on either
side of a cutset, would have 4,000,000 LSP crossing the
cutset. Even in a degree four cutset, an uneven
distribution of LSP across the cutset, or the loss of one
link would result in a need to exceed the size of the label
space. Among provider networks, 4,000 access nodes is not
at all large. Hierarchy is an absolute requirement if all
access nodes were interconnected in such a network.
</t>
<t>
In less extreme cases, having each node terminate hundreds
of LSP to achieve a full mesh creates a very large
computational load. Computational complexity is a function
of the number of nodes (N) and links (L) in a topology, and
the number of LSP that need to be set up. In the common
case where L is proportional to N (relatively constant node
degree with growth), the time complexity of one CSPF
computation is order(N log N). If each node must perform
order(N) computations when a fault occurs, then the
computational load increases as order(N^2 log N) as the
number of nodes increases (where "^" is the power of
operator and "N^2" is read "N-squared"). In practice at the
time of writing, this imposes a limit of a few hundred nodes
in a full mesh of MPLS LSP before the computational load is
sufficient to result in unacceptable convergence times.
</t>
<t>
Two solutions are applied to reduce the amount of RSVP-TE
signaling. Both involve subdividing the MPLS domain into a
core and a set of regions.
</t>
<section title="Reducing Signaling Load using LDP MPTP">
<t>
LDP can be used for edge-to-edge LSP, using RSVP-TE to
carry the LDP intra-core traffic and optionally also
using RSVP-TE to carry the LDP intra-region traffic within
each region. LDP does not support traffic engineering,
but does support multipoint-to-point (MPTP) LSP, which
require less signaling than edge-to-edge RSVP-TE
point-to-point (PTP) LSP. A drawback of this approach is
the inability to use RSVP-TE protection (FRR or GMPLS
protection) against failure of the border LSR sitting at a
core/region boundary.
</t>
</section>
<section title="Reducing Signaling Load using Hierarchy">
<t>
When the number of nodes grows too large, the amount of
RSVP-TE signaling can be reduced using the MPLS PSC
hierarchy <xref target="RFC4206" />. A core within the
hierarchy can divide the topology into M regions of on
average N/M nodes. Within a region the computational load
is reduced by more than M^2. Within the core, the
computational load generally becomes quite small since M
is usually a fairly small number (a few tens of regions)
and each region is generally attached to the core in
typically only two or three places on average.
</t>
<t>
Using hierarchy improves scaling but has two consequences.
First, hierarchy effectively forces the use of platform
label space. When a containing LSP is rerouted, the
labels assigned to the contained LSP cannot be changed but
may arrive on a different interface. Second, hierarchy
results in much larger LSP. These LSP today are larger
than any single component link and therefore force the use
of the all-ones component in link bundles.
</t>
</section>
<section title="Using Both LDP MPTP and RSVP-TE Hierarchy">
<t>
It is also possible to use both LDP and RSVP-TE hierarchy.
MPLS networks with a very large number of nodes may
benefit from the use of both LDP and RSVP-TE hierarchy.
The two techniques are certainly not mutually exclusive.
</t>
</section>
</section>
<section anchor="sect.dp-tradeoff"
title="Reducing Forwarding State">
<t>
Both LDP and MPLS hierarchy have the benefit of reducing the
amount of forwarding state. Using the example from <xref
target="sect.signaling-tradeoff" />, and using MPLS
hierarchy, the worst case generally occurs at borders with
the core.
</t>
<t>
For example, consider a network with approximately 1,000
nodes divided into 10 regions. At the edges, each node
requires 1,000 LSP to other edge nodes. The edge nodes also
require 100 intra-region LSP. Within the core, if the core
has only 3 attachments to each region the core LSR have less
than 100 intra-core LSP. At the border cutset between the
core and a given region, in this example there are 100 edge
nodes with inter-region LSP crossing that cutset, destined
to 900 other edge nodes. That yields forwarding state for
on the order of 90,000 LSP at the border cutset. These same
routers need only reroute well under 200 LSP when a multiple
fault occurs, as long as only links are affected and a
border LSR does not go down.
</t>
<t>
Interior to the core, the forwarding state is greatly
reduced. If inter-region LSP have different
characteristics, it makes sense to make use of aggregates
with different characteristics. Rather than exchange
information about every inter-region LSP within the
intra-core LSP it makes more sense to use multiple
intra-core LSP between pairs of core nodes, each aggregating
sets of inter-region LSP with common characteristics or
common requirements.
</t>
</section>
<section anchor="sect.oscillation"
title="Avoiding Route Oscillation">
<t>
Networks can become unstable when a feedback loop exists
such that moving traffic to a link causes a metric such as
delay to increase, which then causes traffic to move
elsewhere. For example, the original ARPANET routing used a
delay based cost metric and proved prone to route
oscillations <xref target="DBP" />.
</t>
<t>
Delay may be used as a constraint in routing for high
priority traffic, when this high priority traffic makes a
minor contribution to total load, such that the movement of
the high priority traffic has a small impact on the delay
experienced by other high priority traffic. The safest way
to measure delay is to make measurements based on traffic
which is prioritized such that it is queued ahead of the
lower priority traffic which will be affected if high
priority traffic is moved. The amount of high priority
traffic must be constrained to consume a fraction of link
capacities with the remaining capacity available to lower
priority traffic.
</t>
<t>
Any measurement of jitter (delay variation) that is used in
route decision is likely to cause oscillation. Jitter
is caused by queuing effects and cannot be measured using a
very high priority measurement traffic flow.
</t>
<t>
It may be possible to find links with constrained queuing
delay or jitter using a theoretical maximum or a probability
based bound on queuing delay or jitter at a given priority
based on the types and amounts of traffic accepted and
combining that theoretical limit with a measured delay at
very high priority. Using delay or jitter as path metrics
without creating oscillations is challenging.
</t>
<t>
Instability can occur due to poor performance and
interaction with protocol timers. In this way a
computational scaling problem can become a stability problem
when a network becomes sufficiently large.
</t>
</section>
</section>
<section anchor="sect.challenges"
title="New Challenges">
<t>
New technical challenges are posed by
<xref target="I-D.ietf-rtgwg-cl-requirement" />
in both the control plane and data plane.
</t>
<t>
Among the more difficult challenges are the following.
<list style="numbers">
<t>
The requirements related to delay or jitter conflict with
requirements for scalability and stability (see
<xref target="sect.delay-cspf" />),
</t>
<t>
The combination of ingress control over LSP placement and
retaining an ability to move traffic as demands dictate
can pose challenges and such requirements can even be
conflicting (see <xref target="sect.local-control" />),
</t>
<t>
Path symmetry requires extensions and is particularly
challenging for very large LSP (see
<xref target="sect.path-symmetry" />),
</t>
<t>
Accommodating a very wide range of requirements among
contained LSP can lead to inefficiency if the most
stringent requirements are reflected in aggregates, or
reduce scalability if a large number of aggregates are
used to provide too fine a reflection of the
requirements in the contained LSP (see
<xref target="sect.contained-lsp" />),
</t>
<t>
Backwards compatibility is somewhat limited due to the
need to accommodate legacy multipath interfaces which
provide too little information regarding their configured
default behavior, and legacy LSP which provide too little
information regarding their LSP requirements (see
<xref target="sect.compat" />),
</t>
<t>
Data plane challenges include those of accommodating very
large LSP, large microflows, traffic ordering constraints
imposed by a subset of LSP, and accounting for IP and LDP
traffic (see <xref target="sect.dp-challenge" />).
</t>
</list>
</t>
<section anchor="sect.cp-challenge"
title="Control Plane Challenges">
<t>
Some of the control plane requirements are particularly
challenging. Handling large flows which aggregate smaller
flows must be accomplished with minimal impact on
scalability. Potentially conflicting are requirements for
jitter and requirements for stability. Potentially
conflicting are the requirements for ingress control of a
large number of parameters, and the requirements for local
control needed to achieve traffic balance across an Advanced
Multipath. These challenges and potential solutions are
discussed in the following sections.
</t>
<section anchor="sect.delay-cspf"
title="Delay and Jitter Sensitive Routing">
<t>
Delay and jitter sensitive routing are called for in
<xref target="I-D.ietf-rtgwg-cl-requirement" />
in requirements FR#2, FR#7, FR#8, FR#9, FR#15, FR#16, FR#17,
and FR#18. Requirement FR#17 is particularly problematic,
calling for constraints on jitter.
</t>
<t>
A tradeoff exists between scaling benefits of aggregating
information, and potential benefits of using a finer
granularity in delay reporting. To maintain the scaling
benefit, measured link delay for any given Advanced Multipath
SHOULD be aggregated into a small number of delay ranges.
IGP-TE extensions MUST be provided which advertise the
available capacities for each of the selected ranges.
</t>
<t>
For path selection of delay sensitive LSP, the ingress
SHOULD bias link metrics based on available capacity and
select a low cost path which meets LSP total path delay
criteria. To communicate the requirements of an LSP, the
ERO MUST be extended to indicate the per link constraints.
To communicate the type of resource used, the RRO SHOULD
be extended to carry an identification of the group that
is used to carry the LSP at each link bundle hop.
</t>
</section>
<section anchor="sect.local-control"
title="Local Control of Traffic Distribution">
<t>
Many requirements in
<xref target="I-D.ietf-rtgwg-cl-requirement" />
suggest that a node immediately adjacent to a component
link should have a high degree of control over how traffic
is distributed, as long as network performance objectives
are met. Particularly relevant are FR#18 and FR#19.
</t>
<t>
The requirements to allow local control are potentially in
conflict with requirement FR#21 which gives full control
of component link selection to the LSP ingress. While
supporting this capability is mandatory, use of this
feature is optional per LSP.
</t>
<t>
A given network deployment will have to consider this set
of conflicting requirements and make appropriate use of
local control of traffic placement and ingress control of
traffic placement to best meet network requirements.
</t>
</section>
<section anchor="sect.path-symmetry"
title="Path Symmetry Requirements">
<t>
Requirement FR#21 in
<xref target="I-D.ietf-rtgwg-cl-requirement" />
includes a provision to bind both directions of a
bidirectional LSP to the same component. This is easily
achieved if the LSP is directly signaled across an
Advanced Multipath. This is not as easily achieved if a set
of LSP with this requirement are signaled over a large
hierarchical LSP which is in turn carried over an Advanced
Multipath. The basis for load distribution in such a case is
the label stack. The labels in either direction are
completely independent.
</t>
<t>
This could be accommodated if the ingress, egress, and all
midpoints of the hierarchical LSP make use of an entropy
label in the distribution, and the ingress uses a fixed
value per contained LSP in the entropy label. A solution
for this problem may add complexity with very little
benefit. There is little or no true benefit of using
symmetrical paths rather than component links of identical
characteristics.
</t>
<t>
Traffic symmetry and large LSP capacity are a second pair
of conflicting requirements. Any given LSP can meet one
of these two requirements but not both. A given network
deployment will have to make appropriate use of each of
these features to best meet network requirements.
</t>
</section>
<section anchor="sect.contained-lsp"
title="Requirements for Contained LSP">
<t>
<xref target="I-D.ietf-rtgwg-cl-requirement" />
calls for new LSP constraints. These constraints include
frequency of load balancing rearrangement, delay and
jitter, packet ordering constraints, and path symmetry.
</t>
<t>
When LSP are contained within hierarchical LSP, there is
no signaling available at midpoint LSR which identifies
the contained LSP let alone providing the set of
requirements unique to each contained LSP. Defining
extensions to provide this information would severely
impact scalability and defeat the purpose of aggregating
control information and forwarding information into
hierarchical LSP. For the same scalability reasons, not
aggregating at all is not a viable option for large
networks where scalability and stability problems may
occur as a result.
</t>
<t>
As pointed out in <xref target="sect.path-symmetry" />, the
benefits of supporting symmetric paths among LSP contained
within hierarchical LSP may not be sufficient to justify
the complexity of supporting this capability.
</t>
<t>
A scalable solution which accommodates multiple sets of
LSP between given pairs of LSR is to provide multiple
hierarchical LSP for each given pair of LSR, each
hierarchical LSP aggregating LSP with common requirements
and a common pair of endpoints. This is a network design
technique available to the network operator rather than a
protocol extension. This technique can accommodate
multiple sets of delay and jitter parameters, multiple
sets of frequency of load balancing parameters, multiple
sets of packet ordering constraints, etc.
</t>
</section>
<section anchor="sect.compat"
title="Retaining Backwards Compatibility">
<t>
Backwards compatibility and support for incremental
deployment require considering the impact of legacy LSR
in the role of LSP ingress, and considering the impact of
legacy LSR advertising ordinary links, advertising
Ethernet LAG as ordinary links, and advertising link
bundles.
</t>
<t>
Legacy LSR in the role of LSP ingress cannot signal
requirements which are not supported by their control
plane software. The additional capabilities supported by
other LSR have no impact on these LSR. These LSR, however,
being unaware of extensions, may try to make use of scarce
resources which support specific requirements such as low
delay. To a limited extent it may be possible for a
network operator to avoid this issue using existing
mechanisms such as link administrative attributes and
attribute affinities <xref target="RFC3209" />.
</t>
<t>
Legacy LSR advertising ordinary links will not advertise
attributes needed by some LSP. For example, there is no
way to determine the delay or jitter characteristics of
such a link. Legacy LSR advertising Ethernet LAG pose
additional problems. There is no way to determine that
packet ordering constraints would be violated for LSP with
strict packet ordering constraints, or that frequency of
load balancing rearrangement constraints might be
violated.
</t>
<t>
Legacy LSR advertising link bundles have no way to
advertise the configured default behavior of the link
bundle. Some link bundles may be configured to place each
LSP on a single component link and therefore may not be
able to accommodate an LSP which requires bandwidth in
excess of the size of a component link. Some link bundles
may be configured to spread all LSP over the all-ones
component. For LSR using the all-ones component link,
there is no documented procedure for correctly setting the
"Maximum LSP Bandwidth". There is currently no way to
indicate the largest microflow that could be supported by
a link bundle using the all-ones component link.
</t>
<t>
Having received the RRO, it is possible for an ingress to
look for the all-ones component to identify such link
bundles after having signaled at least one LSP. Whether
any LSR collects this information on legacy LSR and makes
use of it to set defaults, is an implementation choice.
</t>
</section>
</section>
<section anchor="sect.dp-challenge"
title="Data Plane Challenges">
<t>
Flow identification is briefly discussed in
<xref target="sect.flow-id" />.
Traffic distribution is briefly discussed in
<xref target="sect.data-plane" />.
This section discusses issues specific to particular
requirements specified in
<xref target="I-D.ietf-rtgwg-cl-requirement" />.
</t>
<section anchor="sect.large-lsp"
title="Very Large LSP">
<t>
Very large LSP may exceed the capacity of any single
component of an Advanced Multipath. In some cases contained
LSP may exceed the capacity of any single component.
These LSP may make use of the equivalent of the all-ones
component of a link bundle, or may use a subset of
components which meet the LSP requirements.
</t>
<t>
Very large LSP can be accommodated as long as they can be
subdivided (see <xref target="sect.large-flows" />). A
very large LSP cannot have a requirement for symmetric
paths unless complex protocol extensions are proposed (see
<xref target="sect.control-plane" /> and <xref
target="sect.path-symmetry" />).
</t>
</section>
<section anchor="sect.large-flows"
title="Very Large Microflows">
<t>
Within a very large LSP there may be very large
microflows. A very large microflow is one which cannot be
further subdivided and contributes a very large amount of
capacity. Flows which cannot be subdivided must be no
larger than the capacity of any single component link.
</t>
<t>
Current signaling provides no way to specify the largest
microflow that can be supported on a given link bundle
in routing advertisements. Extensions which address this
are discussed in <xref target="sect.multipath-extn" />.
Absent extensions of this type, traffic containing
microflows that are too large for a given Advanced Multipath
may be present. There is no data plane solution for this
problem that would not require reordering traffic at the
Advanced Multipath egress.
</t>
<t>
Some techniques are susceptible to statistical collisions
where an algorithm to distribute traffic is unable to
disambiguate traffic among two or more very large
microflows where their sum is in excess of the capacity of
any single component. Hash based algorithms which use too
small a hash space are particularly susceptible and require
a change in hash seed in the event that this were to
occur. A change in hash seed is highly disruptive,
causing traffic reordering among all traffic flows over
which the hash function is applied.
</t>
</section>
<section anchor="sect.ordering"
title="Traffic Ordering Constraints">
<t>
Some LSP have strict traffic ordering constraints. Most
notable among these are MPLS-TP LSP. In the absence of
aggregation into hierarchical LSP, those LSP with strict
traffic ordering constraints can be placed on individual
component links if there is a means of identifying which
LSP have such a constraint. If LSP with strict traffic
ordering constraints are aggregated in hierarchical LSP,
the hierarchical LSP capacity may exceed the capacity of
any single component link. In such a case the load
balancing may be constrained through the use of an entropy
label <xref target="RFC6790" />.
This and related issues are discussed further in
<xref target="sect.multipath-extn" />.
</t>
</section>
<section anchor="sect.ip+ldp"
title="Accounting for IP and LDP Traffic">
<t>
Networks which carry RSVP-TE signaled MPLS traffic
generally carry low volumes of native IP traffic, often
only carrying control traffic as native IP. There is no
architectural guarantee of this; it is just how network
operators have made use of the protocols.
</t>
<t>
<xref target="I-D.ietf-rtgwg-cl-requirement" /> requires
that native IP and native LDP be accommodated (DR#2 and
DR#3). In some networks, a subset of services may be
carried as native IP or carried as native LDP. Today this
may be accommodated by the network operator estimating the
contribution of IP and LDP and configuring a lower set of
available bandwidth figures on the RSVP-TE advertisements.
</t>
<t>
The only improvement that Advanced Multipath can offer is that
of measuring the IP and LDP traffic levels and
automatically reducing the available bandwidth figures on
the RSVP-TE advertisements. The measurements would have
to be filtered. This is similar to a feature in existing
LSR, commonly known as "autobandwidth" with a key
difference. In the "autobandwidth" feature, the bandwidth
request of an RSVP-TE signaled LSP is adjusted in response
to traffic measurements. In this case the IP or LDP
traffic measurements are used to reduce the link bandwidth
directly, without first encapsulating in an RSVP-TE LSP.
</t>
<t>
This may be a subtle and perhaps even a meaningless
distinction if Advanced Multipath is used to form a Sub-Path
Maintenance Element (SPME). A SPME is in practice
essentially an unsignaled single hop LSP with PHP enabled
<xref target="RFC5921" />. An Advanced Multipath SPME looks
very much like classic multipath, where there is no
signaling, only management plane configuration creating
the multipath entity (of which Ethernet Link Aggregation
is a subset).
</t>
</section>
<section anchor="sect.ldp-limitations"
title="IP and LDP Limitations">
<t>
IP does not offer traffic engineering. LDP cannot be
extended to offer traffic engineering <xref
target="RFC3468" />. Therefore there is no traffic
engineered fallback to an alternate path for IP and LDP
traffic if resources are not adequate for the IP and/or
LDP traffic alone on a given link in the primary path.
The only option for IP and LDP would be to declare the
link down. Declaring a link down due to resource
exhaustion would reduce traffic to zero and eliminate the
resource exhaustion. This would cause oscillations and is
therefore not a viable solution.
</t>
<t>
Congestion caused by IP or LDP traffic loads is a
pathologic case that can occur if IP and/or LDP are
carried natively and there is a high volume of IP or LDP
traffic. This situation can be avoided by carrying IP and
LDP within RSVP-TE LSP.
</t>
<t>
It is also not possible to route LDP traffic differently
for different FEC. LDP traffic engineering is
specifically disallowed by <xref target="RFC3468" />. It
may be possible to support multi-topology IGP extensions
to accommodate more than one set of criteria. If so, the
additional IGP could be bound to the forwarding criteria,
and the LDP FEC bound to a specific IGP instance,
inheriting the forwarding criteria. Alternately, one IGP
instance can be used and the LDP SPF can make use of the
constraints, such as delay and jitter, for a given LDP
FEC.
</t>
</section>
</section>
</section>
<section anchor="sect.existing"
title="Existing Mechanisms">
<t>
In MPLS the one mechanism which supports explicit signaling
of multiple parallel links is Link Bundling
<xref target="RFC4201" />.
The set of techniques known as "classic multipath" support no
explicit signaling, except in two cases. In Ethernet Link
Aggregation the Link Aggregation Control Protocol (LACP)
coordinates the addition or removal of members from an
Ethernet Link Aggregation Group (LAG). The use of the
"all-ones" component of a link bundle indicates use of classic
multipath, however the ability to determine if a link bundle
makes use of classic multipath is not yet supported.
</t>
<section anchor="sect.link-bundle"
title="Link Bundling">
<t>
Link bundling supports advertisement of a set of homogeneous
links as a single route advertisement. Link bundling
supports placement of an LSP on any single component link,
or supports placement of an LSP on the all-ones component
link. Not all link bundling implementations support the
all-ones component link. There is no way for an ingress LSR
to tell which potential midpoint LSR support this feature
and use it by default and which do not. Based on <xref
target="RFC4201" /> it is unclear how to advertise a link
bundle for which the all-ones component link is available
and used by default. Common practice is to violate the
specification and set the Maximum LSP Bandwidth to the
Available Bandwidth. There is no means to determine the
largest microflow that could be supported by a link bundle
that is using the all-ones component link.
</t>
<t>
<xref target="RFC6107" /> extends the procedures for
hierarchical LSP but also extends link bundles. An LSP can
be explicitly signaled to indicate that it is an LSP to be
used as a component of a link bundle. Prior to that the
common practice was to simply not advertise the component
link LSP into the IGP, since only the ingress and egress of
the link bundle needed to be aware of their existence, which
they would be aware of due to the RSVP-TE signaling used in
setting up the component LSP.
</t>
<t>
While link bundling can be the basis for Advanced Multipath, a
significant number of small extensions need to be added.
<list style="numbers">
<t>
To support link bundles of heterogeneous links, a means
of advertising the capacity available within a group of
homogeneous links needs to be provided.
</t>
<t>
Attributes need to be defined to support the following
parameters for the link bundle or for a group of
homogeneous links.
<list style="letters">
<t>delay range</t>
<t>jitter (delay variation) range</t>
<t>group metric</t>
<t>all-ones component capable</t>
<t>capable of dynamically balancing load</t>
<t>largest supportable microflow</t>
<t>support for entropy label</t>
</list>
</t>
<t>
For each of the prior extended attributes, the
constraint based routing path selection needs to be
extended to reflect new constraints based on the
extended attributes.
</t>
<t>
For each of the prior extended attributes, LSP admission
control needs to be extended to reflect new constraints
based on the extended attributes.
</t>
<t>
Dynamic load balance must be provided for flows within a
given set of links with common attributes such that
Performance Objectives are not violated including
frequency of load balance adjustment for any given flow.
</t>
</list>
</t>
</section>
<section anchor="sect.classic-mp"
title="Classic Multipath">
<t>
Classic multipath is described in
<xref target="I-D.ietf-rtgwg-cl-use-cases" />.
</t>
<t>
Classic multipath refers to the most common current practice
in implementation and deployment of multipath. The most
common current practice makes use of a hash on the MPLS
label stack and if IPv4 or IPv6 are indicated under the
label stack, makes use of the IP source and destination
addresses
<xref target="RFC4385" /> <xref target="RFC4928" />.
</t>
<t>
Classic multipath provides a highly scalable means of load
balancing. Dynamic multipath has proven value in assuring
an even loading on component links and an ability to adapt to
change in offered load that occurs over periods of hundreds
of milliseconds or more. Classic multipath scalability is
due to the ability to effectively work with an extremely
large number of flows (IP host pairs) using relatively
little resources (a data structure accessed using a hash
result as a key or using ranges of hash results).
</t>
<t>
Classic multipath meets a small subset of Advanced Multipath
requirements. Due to scalability of the approach, classic
multipath seems to be an excellent candidate for extension
to meet the full set of Advanced Multipath forwarding
requirements.
</t>
<t>
Additional detail can be found in <xref
target="I-D.ietf-rtgwg-cl-use-cases" />.
</t>
</section>
</section>
<section anchor="sect.proposed"
title="Mechanisms Proposed in Other Documents">
<t>
A number of documents which at the time of writing are works
in progress address parts of the requirements of Advanced
Multipath, or assist in making some of the goals achievable.
</t>
<section anchor="sect.loss-delay"
title="Loss and Delay Measurement">
<t>
Procedures for measuring loss and delay are provided in
<xref target="RFC6374" />. These are OAM based
measurements. This work could be the basis of delay
measurements and delay variation measurement used for
metrics called for in <xref
target="I-D.ietf-rtgwg-cl-requirement" />.
</t>
<t>
Currently there are three documents that address delay and
delay variation metrics.
</t>
<t>
<list hangIndent="4" style="hanging">
<t hangText="draft-ietf-ospf-te-metric-extensions">
<vspace blankLines="0" />
<xref target="I-D.ietf-ospf-te-metric-extensions" />
provides a set of OSPF-TE extensions to support delay,
jitter, and loss. Stability is not adequately
addressed and some minor issues remain.
</t>
<t hangText="I-D.previdi-isis-te-metric-extensions">
<vspace blankLines="0" />
<xref target="I-D.previdi-isis-te-metric-extensions" />
provides the set of extensions for ISIS that
<xref target="I-D.ietf-ospf-te-metric-extensions" />
provides for OSPF. This draft mirrors
<xref target="I-D.ietf-ospf-te-metric-extensions" />
sometimes lagging for a brief period when the OSPF
version is updated.
</t>
<t hangText="I-D.atlas-mpls-te-express-path">
<vspace blankLines="0" />
<xref target="I-D.atlas-mpls-te-express-path" />
provides information on the use of OSPF and ISIS
extensions defined in
<xref target="I-D.ietf-ospf-te-metric-extensions" />
and
<xref target="I-D.previdi-isis-te-metric-extensions" />
and a modified CSPF path selection to meet LSP
performance criteria such as minimal delay paths or
bounded delay paths.
</t>
</list>
</t>
<t>
Delay variance, loss, residual bandwidth, and available
bandwidth extensions are particularly prone to network
instability. The question as to whether queuing delay and
delay variation should be considered, and if so for which
diffserv Per-Hop Service Class (PSC) is not adequately
addressed in the current versions of these drafts. These
drafts are actively being discussed and updated and
remaining issues are expected to be resolved.
</t>
</section>
<section anchor="sect.bundle-extn"
title="Link Bundle Extensions">
<t>
A set of extensions is needed to indicate a group of
component links in the ERO or RRO, where the group is given
an interface identification like the bundle itself. The
extensions could also be further extended to support
specification of the all-ones component link in the ERO or
RRO.
</t>
<t>
<xref target="I-D.ospf-cc-stlv" /> provides a baseline draft
for extending link bundling to advertise components. A new
component TLV (C-TLV) is proposed, which must reference an
Advanced Multipath Link TLV. <xref target="I-D.ospf-cc-stlv" />
is intended for the OSPF WG and submitted for the
"Experimental" track. The 00 version expired in February
2012. A replacement is expected that will be submitted for
consideration on the standards track.
</t>
</section>
<section anchor="sect.entropy"
title="Pseudowire Flow and MPLS Entropy Labels">
<t>
Two documents provide a means to add entropy for the purpose
of improving load balance. MPLS encapsulation can bury
information that is needed to identify microflows. These
two documents allow a pseudowire ingress and LSP ingress
respectively to add a label solely for the purpose of
providing a finer granularity of microflow groups.
</t>
<t>
<xref target="RFC6391" />
allows pseudowires which carry a large volume of traffic,
where microflows can be identified to be load balanced
across multiple members of an Ethernet LAG or an MPLS link
bundle. This is accomplished by adding a flow label below
the pseudowire label in the MPLS label stack. For this to
be effective the link bundle load balance must make use of
the label stack up to and including this flow label.
</t>
<t>
<xref target="RFC6790" />
provides a means for a LER to put an additional label known
as an entropy label on the MPLS label stack. Only the LER
can add the entropy label. The LER of a PSC LSP would have
to add an entropy label for contained LSPs for which it is a
midpoint LSR.
</t>
<t>
Core LSR acting as LER for aggregated LSP can add entropy
labels based on deep packet inspection and place an entropy
label indicator (ELI) and entropy label (EL) just below the
label being acted on. This would be helpful in situations
where the label stack depth to which load distribution can
operate is limited by implementation or is limited for other
reasons such as carrying both MPLS-TP and MPLS with entropy
labels within the same hierarchical LSP.
</t>
</section>
<section anchor="sect.multipath-extn"
title="Multipath Extensions">
<t>
The multipath extensions drafts address the issue of
accommodating LSP which have strict packet ordering
constraints in a network containing multipath. MPLS-TP has
become the one important instance of LSP with strict packet
ordering constraints and has driven this work.
</t>
<t>
<xref target="I-D.ietf-mpls-multipath-use" />
proposed to use MPLS Entropy Label
<xref target="RFC6790" />
to allow MPLS-TP to be carried within MPLS LSP that make use
of multipath. Limitations of this approach in the absence
of protocol extensions are discussed.
</t>
<t>
<xref target="I-D.villamizar-mpls-multipath-extn" />
provides the protocol extensions needed to overcome the
limitations which arise in the absence of protocol
extensions, as discussed in
<xref target="I-D.ietf-mpls-multipath-use" />.
</t>
</section>
</section>
<section anchor="sect.needed-extn"
title="Required Protocol Extensions and Mechanisms">
<t>
Prior sections have reviewed key characteristics, architecture
tradeoffs, new challenges, existing mechanisms, and relevant
mechanisms proposed in other documents.
</t>
<t>
This section first summarizes and groups requirements
specified in <xref target="I-D.ietf-rtgwg-cl-requirement" />
(see <xref target="sect.reqm-review" />).
A set of document coverage groupings is proposed with
existing works-in-progress noted where applicable (see
<xref target="sect.doclist" />).
The set of extensions are then grouped by protocol affected as
a convenience to implementors (see
<xref target="sect.by-protocol" />).
</t>
<section anchor="sect.reqm-review"
title="Brief Review of Requirements">
<t>
The following list provides a categorization of requirements
specified in
<xref target="I-D.ietf-rtgwg-cl-requirement" /> along with a
short phrase indicating what topic the requirement covers.
</t>
<t>
<list hangIndent="4" style="hanging">
<!-- #1 -->
<t hangText="routing information aggregation">
<vspace blankLines="0" />
FR#1 (routing summarization), FR#20 (Advanced Multipath may
be a component of another Advanced Multipath)
</t>
<!-- #2 -->
<t hangText="restoration speed">
<vspace blankLines="0" />
FR#2 (restoration speed meeting performance objectives),
FR#12 (minimally disruptive load rebalance), DR#6 (fast
convergence), DR#7 (fast worst case failure convergence)
</t>
<!-- #3 -->
<t hangText="load distribution, stability, minimal disruption">
<vspace blankLines="0" />
FR#3 (automatic load distribution), FR#5 (must not
oscillate), FR#11 (dynamic placement of flows), FR#12
(minimally disruptive load rebalance), FR#13 (bounded
rearrangement frequency), FR#18 (flow placement must
satisfy performance objectives), FR#19 (flow
identification finer than per top level LSP), MR#6
(operator initiated flow rebalance)
</t>
<!-- #4 -->
<t hangText="backward compatibility and migration">
<vspace blankLines="0" />
FR#4 (smooth incremental deployment), FR#6 (management
and diagnostics must continue to function), DR#1
(extend existing protocols), DR#2 (extend LDP, no LDP
TE)
</t>
<!-- #5 -->
<t hangText="delay and delay variation">
<vspace blankLines="0" />
FR#7 (expose lower layer measured delay), FR#8
(precision of latency reporting), FR#9 (limit latency on
per LSP basis), FR#15 (minimum delay path), FR#16
(bounded delay path), FR#17 (bounded jitter path)
</t>
<!-- #6 -->
<t hangText="admission control, preemption, traffic engineering">
<vspace blankLines="0" />
FR#10 (admission control, preemption), FR#14 (packet
ordering), FR#21 (ingress specification of path), FR#22
(path symmetry), DR#3 (IP and LDP traffic), MR#3
(management specification of path)
</t>
<!-- #7 -->
<t hangText="single vs multiple domain">
<vspace blankLines="0" />
DR#4 (IGP extensions allowed within single domain), DR#5
(IGP extensions disallowed in multiple domain case)
</t>
<!-- #8 -->
<t hangText="general network management">
<vspace blankLines="0" />
MR#1 (polling, configuration, and notification), MR#2
(activation and de-activation)
</t>
<!-- #9 -->
<t hangText="path determination, connectivity verification">
<vspace blankLines="0" />
MR#4 (path trace), MR#5 (connectivity verification)
</t>
</list>
</t>
<t>
The above list is not intended as a substitute for <xref
target="I-D.ietf-rtgwg-cl-requirement" />, but rather as a
concise grouping and reminder of requirements to serve as a
means of more easily determining requirements coverage of a
set of protocol documents.
</t>
</section>
<section anchor="sect.doclist"
title="Proposed Document Coverage">
<t>
The primary areas where additional protocol extensions and
mechanisms are required include the topics described in the
following subsections.
</t>
<t>
There are candidate documents for a subset of the topics
below. This grouping of topics does not require that each
topic be addressed by a separate document. In some cases, a
document may cover multiple topics, or a specific topic may
be addressed as applicable in multiple documents.
</t>
<section anchor="r.bundle"
title="Component Link Grouping">
<t>
An extension to link bundling is needed to specify a group
of components with common attributes. This can be a TLV
defined within the link bundle that carries the same
encapsulations as the link bundle. Two interface indices
would be needed for each group.
<list style="letters">
<t>
An index is needed that if included in an ERO would
indicate the need to place the LSP on any one
component within the group.
</t>
<t>
A second index is needed that if included in an ERO
would indicate the need to balance flows within the
LSP across all components of the group. This is
equivalent to the "all-ones" component for the entire
bundle.
</t>
</list>
<xref target="I-D.ospf-cc-stlv" /> can be extended to
include multipath treatment capabilities. An ISIS
solution is also needed. An extension of RSVP-TE
signaling is needed to indicate multipath treatment
preferences.
</t>
<t>
If a component group is allowed to support all of the
parameters of a link bundle, then a group TE metric would
be accommodated. This can be supported with the component
TLV (C-TLV) defined in <xref target="I-D.ospf-cc-stlv" />.
</t>
<!-- #1 (routing information aggregation),
also:
#2 (restoration speed),
#4 (backward compatibility and migration),
#8 (general network management)
-->
<t>
The primary focus of this document, among the sets of
requirements listed in <xref target="sect.reqm-review" />
is the "routing information aggregation" set of
requirements. The "restoration speed", "backward
compatibility and migration", and "general network
management" requirements must also be considered.
</t>
</section>
<section anchor="r.delay"
title="Delay and Jitter Extensions">
<t>
An extension is needed in the IGP-TE advertisement to
support delay and delay variation for links, link bundles,
and forwarding adjacencies. Whatever mechanism is
described must take precautions that ensure that route
oscillations cannot occur.
The following set of drafts address this.
<list style="numbers">
<t><xref target="I-D.ietf-ospf-te-metric-extensions" /></t>
<t><xref target="I-D.previdi-isis-te-metric-extensions" /></t>
<t><xref target="I-D.atlas-mpls-te-express-path" /></t>
</list>
</t>
<!-- #5 (delay and delay variation),
also
#2 (restoration speed),
#4 (backward compatibility and migration),
#8 (general network management)
-->
<t>
The primary focus of this document, among the sets of
requirements listed in <xref target="sect.reqm-review" />
is the "delay and delay variation" set of requirements.
The "restoration speed", "backward compatibility and
migration", and "general network management" requirements
must also be considered.
</t>
</section>
<section anchor="r.path"
title="Path Selection and Admission Control">
<t>
Path selection and admission control changes must be
documented in each document that proposes a protocol
extension that advertises a new capability or parameter
that must be supported by changes in path selection and
admission control.
</t>
<t>
It would also be helpful to have an informational document
which covers path selection and admission control issues
in detail and briefly summarizes and references the set of
documents which propose extensions. This document could
be advanced in parallel with the protocol extensions.
</t>
<!-- #3 (load distribution, stability, minimal disruption),
#6 (admission control, preemption, traffic engineering),
also
#2 (restoration speed),
#9 (path determination, connectivity verification),
also
#4 (backward compatibility and migration),
#8 (general network management)
-->
<t>
The primary focus of this document, among the sets of
requirements listed in <xref target="sect.reqm-review" />
are the "load distribution, stability, minimal disruption"
and "admission control, preemption, traffic engineering"
sets of requirements. The "restoration speed" and "path
determination, connectivity verification" requirements
must also be considered. The "backward compatibility and
migration", and "general network management" requirements
must also be considered.
</t>
</section>
<section anchor="r.dmp"
title="Dynamic Multipath Balance">
<t>
FR#11 explicitly calls for dynamic placement of flows.
Load balancing similar to existing dynamic multipath would
satisfy this requirement. In implementations where flow
identification uses a coarse granularity, the adjustments
would have to be equally coarse, in the worst case moving
entire LSP. The impact of flow identification granularity
and potential dynamic multipath approaches may need to be
documented in greater detail than provided here.
</t>
<!-- #2 (restoration speed),
#3 (load distribution, stability, minimal disruption),
also
#9 (path determination, connectivity verification),
also
#4 (backward compatibility and migration),
#8 (general network management)
-->
<t>
The primary focus of this document, among the sets of
requirements listed in <xref target="sect.reqm-review" />
are the "restoration speed" and the "load distribution,
stability, minimal disruption" sets of requirements. The
"path determination, connectivity verification"
requirements must also be considered. The "backward
compatibility and migration", and "general network
management" requirements must also be considered.
</t>
</section>
<section anchor="r.freq-balance"
title="Frequency of Load Balance">
<t>
IGP-TE and RSVP-TE extensions are needed to support
frequency of load balancing rearrangement called for in
FR#13, and FR#15-FR#17. Constraints are not defined in
RSVP-TE, but could be modeled after administrative
attribute affinities in RFC3209 and elsewhere.
</t>
<!-- #3 (load distribution, stability, minimal disruption),
also
#9 (path determination, connectivity verification),
also
#4 (backward compatibility and migration),
#8 (general network management)
-->
<t>
The primary focus of this document, among the sets of
requirements listed in <xref target="sect.reqm-review" />
is the "load distribution, stability, minimal disruption"
set of requirements. The "path determination,
connectivity verification" requirements must also be
considered. The "backward compatibility and migration" and
"general network management" requirements must also be
considered.
</t>
</section>
<section anchor="r.ll-ul-leak"
title="Inter-Layer Communication">
<t>
Lower layer to upper layer communication is called for in
FR#7 and FR#20. Specific parameters, specifically delay
and delay variation, need to be addressed. Passing
information from a lower non-MPLS layer to an MPLS layer
needs to be addressed, though this may largely be generic
advice encouraging a coupling of MPLS to lower layer
management plane or control plane interfaces. This topic
can be addressed in each document proposing a protocol
extension, where applicable.
</t>
<!-- #2 (restoration speed),
also
#4 (backward compatibility and migration),
#8 (general network management)
-->
<t>
The primary focus of this document, among the sets of
requirements listed in <xref target="sect.reqm-review" />
is the "restoration speed" set of requirements. The
"backward compatibility and migration" and "general
network management" requirements must also be considered.
</t>
</section>
<section anchor="r.mp-tp"
title="Packet Ordering Requirements">
<t>
A document is needed to define extensions supporting
various packet ordering requirements, ranging from
requirements to preserve microflow ordering only, to
requirements to preserve full LSP ordering (as in
MPLS-TP). This is covered by <xref
target="I-D.ietf-mpls-multipath-use" /> and <xref
target="I-D.villamizar-mpls-multipath-extn" />.
</t>
<!-- #6 (admission control, preemption, traffic engineering),
#9 (path determination, connectivity verification),
also
#4 (backward compatibility and migration),
#8 (general network management)
-->
<t>
The primary focus of this document, among the sets of
requirements listed in <xref target="sect.reqm-review" />
are the "admission control, preemption, traffic
engineering" and the "path determination, connectivity
verification" sets of requirements. The "backward
compatibility and migration" and "general network
management" requirements must also be considered.
</t>
</section>
<section anchor="r.disrupt"
title="Minimally Disruptive Load Balance">
<t>
The behavior of hash methods used in classic multipath
needs to be described in terms of FR#12 which calls for
minimally disruptive load adjustments. For example,
reseeding the hash violates FR#12. Using modulo
operations is significantly disruptive if a link comes up or
goes down, as pointed out in <xref target="RFC2992" />.
In addition, backwards compatibility with older hardware
needs to be accommodated.
</t>
<!-- #3 (load distribution, stability, minimal disruption) -->
<t>
The primary focus of this document, among the sets of
requirements listed in <xref target="sect.reqm-review" />
is the "load distribution, stability, minimal disruption"
set of requirements.
</t>
</section>
<section anchor="r.symmetry"
title="Path Symmetry">
<t>
Protocol extensions are needed to support dynamic load
balance as called for to meet FR#22 (path symmetry) and to
meet FR#11 (dynamic placement of flows).
</t>
<t>
Currently path symmetry can only be supported in link
bundling if the path is pinned. When a flow is moved both
ingress and egress must make the move as close to
simultaneously as possible to satisfy FR#22 and FR#12
(minimally disruptive load rebalance). There is currently
no protocol to coordinate this move.
</t>
<t>
If a group of flows are identified using a hash, then the
hash must be identical on the pair of LSR at the endpoint,
using the same hash seed and with one side swapping source
and destination. If the label stack is used, then either
the entire label stack must be a special case flow
identification, since the set of labels in either
direction are not correlated, or the two LSR must conspire
to use the same flow identifier. For example, using a
common entropy label value, and using only the entropy
label in the flow identification would satisfy the
forwarding requirement. There is no protocol to indicate
special treatment of a label stack within a hierarchical
LSP. Adding such an extension may add significant
complexity and ultimately may prove unscalable.
</t>
<!-- #3 (load distribution, stability, minimal disruption),
#6 (admission control, preemption, traffic engineering),
also
#4 (backward compatibility and migration),
#8 (general network management),
helps with
#9 (path determination, connectivity verification)
-->
<t>
The primary focus of this document, among the sets of
requirements listed in <xref target="sect.reqm-review" />
are the "load distribution, stability, minimal disruption"
and the "admission control, preemption, traffic
engineering" sets of requirements. The "backward
compatibility and migration" and "general network
management" requirements must also be considered. Path
symmetry simplifies support for the "path determination,
connectivity verification" set of requirements, but with
significant complexity added elsewhere.
</t>
</section>
<section anchor="r.stability"
title="Performance, Scalability, and Stability">
<t>
A separate document providing analysis of performance,
scalability, and stability impacts of changes may be
needed. The topic of traffic adjustment oscillation must
also be covered. If sufficient coverage is provided in
each document covering a protocol extension, a separate
document would not be needed.
</t>
<!-- #2 (restoration speed),
impacts other documents,
should be cited by:
r.bundle, r.delay, r.path, r.symmetry, r.ip-ldp,
r.ldp-extn, r.pw-extn, r.multi-domain
possibly r.adaptive, r.freq-balance
-->
<t>
The primary focus of this document, among the sets of
requirements listed in <xref target="sect.reqm-review" />
is the "restoration speed" set of requirements. This is
not a simple topic and not a topic that is well served by
scattering it over multiple documents; therefore it may be
best to put this in a separate document and put citations
in documents called for in
<xref target="r.bundle" />,
<xref target="r.delay" />,
<xref target="r.path" />,
<xref target="r.symmetry" />,
<xref target="r.ip-ldp" />,
<xref target="r.ldp-extn" />,
<xref target="r.pw-extn" />, and
<xref target="r.multi-domain" />.
Citations may also be helpful in
<xref target="r.dmp" /> and
<xref target="r.freq-balance" />.
</t>
</section>
<section anchor="r.ip-ldp"
title="IP and LDP Traffic">
<t>
A document is needed to define the use of measurements of
native IP and native LDP traffic levels which are then
used to reduce link advertised bandwidth amounts.
</t>
<!-- #3 (load distribution, stability, minimal disruption),
#6 (admission control, preemption, traffic engineering),
also
#9 (path determination, connectivity verification),
also
#4 (backward compatibility and migration),
#8 (general network management)
-->
<t>
The primary focus of this document, among the sets of
requirements listed in <xref target="sect.reqm-review" />
are the "load distribution, stability, minimal disruption"
and the "admission control, preemption, traffic
engineering" sets of requirements.  The "path
determination, connectivity verification" requirements must also be
considered. The "backward compatibility and migration"
and "general network management" requirements must also be
considered.
</t>
</section>
<section anchor="r.ldp-extn"
title="LDP Extensions">
<t>
Extending LDP is called for in DR#2. LDP can be extended
to couple FEC admission control to local resource
availability without providing LDP traffic engineering
capability. Other LDP extensions such as signaling a
bound on microflow size and LDP LSP requirements would
provide useful information without providing LDP traffic
engineering capability.
</t>
<!-- #6 (admission control, preemption, traffic engineering),
also
#4 (backward compatibility and migration),
#8 (general network management)
-->
<t>
The primary focus of this document, among the sets of
requirements listed in <xref target="sect.reqm-review" />
is the "admission control, preemption, traffic
engineering" set of requirements. The "backward
compatibility and migration" and "general network
management" requirements must also be considered.
</t>
</section>
<section anchor="r.pw-extn"
title="Pseudowire Extensions">
<t>
Pseudowire (PW) extensions such as signaling a bound on
microflow size and signaling requirements specific to PW
would provide useful information. This information can be
carried in the PW LDP signaling
<xref target="RFC3985" />
and the PW requirements could then be used in a
containing LSP.
</t>
<!-- #6 (admission control, preemption, traffic engineering),
also
#4 (backward compatibility and migration),
#8 (general network management)
-->
<t>
The primary focus of this document, among the sets of
requirements listed in <xref target="sect.reqm-review" />
is the "admission control, preemption, traffic
engineering" set of requirements. The "backward
compatibility and migration" and "general network
management" requirements must also be considered.
</t>
</section>
<section anchor="r.multi-domain"
title="Multi-Domain Advanced Multipath">
<t>
<!-- fix me -->
DR#5 calls for Advanced Multipath to span multiple network
topologies. Component LSP may already span multiple
network topologies, though most often in practice these
are LDP signaled. Component LSP which are RSVP-TE
signaled may also span multiple network topologies using
at least three existing methods (per domain <xref
target="RFC5152" />, BRPC <xref target="RFC5441" />, PCE
<xref target="RFC4655" />). When such component links are
combined in an Advanced Multipath, the Advanced Multipath spans
multiple network topologies. It is not clear in which
document this needs to be described or whether this
description in the framework is sufficient. The authors
and/or the WG may need to discuss this. DR#5 mandates
that IGP-TE extension cannot be used. This would disallow
the use of <xref target="RFC5316" /> or <xref
target="RFC5392" /> in conjunction with <xref
target="RFC5151" />.
</t>
<!-- #7 (single vs multiple domain),
#6 (admission control, preemption, traffic engineering),
also
#1 (routing information aggregation),
#3 (load distribution, stability, minimal disruption),
#5 (delay and delay variation),
also
#4 (backward compatibility and migration),
#8 (general network management),
#9 (path determination, connectivity verification)
-->
<t>
The primary focus of this document, among the sets of
requirements listed in <xref target="sect.reqm-review" />
are "single vs multiple domain" and "admission control,
preemption, traffic engineering". The "routing
information aggregation" and "load distribution,
stability, minimal disruption" requirements need attention
due to their use of the IGP in single domain Advanced
Multipath. Other requirements such as "delay and delay
variation" can more easily be accommodated by carrying
metrics within BGP. The "path determination, connectivity
verification" requirements need attention due to
requirements to restrict disclosure of topology
information across domains in multi-domain deployments.
The "backward compatibility and migration" and "general
network management" requirements must also be considered.
</t>
</section>
</section>
<section anchor="sect.by-protocol"
title="Framework Requirement Coverage by Protocol">
<t>
As an aid to implementors, this section summarizes
requirement coverage listed in <xref target="sect.doclist"
/> by protocol or LSR functionality affected.
</t>
<t>
Some documentation may be purely informational, proposing no
changes and proposing usage at most. This includes <xref
target="r.path" />, <xref target="r.disrupt" />, <xref
target="r.stability" />, and <xref target="r.multi-domain"
/>.
</t>
<t>
<xref target="r.symmetry" /> may require a new protocol.
</t>
<section anchor="sect.by-igp"
title="OSPF-TE and ISIS-TE Protocol Extensions">
<t>
Many of the changes listed in
<xref target="sect.doclist" /> require IGP-TE changes,
though most are small extensions to provide additional
information. This set includes
<xref target="r.bundle" />, <xref target="r.delay" />,
<xref target="r.freq-balance" />,
<xref target="r.ll-ul-leak" />, and
<xref target="r.mp-tp" />. An adjustment to existing
advertised parameters is suggested in
<xref target="r.ip-ldp" />.
</t>
</section>
<section anchor="sect.by-pw-extn"
title="PW Protocol Extensions">
<t>
The only suggestion of pseudowire (PW) extensions is in
<xref target="r.pw-extn" />.
</t>
</section>
<section anchor="sect.by-ldp-extn"
title="LDP Protocol Extensions">
<t>
Potential LDP extensions are described in
<xref target="r.ldp-extn" />.
</t>
</section>
<section anchor="sect.by-rsvp-te"
title="RSVP-TE Protocol Extensions">
<t>
RSVP-TE protocol extensions are called for in
<xref target="r.bundle" />,
<xref target="r.freq-balance" />,
<xref target="r.mp-tp" />, and
<xref target="r.symmetry" />.
</t>
</section>
<section anchor="sect.by-path-select"
title="RSVP-TE Path Selection Changes">
<t>
<xref target="r.path" /> calls for path selection to be
addressed in individual documents that require change.
These changes would include those proposed in
<xref target="r.bundle" />, <xref target="r.delay" />,
<xref target="r.freq-balance" />, and
<xref target="r.mp-tp" />.
</t>
</section>
<section anchor="sect.by-ac"
title="RSVP-TE Admission Control and Preemption">
<t>
When a change is needed to path selection, a corresponding
change is needed in admission control. The same set of
sections applies: <xref target="r.bundle" />,
<xref target="r.delay" />,
<xref target="r.freq-balance" />, and
<xref target="r.mp-tp" />. Some resource changes such as
a link delay change might trigger preemption. The rules
of preemption remain unchanged, still based on holding
priority.
</t>
</section>
<section anchor="sect.by-forwarding"
title="Flow Identification and Traffic Balance">
<t>
The following describe either the state of the art in flow
identification and traffic balance or propose changes:
<xref target="r.dmp" />, <xref target="r.freq-balance" />,
<xref target="r.mp-tp" />, and
<xref target="r.disrupt" />.
</t>
</section>
</section>
</section>
<section anchor="IANA" title="IANA Considerations">
<t>
This is a framework document and therefore does not specify
protocol extensions. This memo includes no request to IANA.
</t>
</section>
<section anchor="Security" title="Security Considerations">
<t>
The security considerations for MPLS/GMPLS and for MPLS-TP are
documented in <xref target="RFC5920" />
and <xref target="RFC6941" />.
</t>
<t>
The types of protocol extensions proposed in this framework
document provide additional information about links,
forwarding adjacencies, and LSP requirements. The protocol
semantics changes described in this framework document propose
additional LSP constraints applied at path computation time
and at LSP admission at midpoint LSR.  The additional
information and constraints provide no additional security
considerations beyond the security considerations already
documented in <xref target="RFC5920" /> and
<xref target="RFC6941" />.
</t>
</section>
<section title="Acknowledgments">
<t>
Authors would like to thank Adrian Farrel, Fred Jounay, and Yuji
Kamite for their extensive comments and suggestions regarding
early versions of this document, Ron Bonica, Nabil Bitar,
Eric Gray, Lou Berger, and Kireeti Kompella for their reviews
of early versions and great suggestions.
</t>
<t>
Authors would like to thank Iftekhar Hussain for review and
suggestions regarding recent versions of this document.
</t>
<t>
In the interest of full disclosure of affiliation and in the
interest of acknowledging sponsorship, past affiliations of
authors are noted. Much of the work done by Ning So occurred
while Ning was at Verizon. Much of the work done by Curtis
Villamizar occurred while at Infinera. Infinera continues to
sponsor this work on a consulting basis.
</t>
</section>
</middle>
<back>
<references title="Normative References">
&RFC2119;
&RFC3209;
&RFC3630;
&RFC4201;
&RFC4206;
&RFC5036;
&RFC5305;
&RFC5712;
&RFC6107;
&RFC6374;
&RFC6391;
</references>
<references title="Informative References">
<!-- a framework doc can't be a normative reference -->
&RFC2475;
&RFC2991;
&RFC2992;
&RFC3468;
&RFC3260;
&RFC3945;
&RFC3985;
&RFC4448;
&RFC4655;
&RFC4385;
&RFC4928;
&RFC5151;
&RFC5152;
&RFC5316;
&RFC5392;
&RFC5441;
<!-- &RFC5586; MPLS GACH -->
&RFC5920;
&RFC5921;
&RFC6790;
&RFC6941;
&I-D.ospf-cc-stlv;
&I-D.ietf-rtgwg-cl-use-cases;
&I-D.ietf-rtgwg-cl-requirement;
<!--
&I-D.kompella-mpls-rsvp-ecmp;
-->
&I-D.ietf-mpls-multipath-use;
&I-D.villamizar-mpls-multipath-extn;
&I-D.ietf-ospf-te-metric-extensions;
&I-D.previdi-isis-te-metric-extensions;
&I-D.atlas-mpls-te-express-path;
<reference anchor="DBP">
<front>
<title>Dynamic Behavior of Shortest Path Routing Algorithms
for Communication Networks</title>
<author fullname="D. P. Bertsekas"
initials="D. P." surname="Bertsekas" />
<!-- date year="1982" / -->
</front>
<seriesInfo name="IEEE Trans. Auto. Control" value="1982" />
</reference>
</references>
</back>
</rfc>
<!-- | PAFTECH AB 2003-2026 | 2026-04-24 01:19:00 | -->