One document matched: draft-ietf-mpls-seamless-mpls-01.xml
<?xml version="1.0" encoding="US-ASCII"?>
<!-- This template is for creating an Internet Draft using xml2rfc,
which is available here: http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!-- One method to get references from the online citation libraries.
There has to be one entity for each item to be referenced.
An alternate method (rfc include) is described in the references. -->
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC2629 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2629.xml">
<!ENTITY RFC3107 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3107.xml">
<!ENTITY RFC3031 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3031.xml">
<!ENTITY RFC3209 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3209.xml">
<!ENTITY RFC3353 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3353.xml">
<!ENTITY RFC3552 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3552.xml">
<!ENTITY RFC4090 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4090.xml">
<!ENTITY RFC4364 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4364.xml">
<!ENTITY RFC5036 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5036.xml">
<!ENTITY RFC5283 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5283.xml">
<!ENTITY RFC5286 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5286.xml">
<!ENTITY RFC5332 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5332.xml">
<!ENTITY RFC5925 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5925.xml">
<!ENTITY I-D.narten-iana-considerations-rfc2434bis SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.narten-iana-considerations-rfc2434bis.xml">
<!ENTITY I-D.kothari-henderickx-l2vpn-vpls-multihoming SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.kothari-henderickx-l2vpn-vpls-multihoming.xml">
<!ENTITY I-D.ietf-bfd-v4v6-1hop SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-bfd-v4v6-1hop.xml">
<!ENTITY I-D.filsfils-rtgwg-lfa-applicability SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.filsfils-rtgwg-lfa-applicability.xml">
<!ENTITY I-D.sajassi-l2vpn-rvpls-bgp SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.sajassi-l2vpn-rvpls-bgp.xml">
<!ENTITY I-D.raggarwa-mac-vpn SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.raggarwa-mac-vpn.xml">
<!ENTITY I-D.ietf-mpls-ldp-dod SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-mpls-ldp-dod.xml">
]>
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<!-- used by XSLT processors -->
<!-- For a complete list and description of processing instructions (PIs),
please see http://xml.resource.org/authoring/README.html. -->
<!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds might want to use.
(Here they are set differently than their defaults in xml2rfc v1.32) -->
<?rfc strict="yes" ?>
<!-- give errors regarding ID-nits and DTD validation -->
<!-- control the table of contents (ToC) -->
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space
(using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->
<rfc category="info" docName="draft-ietf-mpls-seamless-mpls-01"
ipr="trust200902">
<!-- category values: std, bcp, info, exp, and historic
ipr values: full3667, noModification3667, noDerivatives3667
you can add the attributes updates="NNNN" and obsoletes="NNNN"
they will automatically be output with "(if approved)" -->
<!-- ***** FRONT MATTER ***** -->
<front>
<!-- The abbreviated title is used in the page header - it is only necessary if the
full title is longer than 39 characters -->
<title abbrev="Seamless MPLS">Seamless MPLS Architecture</title>
<!-- add 'role="editor"' below for the editors if appropriate -->
<!-- Another author who claims to be an editor -->
<author fullname="Nicolai Leymann" initials="N.L." role="editor"
surname="Leymann">
<organization>Deutsche Telekom AG</organization>
<address>
<postal>
<street>Winterfeldtstrasse 21</street>
<!-- Reorder these if your country does things differently -->
<city>Berlin</city>
<code>10781</code>
<country>DE</country>
</postal>
<phone>+49 30 8353-92761</phone>
<email>n.leymann@telekom.de</email>
<!-- uri and facsimile elements may also be added -->
</address>
</author>
<author fullname="Bruno Decraene" initials="B.D." surname="Decraene">
<organization>France Telecom</organization>
<address>
<postal>
<street>38-40 rue du General Leclerc</street>
<city>Issy Moulineaux cedex 9</city>
<region></region>
<code>92794</code>
<country>FR</country>
</postal>
<phone></phone>
<facsimile></facsimile>
<email>bruno.decraene@orange-ftgroup.com</email>
<uri></uri>
</address>
</author>
<author fullname="Clarence Filsfils" initials="C.F." surname="Filsfils">
<organization>Cisco Systems</organization>
<address>
<postal>
<street></street>
<city>Brussels</city>
<region></region>
<code></code>
<country>Belgium</country>
</postal>
<phone></phone>
<facsimile></facsimile>
<email>cfilsfil@cisco.com</email>
<uri></uri>
</address>
</author>
<author fullname="Maciek Konstantynowicz" initials="M.K."
surname="Konstantynowicz">
<organization>Cisco Systems</organization>
<address>
<postal>
<street></street>
<city></city>
<region></region>
<code></code>
<country></country>
</postal>
<phone></phone>
<facsimile></facsimile>
<email>maciek@cisco.com</email>
<uri></uri>
</address>
</author>
<author fullname="Dirk Steinberg" initials="D.S." surname="Steinberg">
<organization>Steinberg Consulting</organization>
<address>
<postal>
<street>Ringstrasse 2</street>
<city>Buchholz</city>
<code>53567</code>
<country>DE</country>
</postal>
<email>dws@steinbergnet.net</email>
</address>
</author>
<date day="12" month="March" year="2012" />
<!-- If the month and year are both specified and are the current ones, xml2rfc will fill
in the current day for you. If only the current year is specified, xml2rfc will fill
in the current day and month for you. If the year is not the current one, it is
necessary to specify at least a month (xml2rfc assumes day="1" if not specified for the
purpose of calculating the expiry date). With drafts it is normally sufficient to
specify just the year. -->
<!-- Meta-data Declarations -->
<area>Routing Area</area>
<workgroup>MPLS Working Group</workgroup>
<!-- WG name at the upperleft corner of the doc,
IETF is fine for individual submissions.
If this element is not present, the default is "Network Working Group",
which is used by the RFC Editor as a nod to the history of the IETF. -->
<keyword>Seamless MPLS, MPLS, access network, aggregation network, WAN,
MAN, leymann, kompella, filsfils, hendrickx</keyword>
<!-- Keywords will be incorporated into HTML output
files in a meta tag but they have no effect on text or nroff
output. If you submit your draft to the RFC Editor, the
keywords will be used for the search engine. This -->
<abstract>
<t>This document describes an architecture which can be used to extend
MPLS networks to integrate access and aggregation networks into a single
MPLS domain ("Seamless MPLS"). The Seamless MPLS approach is based on
existing and well known protocols. It provides a highly flexible and a
scalable architecture and the possibility to integrate 100.000 nodes.
The separation of the service and transport plane is one of the key
elements; Seamless MPLS provides end to end service independent
transport. Therefore it removes the need for service specific
configurations in network transport nodes (without end to end transport
MPLS, some additional services nodes/configurations would be required to
glue each transport domain). This draft defines a routing architecture
using existing standardized protocols. It does not invent any new
protocols or define extensions to existing protocols.</t>
</abstract>
</front>
<middle>
<section title="Introduction">
<t>MPLS as a mature and well known technology is widely deployed in
today's core and aggregation/metro area networks. Many metro area
networks are already based on MPLS delivering Ethernet services to
residential and business customers. Until now those deployments are
usually done in different domains; e.g. core and metro area networks are
handled as separate MPLS domains.</t>
<t>Seamless MPLS extends the core domain and integrates aggregation and
access domains into a single MPLS domain ("Seamless MPLS"). This enables
a very flexible deployment of an end to end service delivery. In order
to obtain a highly scalable architecture Seamless MPLS takes into
account that typical access devices (DSLAMs, MSAN) are lacking some
advanced MPLS features, and may have more scalability limitations. Hence
access devices are kept as simple as possible.</t>
<t>Seamless MPLS is not a new protocol suite but describes an
architecture by deploying existing protocols like BGP, LDP and ISIS.
Multiple options are possible and this document aims at defining a
single architecture for the main function in order to ease
implementation prioritization and deployments in multi vendor networks.
Yet the architecture should be flexible enough to allow some level of
personalization, depending on use cases, existing deployed base and
requirements. Currently, this document focuses on end to end unicast
LSP.</t>
<section title="Requirements Language">
<t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
"SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
document are to be interpreted as described in <xref
target="RFC2119">RFC 2119</xref>.</t>
</section>
<section title="Terminology">
<t>This document uses the following terminology:<list style="symbols">
<t>Access Node (AN): An access node is a node which processes
customers frames or packets at Layer 2 or above. This includes but
is not limited to DSLAMs or OLTs (in case of (G)PON deployments).
Access nodes have only limited MPLS functionalities in order to
reduce complexity in the access network.</t>
<t>Aggregation Node (AGN): An aggregation node (AGN) is a node
which aggregates several access nodes (ANs).</t>
<t>Area Border Router (ABR): Router between aggregation and core
domain.</t>
<t>Deployment Scenario: Describes an implementation of
Seamless MPLS in order to fulfill the requirements derived from
one or more use cases.</t>
<t>Seamless MPLS Domain: A set of MPLS equipment which can set up
MPLS LSPs between them.</t>
<t>Transport Node (TN): Transport nodes are used to connect access
nodes to service nodes, and service nodes to service nodes.
Transport nodes ideally have no customer or service state and are
therefore decoupled from service creation.</t>
<t>Seamless MPLS (S-MPLS): Used as a generic term to describe an
architecture which integrates access, aggregation and core network
in a single MPLS domain.</t>
<t>Service Node (SN): A service node is used to create services
for customers and is connected to one or more transport nodes.
Typical examples include Broadband Network Gateways (BNGs) and video
servers.</t>
<t>Transport Pseudo Wire (T-PW): A transport pseudowire provides
service independent transport mechanisms based on Pseudo-Wires
within the Seamless MPLS architecture.</t>
<t>Use Case: Describes a typical network including service
creation points in order to describe the requirements, typical
numbers etc. which need to be taken into account when applying the
Seamless MPLS architecture.</t>
</list></t>
</section>
</section>
<section title="Motivation">
<t>MPLS has been deployed in core and aggregation networks for several years
and provides a mature and stable basis for large networks. In addition
MPLS is already used in access networks, e.g. such as mobile or DSL
backhaul. Today MPLS as technology is being used on two different
layers:</t>
<t><list style="symbols">
<t>the Transport Layer and</t>
<t>the Service Layer (e.g. for MPLS VPNs)</t>
</list>In both cases the protocols and the encapsulation are identical
but the use of MPLS is different especially concerning the signalling,
the control plane, the provisioning, the scalability and the frequency
of updates. On the service layer only service specific information is
exchanged; every service can potentially deploy its own architecture
and individual protocols. The services are running on top of the
transport layer. Nevertheless those deployments are usually isolated,
focused on a single use case and not integrated in an end-to-end
manner.</t>
<t>The motivation of Seamless MPLS is to provide an architecture which
supports a wide variety of different services on a single MPLS platform
fully integrating access, aggregation and core network. The architecture
can be used for residential services, mobile backhaul, business services
and supports fast reroute, redundancy and load balancing. Seamless MPLS
provides the deployment of service creation points which can be
virtually everywhere in the network. This enables network and service
providers with a flexible service and service creation. Service creation
can be done based on the existing requirements without the need for
dedicated service creation areas on fixed locations. With the
flexibility of Seamless MPLS the service creation can be done anywhere
in the network and easily moved between different locations.</t>
<section title="Why Seamless MPLS">
<t>Multiple service providers (SPs) plan to deploy networks with 10k to
100k MPLS nodes.
This is typically at least one order of magnitude higher than typical
deployments and may require a new architecture. Multiple options are
possible and it makes sense for the industry (both vendors and SPs) to
restrict the options in order to ease the first deployments (e.g.
restrict the number of options to implement and/or scales for vendors,
reduce interoperability and debugging issues for SPs).</t>
<t>Many aggregation networks are already deploying MPLS but are
limited to the use of MPLS per aggregation area. Those MPLS based
aggregation domains are connected to a core network running MPLS as
well. Nevertheless most of the services are not limited to an
aggregation domain but running between several aggregation domains
crossing the core network. In the past it was necessary to provide
connectivity between the different domains and the core on a per
service level and not based on MPLS (e.g. by deploying native
IP-Routing or Ethernet based technologies between aggregation and
core). In most cases service specific configurations on the border
nodes between core and aggregation were required. New services led to
additional configurations and changes in the provisioning tools (see
<xref target="serviceConfig"></xref>).</t>
<t>With Seamless MPLS there are no technology boundaries and no
topology boundaries for the services. Network (or region) boundaries
are for scaling and manageability, and do not affect the service
layer, since the Transport Pseudowire that carries packets from the AN
to the SN doesn't care whether it takes two hops or twenty, nor how
many region boundaries it needs to cross. The network architecture is
about network scaling, network resilience and network manageability;
the service architecture is about optimal delivery: service scaling,
service resilience (via replicated SNs) and service manageability. The
two are decoupled: each can be managed separately and changed
independently.</t>
<t></t>
<figure align="center" anchor="serviceConfig"
title="Service Specific Configurations">
<artwork><![CDATA[+--------------+ +--------------+ +--------------+
| Aggregation | | Core | | Aggregation |
| Domain #1 +---------+ Domain +---------+ Domain #2 |
| MPLS | ^ | MPLS | ^ | MPLS |
+--------------+ | +--------------+ | +--------------+
| |
+------ service specific ------+
configuration
]]></artwork>
</figure>
<t></t>
<t>One of the main motivations of Seamless MPLS is to get rid of
service specific configurations between the different MPLS islands.
Seamless MPLS connects all MPLS domains on the MPLS transport layer
providing a single transport layer for all services - independent of
the service itself. The Seamless MPLS architecture therefore decouples
the service and transport layer and integrates access, aggregation and
core into a single platform. One of the big advantages is that
problems on the transport layer only need to be solved once (and the
solutions are available to all services). With Seamless MPLS it is not
necessary to use service specific configurations on intermediate
nodes; all services can be deployed in an end to end manner.</t>
</section>
<section title="Use Case #1">
<section title="Description">
<t>In most cases at least residential and business services need to
be supported by a network. This section describes a Seamless MPLS
use case which supports such a scenario. The use case includes point
to point services for business customers as well as typical service
creation for residential customers.</t>
<t></t>
<t><figure align="center" anchor="serviceUseCase01"
title="Use Case #1: Service Creation">
<artwork><![CDATA[ +-------------+
| Service |
| Creation |
| Residential |
| Customers |
+------+------+
|
|
|
PW1 +-------+ +---+---+
######################### |
# +--+ AGN11 +---+ AGN21 + +------+
# / | | /| |\ | | +--------+
+--#-+/ +-------+\/ +-------+ \| | | remote |
| AN | /\ + CORE +---......--+ AN |
+--#-+\ +-------+ \+-------+ /| | ####### |
# \ | | | |/################### +--------+
# +--+ AGN12 +---+ AGN22 +##+------+ P2P Business Service
##############################
PW2 +-------+ +-------+
]]></artwork>
</figure></t>
<t></t>
<t><xref target="serviceUseCase01"></xref> shows the different
service creation points and the corresponding pseudowires between
the access nodes and the service creation points. The use case does
not show all PWs (e.g. not the PWs needed to support redundancy) in
order to keep the figure simple. Node and link failures are handled
by rerouting the PWs (based on standard mechanisms). End customers
(either residential or business customers) are connected to the
access nodes using a native technology like Ethernet. The access
nodes terminate the PW(s) carrying the traffic for the end
customers. The link between the access node (AN) and the aggregation
node (AGN) is the first MPLS enabled link.</t>
<t><list style="hanging">
<t hangText="Residential Services:">The service creation for all
residential customers connected to the Access Nodes in an
aggregation domain is located on a Service Node connected to
the AGN2x. The PW (PW1) originates at the AN and terminates at
the AGN2. A second PW is deployed in the case where redundancy
is needed on the AN (the figure shows redundancy but this might
not be the case for all ANs in this Use Case). Additional PWs can
be deployed as well in case more than a single service creation
is needed for the residential service (e.g. one service creation
point for Internet access and a second service creation point
for IPTV services).</t>
<t hangText="Business Services:">For business services the use
case shows point to point connections between two access nodes.
PW2 originates at the AN and terminates on the remote AN
crossing two aggregation areas and the core network. If the
access node needs connections to several remote ANs the
corresponding number of PWs will be originated at the AN.
Nevertheless taking the number of ports available and the number
of business customers on a typical access node the number of PWs
will be relatively small.</t>
</list></t>
<t></t>
<figure align="center" anchor="redUseCase01"
title="Use Case #1: Redundancy">
<artwork><![CDATA[ +-------+ +-------+ +------+ +------+
| | | | | | | |
+--+ AGN11 +---+ AGN21 +---+ ABR1 +---+ LSR1 +--> to AGN
/ | | /| | | | | |
+----+/ +-------+\/ +-------+ +------+ /+------+
| AN | /\ \/
+----+\ +-------+ \+-------+ +------+/\ +------+
\ | | | | | | \| |
+--+ AGN12 +---+ AGN22 +---+ ABR2 +---+ LSR2 +--> to AGN
| | | | | | | |
+-------+ +-------+ +------+ +------+
static route ISIS L1 LDP ISIS L2 LDP
<-Access-><--Aggregation Domain--><---------Core--------->
]]></artwork>
</figure>
<t></t>
<t><xref target="redUseCase01"></xref> shows the redundancy at the
access and aggregation network deploying a two stage aggregation
network (AGN1x/AGN2x). Nevertheless redundancy is not a MUST in this
use case. It is also possible to use non-redundant connections
between the ANs and AGN1 stage and/or between the AGN1 and AGN2
stages. The AGN2x stage is used to aggregate traffic from several
AGN1x pairs. In this use case an aggregation domain is not limited
to the use of a single pair of AGN2x; the deployment of several AGN2
pairs within the domain is also supported. As design goal for the
scalability of the routing and forwarding within the Seamless MPLS
architecture the following numbers are used:</t>
<t><list style="symbols">
<t>Number of Aggregation Domains: 100</t>
<t>Number of Backbone Nodes: 1.000</t>
<t>Number of Aggregation Nodes: 10.000</t>
<t>Number of Access Nodes: 100.000</t>
</list>The access nodes (AN) are dual homed to two different
aggregation nodes (AGN11 and AGN12) using static routing entries on
the AN. The ANs are always source or sink nodes for MPLS traffic but
not transit nodes. This allows a light MPLS implementation in order
to reduce the complexity in the AN. The aggregation network consists
of two stages with redundant connections between the stages (AGN11
is connected to AGN21 and AGN22 as well as AGN12 to AGN21 and
AGN22). The gateway between the aggregation and core network is
realized using the Area Border Routers (ABR). From the perspective
of the MPLS transport layer all systems are clearly identified using
the loopback address of the system. An ingress node must be able to
establish a service to an arbitrary egress system by using the
corresponding MPLS transport label.</t>
</section>
<section title="Typical Numbers">
<t><xref target="numUseCase01"></xref> shows typical numbers which
are expected for Use Case #1 (access node).</t>
<t></t>
<texttable align="center" anchor="numUseCase01"
title="Use Case #1: Typical Numbers for Access Node">
<ttcol>Parameter</ttcol>
<ttcol>Typical Value</ttcol>
<c>IGP Control Plane</c>
<c>2</c>
<c>IP FIB</c>
<c>2</c>
<c>LDP Control Plane</c>
<c>200</c>
<c>LDP FIB</c>
<c>200</c>
<c>BGP Control Plane</c>
<c>0</c>
<c>BGP FIB</c>
<c>0</c>
</texttable>
<t></t>
</section>
</section>
<section title="Use Case #2">
<section title="Description">
<t>In most cases, residential, wholesale and business services need
to be supported by the network.</t>
<t><figure align="center" anchor="usecase2" title="Use Case #2">
<artwork><![CDATA[ +-------------+
| Service |
| platforms |
|(VoIP, VoD..)|
| Residential |
| Customers |
+------+------+
|
|
+---+ +-----+ +--+--+ +-----+
|AN1|----+AGN11+--+AGN21+---+ ABR |
+---+ +--+--+ +--+--+ +--+--+
| | |
+---+ +--+--+ | | +----+
|AN2|----+AGN12+ | | --+ PE |
+---+ +--+--+ | | +----+
| | |
. | |
. | |
. | |
| | |
+---+ +---+ +--+--+ +--+--+ +--+--+
|AN4+---+AN3|----+AGN1x+--+AGN22+---+ ABR |
+---+ +---+ +-----+ +-----+ +-----+
<-Access-><--Aggregation Domain--><---------Core--------->
]]></artwork>
</figure></t>
<t>The above topology (see <xref target="usecase2"></xref>) is
subject to evolutions, depending on AN types and capacities (in
terms of number of customers and/or aggregated bandwidth). For
example, the AGN1x connection toward AGN2y currently forms a ring but
may later evolve into a square or triangle topology; AGN2y nodes may
not be present...</t>
<t>Most access nodes (ANs) are single attached to one aggregation
node using static routing entries on the AN and AGN. Some ANs are
dual attached to two different AGNs using static routes. Some ANs are
used as transit by some lower level ANs. Static routes are expected
to be used between those ANs.</t>
<t>IPv4, IPv6 and MPLS interconnection between the aggregation and
core network is realized using the Area Border Routers (ABR). Any
ingress node must be able to establish IPv4, IPv6 and MPLS
connections to any egress node in the seamless MPLS domain.</t>
<t>Regarding MPLS connectivity requirements, a full mesh of MPLS
LSPs is required between the ANs of an aggregation area, at least
for 6PE purposes. Some additional LSPs are needed between ANs and
some PE in the aggregation area or in the core area for access to
services, wholesale and enterprises services. In short, a meshing of
LSP is required between the AGN of the whole seamless MPLS domain.
Finally, LSP between any node to any node should be possible.</t>
<t>From a scalability standpoint, the following numbers are the
targets:</t>
<t><list style="symbols">
<t>Number of Aggregation Domains: 30</t>
<t>Number of Backbone Nodes: 150</t>
<t>Number of Aggregation Nodes: 1.500</t>
<t>Number of Access Nodes: 40.000</t>
</list></t>
</section>
<section title="Typical Numbers">
<t><xref target="numUseCase02"></xref> shows typical numbers which
are expected for Use Case #2 for the purpose of establishing the
transport LSPs. They do not take into account the services built in
addition. (e.g. 6PE will require additional IPv6 routes).</t>
<texttable align="center" anchor="numUseCase02"
title="Use Case #2: Typical Numbers for Access Node">
<ttcol>Parameter</ttcol>
<ttcol>Typical Value</ttcol>
<c>IGP Control Plane</c>
<c>2</c>
<c>IP FIB</c>
<c>2</c>
<c>LDP Control Plane</c>
<c>1000</c>
<c>LDP FIB</c>
<c>1000</c>
</texttable>
<t></t>
</section>
</section>
</section>
<section title="Requirements">
<t>The following section describes the overall requirements which need
to be fulfilled by the Seamless MPLS architecture. Beside the general
requirements of the architecture itself there are also certain
requirements which are related to the different network nodes.</t>
<t><list style="symbols">
<t>End to End Transport LSP: MPLS based services (pseudowire based,
L3-VPN or IP) SHALL be provided by the Seamless MPLS based
infrastructure between any nodes.</t>
<t>Scalability: The network SHALL be scalable to the minimum of
100.000 nodes.</t>
<t>Fast convergence (sub second resilience) SHALL be supported. Fast
reroute (LFA) SHOULD be supported.</t>
<t>Flexibility: The Seamless MPLS architecture SHALL be applied to a
wide variety of existing MPLS deployments. It SHALL use a flexible
approach deploying building blocks with the possibility to use
certain features only if those features are needed (e.g. dual homing
ANs or fast reroute mechanisms).</t>
<t>Service independence: Service and transport layer SHALL be
decoupled. The architecture SHALL remove the need for service
specific configurations on intermediate nodes.</t>
<t>Native Multicast support: P2MP MPLS LSPs SHOULD be supported by
the Seamless MPLS architecture.</t>
<t>Interoperable end to end OAM mechanisms SHALL be implemented.</t>
</list></t>
<section title="Overall">
<section title="Access">
<t>In respect of MPLS functionality the access network should be
kept as simple as possible. Compared to the aggregation and/or core
network within Seamless MPLS a typical access node is less powerful.
The control plane and the forwarding should be as simple as
possible. To reduce the complexity and the costs of an access node
not the full MPLS functionality needs to be supported (control and
data plane). The use of an IGP should be avoided. Static routing
should be sufficient. Required functionality to reach the required
scalability should be moved out of the access node. The number of
access nodes can be very high. The support of load balancing for
layer 2 services should be implemented.</t>
</section>
<section title="Aggregation">
<t>The aggregation network aggregates traffic from access nodes. The
aggregation Node must have functionalities that enlarge the
scalability of the simple access nodes that are connected. The IGP
must be link state based. Each aggregation area must be a separated
area. All routes that are interarea should use an EGP to keep the
IGP small. The aggregation node must have the full scalability
concerning control plane and forwarding. The support of load
balancing for layer 2 services must be implemented.</t>
</section>
<section title="Core">
<t>The core connects the aggregation areas. The core network
elements must have the full scalability concerning control plane and
forwarding. The IGP must be link state based. The core area must not
include routes from aggregation areas. All routes that are interarea
should use an EGP to keep the IGP small. Each area of the link state
based IGP should have less than 2000 routes. The support of load
balancing for layer 2 services must be implemented.</t>
</section>
</section>
<section title="Multicast">
<t>Compared with unicast connectivity Multicast is more dynamic. User
generated messages - like joining or leaving multicast groups - are
interacting directly with network components in the access and
aggregation network (in order to build the corresponding forwarding
states). This leads to the need for a highly dynamic handling of
messages on access and aggregation nodes. Nevertheless the core
network SHOULD be stable and state changes triggered by user generated
messages SHOULD be minimized. This raises the need for a hierarchy for
the P2MP support in Seamless MPLS hiding the dynamic behaviour of the
access and aggregation nodes.</t>
<t><list style="symbols">
<t>mLDP</t>
<t>P2MP RSVP-TE</t>
</list></t>
</section>
<section title="Availability">
<t>All network elements should be highly available (99.999%
availability). Outage times should be as low as possible. A repair
time of 50 milliseconds or less should be guaranteed at all nodes and
lines in the network that are redundant. Fast convergence features
SHOULD be used in all control plane protocols. Local Repair functions
SHOULD be used wherever possible. Full redundancy is required at all
equipment that is shared in a network element.</t>
<t><list style="symbols">
<t>Power Supply</t>
<t>Switch Fabric</t>
<t>Routing Processor</t>
</list>A change from an active component to a standby component
SHOULD happen without affecting customer traffic. The influence on
customer traffic MUST be as low as possible.</t>
</section>
<section title="Scalability">
<t>The network must be highly scalable. As a minimum requirement the
following scalability figures should be met:</t>
<t><list style="symbols">
<t>Number of aggregation domains: 100</t>
<t>Number of backbone nodes: 1.000</t>
<t>Number of aggregation nodes: 10.000</t>
<t>Number of access nodes: 100.000</t>
</list></t>
</section>
<section title="Stability">
<t><list style="symbols">
<t>The platform should be stable under certain circumstances (e.g.
misconfiguration within one area should not cause instability in
other areas).</t>
<t>Differentiate between "All Loopbacks and Link addresses
should be pingable from everywhere." vs. "Link addresses
are not necessarily pingable from everywhere".</t>
</list></t>
</section>
</section>
<!-- This PI places the pagebreak correctly (before the section title) in the text output. -->
<?rfc needLines="8" ?>
<section title="Architecture">
<section title="Overall">
<t>One of the key questions that emerge when designing an architecture
for a seamless MPLS network is how to handle the sheer size of the
necessary routing and MPLS label information control plane and
forwarding plane state resulting from the stated scalability goals
especially with respect to the total number of access nodes. This
needs to be done without overwhelming the technical scaling limits of
any of the involved nodes in the network (access, aggregation and
core) and without introducing too much complexity in the design of the
network while at the same time still maintaining good convergence
properties to allow for quick MPLS transport and service restoration
in case of network failures.</t>
</section>
<section title="Multi-Domain MPLS networks">
<t>The key design paradigm that leads to a sound and scalable solution
is the divide and conquer approach, whereby the large problem is
decomposed into many smaller problems for which the solution can be
found using well-known standard architectures.</t>
<t>In the specific case of seamless MPLS the overall MPLS network
SHOULD be decomposed into multiple MPLS domains, each well within the
scaling limits of well-known architectures and network node
implementations. From an organizational and operational point of view
it MAY make sense to define the boundaries of such domains along the
pre-existing boundaries of aggregation networks and the core
network.</t>
<t>Examples of how networks can be decomposed include using IGP areas
as well as using multiple BGP autonomous systems.</t>
</section>
<section title="Hierarchy">
<t>These MPLS domains SHOULD then be connected into an MPLS
multi-domain network in a hierarchical fashion that enables the
seamless exchange of loopback addresses and MPLS label bindings for
transport LSPs across the entire MPLS internetwork while at the same
time preventing the flooding of unnecessary routing and label binding
information into domains or parts of the network that do not need
them. Such a hierarchical routing and forwarding concept allows a
scalability in different dimensions and allows to hide the complexity
and size of the aggregation and access networks.</t>
</section>
<section title="Intra-Domain Routing">
<t>The intra-domain routing within each of the MPLS domains (i.e.
aggregation domains and core) SHOULD utilize standard IGP protocols
like OSPF or ISIS. By definition, each of these domains is small
enough so that there are no relevant scaling limits within each IGP
domain, given well-known state-of-the-art IGP design principles and
recent router technology.</t>
<t>The intra-domain MPLS LSP setup and label distribution SHOULD
utilize standard protocols like LDP or RSVP.</t>
</section>
<section title="Inter-Domain Routing">
<t>The inter-domain routing is responsible for establishing
connectivity between and across all MPLS domains. The inter-domain
routing SHOULD establish a routing and forwarding hierarchy in order
to achieve the scaling goals of seamless MPLS. Note that the IP
aggregation usually performed between regions (IGP areas/ASes) in IP
routing does not work for MPLS as MPLS is not capable of aggregating
FECs (because MPLS forwarding uses an exact match lookup, while IP uses
longest match).</t>
<t>Therefore it is RECOMMENDED to utilize protocols that support
indirect next-hops (like BGP with MPLS labels, "labeled
BGP/SAFI4" <xref target="RFC3107"></xref>).</t>
</section>
<section title="Access">
<t>Compared to the aggregation and core parts of the Seamless MPLS
network the access part is special in two respects:</t>
<t><list style="symbols">
<t>The number of nodes in the access is at least one order of
magnitude higher than in any other part of the network.</t>
<t>Because of the large quantity of access nodes, the cost of
these nodes is extremely relevant for the overall costs of the
entire network, i.e. access nodes are very cost sensitive.</t>
</list>This makes it desirable to design the architecture such that
the AN functionality can be kept as simple as possible. This should
always be kept in mind when evaluating different seamless MPLS
architectures. The goal is to limit both the number of different
protocols needed on the AN as well as the scale to which each protocol
must perform to the absolute minimum.</t>
</section>
</section>
<!-- This PI places the pagebreak correctly (before the section
title) in the text output. -->
<?rfc needLines="8" ?>
<section title="Deployment Scenarios">
<t>This section describes the deployment scenarios based on the use
cases and the generic architecture above.</t>
<section title="Deployment Scenario #1">
<t>Section describing the Seamless MPLS implementation of a large
European ISP.</t>
<section title="Overview">
<t>This deployment scenario describes one way to implement a
seamless MPLS architecture. Specific to this implementation is the
choice of intra- and inter-domain routing and label distribution
protocols, as well as the details of the interworking of these
protocols to achieve the overall scalable hierarchical
architecture.</t>
</section>
<section title="General Network Topology">
<t>There are multiple aggregation domains (in the order of up to
100) connected to the core in a star topology, i.e. aggregation
domains are never connected among themselves, but only to the core.
The core has its own domain.</t>
<figure align="center" anchor="deploy01"
title="Deployment Scenario #1">
<artwork><![CDATA[
+-------+ +-------+ +------+ +------+
| | | | | | | |
+--+ AGN11 +---+ AGN21 +---+ ABR1 +---+ LSR1 +--> to AGN
/ | | /| | | | | |
+----+/ +-------+\/ +-------+ +------+ /+------+
| AN | /\ \/ |
+----+\ +-------+ \+-------+ +------+/\ +------+
\ | | | | | | \| |
+--+ AGN12 +---+ AGN22 +---+ ABR2 +---+ LSR2 +--> to AGN
| | | | | | | |
+-------+ +-------+ +------+ +------+
static route ISIS L1 LDP ISIS L2 LDP
<-Access-><--Aggregation Domain--><---------Core--------->
]]></artwork>
</figure>
<t>As shown in <xref target="deploy01"></xref>, the access nodes
(AN) are connected to the aggregation network via aggregation nodes
called AGN1x, either to a single AGN1x or redundantly to two AGN1x.
Each AGN1x has redundant uplinks to a pair of second-level
aggregation nodes called AGN2x.</t>
<!-- [###dws: do we assume the presence of this link???]
<t>The AGS2 pair is also connected via a direct
link.</t> -->
<t>Each aggregation domain is connected to the core via exactly two
border routers (ABR) on the core side. There can be multiple AGN2
pairs per aggregation domain, but only one ABR pair for each
aggregation domain. Each of the AGN2 in an AGN2 pair connects to one
of the ABRs in the ABR pair responsible for that aggregation
domain.</t>
<!-- [###dws: do we assume the presence of redundant AGS2 to
ABR uplinks???] -->
<t>The ABRs on the core side have redundant connections to a pair of
LSR routers.</t>
<!-- [###dws: do we assume the presence of this link???] -->
<t>The LSR pair is also connected via a direct link.</t>
<t>The core LSR are connected to other core LSR in a partly meshed
topology so that there are disjunct, redundant paths from each LSR
to each other LSR.</t>
</section>
<section title="Hierarchy">
<t>As explained before, hierarchy is the key to a scalable seamless
MPLS architecture. The hierarchy in this implementation is achieved
by forming different MPLS domains for aggregation domains and core,
where within each of these domains a fairly common MPLS deployment
using ISIS as intradomain link-state routing protocol and using LDP
for MPLS label distribution is used.</t>
<t>These MPLS domains are mapped to ISIS areas as follows:
Aggregation domains are mapped to ISIS L1 areas. The core is
configured as ISIS L2. The border routers connecting aggregation and
core are ISIS L1L2 and are referred to as ABRs. From a technical and
operational point of view these ABRs are part of the core, although
they also belong to the respective aggregation domain purely from a
routing protocol point of view.</t>
<t>For the inter-domain routing, BGP with MPLS labels is deployed
("labeled BGP/SAFI4" <xref target="RFC3107"></xref>).</t>
</section>
<section title="Intra-Area Routing">
<section title="Core">
<t>The core uses ISIS L2 to distribute routing information for the
loopback addresses of all core nodes. The border routers (ABR)
that connect to the aggregation domains are also part of the
respective aggregation ISIS L1 area and hence ISIS L1L2.</t>
<t>LDP is used to distribute MPLS label binding information for
the loopback addresses of all core nodes.</t>
</section>
<section title="Aggregation">
<t>The aggregation domains use ISIS L1 as intra-domain routing
protocol. All AGN loopback addresses are carried in ISIS.</t>
<t>As in the core, the aggregation also uses LDP to distribute
MPLS label bindings for the loopback addresses.</t>
</section>
</section>
<section title="Access">
<t>Access nodes do not have their own domain or IGP area. Instead,
they directly connect to the AGN1 nodes in the aggregation domain.
To keep access devices as simple as possible, ANs do not participate
in ISIS.</t>
<t>Instead, each AN has two static default routes pointing to each
of the AGN1 it is connected to. Appropriate techniques SHOULD be
deployed to make sure that a given default route is invalidated when
the link to an AGN1 or that node itself fails. Examples of such
techniques include monitoring the physical link state for loss of
light/loss of frame, or using Ethernet link OAM or BFD <xref
target="I-D.ietf-bfd-v4v6-1hop"></xref>.</t>
<t>The AGN1 MUST have a configured static route to the loopback
address of each of the ANs it is connected to, because it cannot
learn the AN loopback address in any other way. These static routes
have to be monitored and invalidated if necessary using the same
techniques as described above for the static default routes on the
AN.</t>
<t>The AGN1 redistributes these routes into ISIS for intra-domain
reachability of all AN loopback addresses.</t>
<t>LDP is used for MPLS label distribution between AGN1 and AN. In
order to keep the AN control plane as lightweight as possible, and
to avoid the necessity for the AN to store 100,000 MPLS label
bindings for each upstream AGN1 peer, LDP is deployed in
downstream-on-demand (DoD) mode, described below.</t>
<t>To allow the label bindings received via LDP DoD to be installed
into the LFIB on the AN without having the specific host route to
the destination loopback address, but only a default route, use of
the LDP Extension for Inter-Area Label Switched Paths <xref
target="RFC5283"> </xref> is made.</t>
<section title="LDP Downstream-on-Demand (DoD)">
<t>LDP downstream-on-demand mode is specified in <xref
target="RFC5036"></xref>. Although it was originally intended to
be used with ATM switch hardware, there is nothing from a protocol
perspective preventing its use in a regular MPLS frame-based
environment. In this mode the upstream LSR will explicitly ask the
downstream LSR for a label binding for a particular FEC when
needed.</t>
<t>The assumption is that a given AN will only have a limited
number of services configured to an even more limited number of
destinations, or egress LER. Instead of learning and storing all
label bindings for all possible loopback addresses within the
entire Seamless MPLS network, the AN will use LDP DoD to only
request the label bindings for the FECs corresponding to the
loopback addresses of those egress nodes to which it has services
configured.</t>
<t>For LDP DoD the AGN1 MUST also ask the AN for label bindings
for specific FECs. FECs are necessary for all pseudowire
destinations at the AN. Preferably, this pseudowire
destination is the LSR-ID of the AN. Depending on the AN
implementation and architecture multiple pseudowire destination
addresses and associated FECs could be needed. This
results in the following requirements:</t>
<t><list style="symbols">
<t>The AGN1 MUST ask the AN for label bindings for all
potential pseudowire destination addresses on the AN. Because
the AGN (at least in many cases) does not take part in the
pseudowire signaling an independent way of receiving the AN
FEC is necessary on the AGN. These potential pseudowire
destinations MUST be known on the AGN1, by configuration or
otherwise. These are typically the loopback addresses of the
AN, to which a static route has been configured anyway on the
AGN1, as explained above. In addition to these static routes,
the AGN1 SHOULD be configured statically to request MPLS label
bindings for these loopback addresses via LDP DoD.</t>
<t>Optionally an automatism that asks for a FEC for the LSR-ID
COULD be implemented. A configuration switch that disables
this option must be implemented. The label is necessary. The
way of initiating the DoD-signaling of the label could be done
with both methods (configuration/automatism).</t>
<t>The AN knows by configuration to which destination a
pseudowire is set up. The AN is always the endpoint of the
pseudowire. Before signalling a pseudowire the AN MUST ask
(via LDP DoD) the AGN for a FEC. Because of this an
independent preconfiguration is not necessary on the AN.</t>
<t>The following are the triggers for ANs to request a
label:</t>
<t><list style="symbols">
<t>When a control session (targeted LDP) to a target has
to be established</t>
<t>When a service label has been received by a control
session (e.g. pseudo wire label)</t>
</list><!-- also label withdraw mechanisms should be specified. How
is a lable withdrawn (protocol issue?). Upstream nodes should be
able to optimize FIB in case a label is not needed any more. --></t>
</list></t>
</section>
</section>
<section title="Inter-Area Routing">
<t>The inter-domain MPLS connectivity from the aggregation domains
to and across the core domain is realized primarily using BGP with
MPLS labels ("labeled BGP/SAFI4" <xref target="RFC3107"></xref>). A
very limited amount of route leaking from ISIS L2 into L1 is also
used.</t>
<t>All ABR and PE nodes in the core are part of the labeled iBGP
mesh, which can be either full mesh or based on route reflectors.
These nodes advertise their respective loopback addresses (which are
also carried in ISIS L2) into labeled BGP.</t>
<t>Each ABR node has labeled iBGP sessions with all AGN1 nodes
inside the aggregation domain that they connect to the core. Since
there are two ABR nodes per aggregation domain, this leads to each
AGN1 node having an iBGP session with each of the two ABR. Note
that the use of iBGP implies that the entire seamless MPLS
internetwork is just a single AS to which all core and aggregation
nodes belong. The AGN1 nodes advertise their own loopback addresses
into labeled BGP, in addition to these loopbacks also being in ISIS
L1.</t>
<t>Additionally the AGN1 nodes also redistribute all the statically
configured routes to the AN loopback addresses into labeled BGP.
Note that as stated above, the AGN1 MUST ask the AN for label
bindings for the AN loopback FECs via LDP DoD in order to have a
valid labeled route with a non-null label.</t>
<t>This architecture results in carrying all loopbacks of all nodes
except pure P nodes (AN, AGN, ABR and core PE) in labeled BGP, e.g.
there will be in the order of 100,000 routes in labeled BGP when
approaching the stated scalability goal. Note that this only affects
the BGP RIB size and does not necessarily imply that any node needs
to actually have active forwarding state (LFIB) in the same order of
magnitude. In fact, as will be discussed in the scalability
analysis, no single node needs to install all labeled BGP routes
into the LFIB, but each node only needs a small percentage of the
RIB as active forwarding state in the LFIB. And from a RIB point of
view, BGP is known to scale to hundreds of thousands of routes.</t>
</section>
<section title="Labeled iBGP next-hop handling">
<t>The ABR nodes run labeled iBGP both to the core mesh as well as
to the AGN1 nodes of their respective aggregation domains. Therefore
they operate as iBGP route reflectors, reflecting labeled routes
from the aggregation into the core and vice versa.</t>
<t>When reflecting routes from the core into the aggregation domain,
the ABR SHOULD NOT change the BGP NEXT-HOP addresses
(next-hop-unchanged). This is the usual behaviour for iBGP route
reflection. In order to make these routes resolvable to the AGN1
nodes inside the aggregation domain, the ABR MUST leak all other ABR
and core PE loopback addresses from ISIS L2 into ISIS L1 of the
aggregation domain. Note that the number of leaked addresses is
limited so that the overall scalability of the seamless MPLS
architecture is not impacted. In the worst case all core loopback
addresses COULD be leaked into ISIS L1, but even that would not be a
scalability problem.</t>
<t>When reflecting routes from the aggregation into the core, the
ABR MUST set the BGP NEXT-HOP to its own loopback addresses
(next-hop-self). This is not the default behaviour for iBGP route
reflection, but requires special configuration on the ABR. Note that
this also implies that the ABR MUST allocate a new local MPLS label
for each labeled iBGP FEC that it reflects from the aggregation into
the core. This special next-hop handling is essential for the
scalability of the overall seamless MPLS architecture since it
creates the required hierarchy and enables the hiding of all
aggregation and access addresses behind the ABRs from an IGP point
of view. Leaking of aggregation ISIS L1 loopback addresses into ISIS
L2 is not necessary and MUST NOT be allowed.</t>
<t>The resulting hierarchical inter-domain MPLS routing structure is
similar to the one described in <xref target="RFC4364"></xref>
section 10c, only that we use one AS with route reflection instead
of using multiple ASes.</t>
</section>
<section title="Network Availability and Simplicity">
<t>The seamless MPLS architecture illustrated in deployment case
study 1 guarantees a sub-second loss of connectivity upon any link
or node failures. Furthermore, in the vast majority of cases, the
loss of connectivity is limited to sub-50msec.</t>
<t>These network availability properties are provided without any
degradation on scale and simplicity. This is a key achievement of
the design.</t>
<t>In the remainder of this section, we first introduce the
different network availability technologies and then review their
applicability for each possible failure scenario.</t>
<section title="IGP Convergence">
<t>IGP convergence can be modelled as a linear process with an
initial delay and a linear FIB update <xref
target="ACM01"></xref>.</t>
<t>The initial delay could conservatively be assumed to be
260msec: 50msec to detect failures with BFD (most failures would
be detected faster with loss of light for example or with faster
BFD timers), 50msec to throttle the LSP generation, 150msec to
throttle the SPF computation (making sure that all the required
LSPs are received even in case of SRLG failures) and 10msec
for shortest-path-first tree computation.</t>
<t>Assuming 250usec per update (conservative), this allows for
(1000-260)/0.250 = 2960 prefix updates within a second following
the outage. More precisely, this allows for 2960 important IGP
prefix updates. Important prefixes are automatically classified
by the router implementation through simple heuristic (/32 is more
important than non-/32).</t>
<t>The number of IGP important routes (loopbacks) in deployment
case study 1 is much smaller than 2960, and hence sub-second IGP
convergence is conservative.</t>
<t>IGP convergence is a simple technology for the operator
provided that the router vendor optimizes the default IGP behavior
(no need to tune any arcane knob).</t>
</section>
<section title="Per-Prefix LFA FRR">
<t>A per-prefix LFA for a destination D is a precomputed backup
IGP nexthop for that destination. This backup IGP nexthop can be
link protecting or node protecting <xref
target="RFC5286"></xref>.</t>
<t>The analysis of the applicability of Per-Prefix LFA in the
deployment model 1 of Seamless MPLS architecture is
straightforward thanks to <xref
target="I-D.filsfils-rtgwg-lfa-applicability"></xref>.</t>
<t>In deployment model 1, each aggregation network either follows
the triangle or full-mesh topology. Furthermore, the backbone
region implements a dual-plane. As a consequence, the failure of
any link or node within an aggregation domain is protected by LFA
FRR (sub-50msec) for all impacted IGP prefixes, whether intra-area
or inter-area. No uloop may form as a result of these failures
<xref target="I-D.filsfils-rtgwg-lfa-applicability"></xref>.</t>
<t>Per-Prefix LFA FRR is generally assessed as a simple technology
for the operator <xref
target="I-D.filsfils-rtgwg-lfa-applicability"></xref>. It
certainly is in the context of deployment case study 1 as the
designer enforced triangle and full-mesh topologies in the
aggregation network as well as a dual-plane core network.</t>
</section>
<section title="Hierarchical Dataplane and BGP Prefix Independent Convergence">
<t>In a hierarchical dataplane, the FIB used by the packet
processing engine reflects the recursions between routes. For
example, a BGP route B recursing on IGP route I whose best path is
via interface O is encoded as a FIB entry B pointing to a FIB
entry I pointing to a FIB entry O.</t>
<t>Hierarchical FIB <xref target="BGPPIC"></xref> extends the
hierarchical dataplane with the concept of a BGP Path-List. A BGP
path-list may be abstracted as a set of primary multipath nhops
and a backup nhop. When the primary set is empty, packets destined
to the BGP destinations are rerouted via the backup nhop.</t>
<t>With hierarchical FIB and hierarchical dataplane, a FIB entry
representing a BGP route points to a FIB entry representing a BGP
Path-List. This entry may either point again to another BGP Path
list entry (BGP over BGP recursion) or more likely points to a FIB
entry representing an IGP route.</t>
<t>A BGP Path-list may be computed automatically by the router and
does not require any operator involvement. Specifically, the
automated computation adapts to any routing policy (this is key to
understand the simplicity of hierarchical FIB and the ability to
enable it as a default router behavior). There is no constraint at
all on the operator design. Any policy is supported (multipath,
primary/backup between neighboring domains or via alternate
domains).</t>
<t>The BGP backup nhop is computed in advance of any failure (i.e.
a second bestpath computation after excluding the primary
nhops).</t>
<t>Hierarchical dataplane and hierarchical FIB provide two
important routing availability properties.</t>
<t>First, upon IGP convergence, recursive BGP routes immediately
benefit from the updated IGP paths thanks to the dataplane
indirection. This is key as most of the traffic is destined to BGP
routes, not to IGP routes.</t>
<t>Second, upon loss of the primary BGP nhop, the dataplane can
immediately reroute the packets towards the pre-computed backup
nhop. This redirection is said to be prefix independent as the
only entries that need to be modified are the BGP path-lists.
These entries are shared across all the BGP prefixes with the same
primary and backup next-hops. This scale independence is key. In
the context of deployment model 1, while there might be 100k BGP
routes, we only expect on the order of 200 BGP path-lists.
Assuming 10usec in-place modification per BGP path-list, we see
that the router can enable the backup path for 100k BGP
destinations in less than 2msec (less than 200 * 10usec).</t>
<t>The detection of the loss of the primary BGP nhop (and hence
the need to enable the pre-computed backup BGP nhop) can be local
(a local link failing between an edge device and a single-hop eBGP
peer) or involves an IGP convergence (a remote border router goes
down).</t>
<t>These hierarchical FIB properties benefit any BGP routes:
Internet, L3VPN, 3107, IPv4 or IPv6. Future evolution of VPLS will
also benefit from such properties <xref
target="I-D.raggarwa-mac-vpn"></xref>, <xref
target="I-D.sajassi-l2vpn-rvpls-bgp"></xref>.</t>
<t>Hierarchical forwarding and hierarchical FIB are very simple
technology to operate. Their ability to adapt to any topology, any
routing policy and any BGP address family allows router vendors to
enable this behavior by default.</t>
</section>
<section title="Local Protection using Anycast BGP">
<t></t>
<section title="Anycast BGP applied to ABR node failure">
<t>In this section we describe a mechanism that provides local
protection for area border router (ABR) failures. To illustrate
this mechanism consider an example shown in <xref
target="RoutingAndTrafficFlow"></xref>.<figure
anchor="RoutingAndTrafficFlow"
title="Routing and Traffic Flow">
<artwork align="center"><![CDATA[ +-------+
| |
vl0+ ABR 1 |
/| |
+----------+ +-------+ / +-------+
| | | |/
| PE / LER +-..-+ PLR |
| | | |\
+----------+ +-------+ \ +-------+
\| |
vl0+ ABR 2 |
| |
+-------+
+-------+ +-------+ +-------+
| LDP-L +-----+ LDP-L +-----+ LDP-L |
+-------+ +-------+ +-------+
| BGP-L +-------------------+ BGP-L |
+-------+ +-------+
--------------- traffic ---------------->
<----- routing + label distribution -----
]]></artwork>
</figure></t>
<t>The core router adjacent to ABR1 and ABR2 acts as a point of
local repair (PLR). When the PLR detects ABR1 failure, the PLR
re-routes to ABR2 the traffic that the PLR used to forward to
ABR1, with ABR2 providing the subsequent forwarding for this
traffic. To accomplish this ABR1, ABR2, and the PLR employ the
following procedures.</t>
<t>ABR1, in addition to its own loopback, is provisioned with
another IP address (vl0). This IP address is used to identify
the forwarding state/context on ABR1 that is the subject to the
local protection mechanism outlined in this section. We refer to
this IP address, vl0, as the "context identifier". ABR1
advertises its context identifier in ISIS and LDP. As ABR1
re-advertises to its core peers the BGP routes it receives from
its peers in the aggregation domain(s), ABR1 sets the BGP Next
Hop on these routes to its context identifier (this creates an
association between the forwarding state/context created by
these routes and the context identifier).</t>
<t>ABR2, acting as a protector for ABR1, is configured with the
ABR1's context identifier. ABR2 advertises this context
identifier into LDP and ISIS. The LDP advertisement is done with
no PHP and a non-null label, and the ISIS advertisement is done
with a very high metric. As a result, the PLR would have an LFA
route/LSP to this context identifier with ABR2 as the next hop.
When the PLR detects ABR1's failure, the LFA procedures on the
PLR would result in sending to ABR2 the traffic that the PLR
used to forward to ABR1. Moreover, since ABR2 advertises into
LDP a non-null label for the ABR1's context identifier, this
label would enable ABR2 to identify such traffic (as we'll see
further down the ability to identify such traffic is essential
in order for ABR2 to correctly forward this traffic).</t>
<t><figure align="center" anchor="ABRFailureScenarios"
title="ABR Failure Scenarios">
<artwork><![CDATA[ +-----------------+-----------+-----------+
| FEC 10.0.1.1/32 | Label 200 | NH AGN2-1 |
+-----------------+-----------+-----------+
| FEC 10.0.1.2/32 | Label 233 | NH AGN2-1 | ABR1
+-----------------+-----------+-----------+
| FEC 10.0.1.3/32 | Label 313 | NH AGN2-1 |
+-----------------+-----------+-----------+
+------+ +-------+
| | | | +------------------+
vl0+ ABR1 +----+ AGN21 +----+ AGN11:10.0.1.1/32|
/| | | |\ /+------------------+
/ +------+\ /+-------+ \/
+----+ +-----+/ \/ \ /\ +------------------+
| PE +---+ PLR | /\ X X+ AGN12:10.0.1.2/32|
+----+ +-----+\ / \ / \/ +------------------+
\ +------+ +-------+ /\
\| | | |/ \+------------------+
vl0+ ABR2 +----+ AGN22 +----+ AGN13:10.0.1.3/32|
| | | | +------------------+
+------+ +-------+
+----------------------------------------+
| native forwarding context |
+-----------------+-----------+----------+
| FEC 10.0.1.1/32 | Label 100 | NH AGN21 |
+-----------------+-----------+----------+
| FEC 10.0.1.2/32 | Label 107 | NH AGN21 | ABR2
+-----------------+-----------+----------+
| FEC 10.0.1.3/32 | Label 152 | NH AGN21 |
+-----------------+-----------+----------+
| | |
V V V
+----------------------------------------+
| backup forwarding context |
+-----------------+-----------+----------+
| FEC 10.0.1.1/32 | Label 200 | NH AGN21 |
+-----------------+-----------+----------+
| FEC 10.0.1.2/32 | Label 233 | NH AGN21 | ABR2
+-----------------+-----------+----------+
| FEC 10.0.1.3/32 | Label 313 | NH AGN21 |
+-----------------+-----------+----------+
(ABR2 acting as backup for ABR1)
]]></artwork>
</figure></t>
<t>ABR2, acting as a protector for the forwarding context of
ABR1, has to have the &lt;FEC-&gt;label&gt; mapping for the FECs
present in that forwarding context, and should use this mapping
to create the forwarding state it would use when forwarding the
traffic received from the PLR. <xref
target="ABRFailureScenarios"> </xref> shows the
&lt;FEC-&gt;label&gt; mapping on ABR1 and ABR2. Note that the
backup forwarding context on ABR2 is a mirror image of the
forwarding context on ABR1. This backup forwarding context is
populated using the routes that have been re-advertised by ABR1
to its core peers (as ABR2 is a BGP core peer of ABR1). The
label that ABR2 advertises into LDP for ABR1's context
identifier points to the backup context. This way, ABR2 forwards
all the traffic received with this label using not its native
forwarding context, but the backup forwarding context.</t>
<t>Note that whether the PLR could rely on the basic LFA to
re-route to ABR2 the traffic that the PLR used to forward to
ABR1 depends on the LFA coverage. Since the basic LFA does not
guarantee 100% coverage in all topologies, relying on basic LFA
may not be sufficient, in which case the basic LFA would need to
be augmented to provide 100% coverage.</t>
<t>The procedures outlined above provide local protection upon
ABR node failure. By virtue of being local protection, the
actions required to restore connectivity upon the failure
detection are fully localized to the router closest to the
failure - the router directly connected to the failed ABR. This
makes it possible to deliver under 50msec connectivity recovery time in
the presence of ABR failure. These actions do not depend on
propagating failure information in ISIS, thus providing
connectivity recovery time that is independent of the ISIS
routing convergence time. In contrast, a combination of
hierarchical FIB organization and ISIS routing convergence,
being a global protection mechanism, does rely on the ISIS
routing convergence time, as the prefix-independent switch-over
on the pre-computed backup next hop occurs upon IGP convergence
(deletion of the IGP route to the remote ABR), and thus would
have several 100s msec connectivity recovery time.</t>
</section>
<section title="Extensions to support ABRs connected to different aggregation regions">
<t>Note that for the purpose of identifying the forwarding
context ABR1's forwarding state could be partitioned, with each
partition being assigned its own IP address (its own context
identifier). ABR1 would advertise all these identifiers into
ISIS and LDP. This may be useful in the scenario where ABR1 is
connected to more than one aggregation domain (more than one L1
area), in which case each context identifier would identify the
ABR1's forwarding state associated with a single aggregation
domain.</t>
<t>One could further refine the above scheme by implementing
protector functionality that would allow a single protector to
protect multiple forwarding contexts, with each forwarding
context being associated with all the forwarding state
maintained by a given (protected) ABR. Such functionality could
be implemented either on a separate router, or could be
co-located with an existing ABR. Details of this are outside the
scope of this document.</t>
</section>
<section title="Anycast BGP applied to a L3VPN PE">
<t>BGP Anycast is also used to protect against L3VPN PE
failures. In general a given VPN site can be multi-homed
(connected to several L3VPN PEs). Moreover, multi-homed sites
may be non-congruent with each other - different multi-homed
sites connected to a given PE may have their other connection(s)
to different other PEs. BGP Anycast scheme, utilizing the
construct of Protector PE, provides forwarding context
protection for multiple egress PEs in the presence of
non-congruent multi-homed sites.</t>
<t>Protector PE function is enhanced from the basic BGP Anycast
1:1 mirroring procedures described for ABR protection, by
supporting multiple backup forwarding contexts, one per
protected egress PE. Each backup forwarding context on the
Protector PE is identified by the context identifier of the
associated protected egress PE.</t>
<t>Protector PE advertises these context identifiers into IGP
with a large metric and into LDP with no PHP and a non-null
label. This results in PLR of each egress PE having an LFA
route/LSP (or bypass LSP if no native LFA coverage for specific
topology) to the associated context identifier with Protector PE
as the next hop. Protector PE creates a backup forwarding
context per protected egress PE based on BGP advertisements from
this egress PE and other egress PEs with the same multi-homed
customer networks.</t>
<t>Similarly to the ABR case described earlier, in case of
specific protected egress PE failure, PLR will follow standard
LFA procedure (or local protection to bypass LSP) and forward
affected flows to Protector PE. Those flows will arrive to
Protector PE on the LSP associated with the context identifier
for the failed egress PE, the backup forwarding context will be
identified by this LSP, and flows will be switched to
alternative egress PE(s).</t>
</section>
</section>
<section title="Assessing loss of connectivity upon any failure">
<t><list style="empty">
<t>We select two typical traffic flows and analyze the loss of
connectivity (LoC) upon each possible failure.</t>
<t>Flow F1 starts from an AN1 in a left aggregation region and
ends on an AN2 in a right aggregation region. Each AN is
dual-homed to two AGNs.</t>
<t>Flow F2 starts from an L3VPN PE1 in the core and ends at an
L3VPN PE2 in the core.</t>
</list></t>
<t>Note that due to the symmetric network topology in case study
1, uni-directional flows F1' and F2', associated with F1 and F2
and forwarded in the reversed direction (AN2 to AN1 right-to-left
and PE2 to PE1, respectively), take advantage of the same failure
restoration mechanisms as F1 and F2.</t>
<section title="AN1-AGN link failure or AGN node failure ">
<t>F1 is impacted but LoC &lt;50msec is possible assuming fast
BFD detection and fast-switchover implementation on the AN. F2
is not impacted.</t>
</section>
<section title="Link or node failure within the left aggregation region">
<t>F1 is impacted but LoC &lt;50msec thanks to LFA FRR. No uloop
will occur during the IGP convergence following the LFA
protection. Note: if LFA is not available (other topology than
case study one) or if LFA is not enabled, then the LoC would be
&lt; 1 second as the number of impacted important IGP routes in a
seamless architecture is much smaller than 2960.</t>
<t>F2 is not impacted.</t>
</section>
<section title="ABR node failure between left region and the core">
<t>F1 is impacted but LoC &lt;50msec thanks to LFA FRR. No uloop
will occur during the IGP convergence following the LFA
protection.</t>
<t>Note: This case is also called "Local ABR
failure" as the ABR which fails is the one connected to
the aggregation region at the source of flow F1.</t>
<t>Note: remember that the left region receives the routes to
all the remote ABRs and that the labelled BGP routes are
reflected from the core to the left region with next-hop
unchanged. This ensures that the loss of the (local) ABR between
the left region and the core is seen as an IGP route impact and
hence can be addressed by LFA.</t>
<t>Note: if LFA is not available (other topology than case study
one) or if LFA is not enabled, then the LoC would be &lt; 1 second
as the number of impacted important IGP routes in a seamless
architecture is much smaller than 2960.</t>
<t>F2 is not impacted.</t>
</section>
<section title="Link or node failure within the core region">
<t>F1 and F2 are impacted but LoC &lt;50msec thanks to LFA
FRR.</t>
<t>This is specific to the particular core topology used in
deployment case study 1. The core topology has been optimized
<xref target="I-D.filsfils-rtgwg-lfa-applicability"></xref> for
LFA applicability.</t>
<t>As explained in <xref
target="I-D.filsfils-rtgwg-lfa-applicability"></xref>, another
alternative to provide &lt;50msec in this case consists in using
an MPLS-TE full-mesh and MPLS-TE FRR. This is required when the
designer is not able or does not want to optimize the topology
for LFA applicability and wants to achieve &lt;50msec
protection.</t>
<t>Alternatively, simple IGP convergence would ensure a LoC &lt; 1
second as the number of impacted important IGP routes in a
seamless architecture is much smaller than 2960.</t>
</section>
<section title="PE2 failure">
<t>F1 is not impacted.</t>
<t>F2 is impacted and the LoC is sub-300msec thanks to IGP
convergence and hierarchical FIB.</t>
<t>The detection of the primary nhop failure (PE2 down) is
performed by a single-area IGP convergence.</t>
<t>In this specific case, the convergence should be much faster
than 1 sec as very few prefixes are impacted upon an edge node
failure. Reusing the introduction on IGP convergence presented
in an earlier section and assuming 2 important impacted prefixes
(two loopbacks per edge node), one would expect that PE2's
failure is detected in 260msec + 2*0.250msec.</t>
<t>In a hierarchical FIB organization on the ingress PE, once
the loss of an egress PE is detected, all the impacted BGP
Path-Lists associated with that egress PE need to be updated,
and the impacted traffic gets re-routed to the pre-computed
backup PEs. The time it takes to complete this operation is not
constant, but is proportional to the number of unique BGP
Path-Lists affected by the egress PE failure. Number of such
affected BGP Path-Lists is equal to the number of
"non-congruent" multi-homed sites connected to the egress PE,
where the number of non-congruent sites is defined as the number
of other PEs that these sites are connected to (note that in
defining the term "non-congruent" we refer to sites, rather than
to CEs, as a given multi-homed site can use multiple CEs).
Furthermore, per CE BGP policies (e.g. single-path vs.
multi-path) may further increase number of BGP Path-Lists
involved.</t>
<t>The LoC for BGP/VPN traffic upon PE2 failure is thus expected
to be &lt;300msec.</t>
<t>Provided that all the deployment considerations have been
met, LoC is sub-50msec with BGP Anycast.</t>
</section>
<section title="PE2's PE-CE link failure">
<t>F1 is not impacted.</t>
<t>F2 is impacted and the LoC is sub-50msec thanks to local
interface failure detection and local forwarding to the backup
PE. Forwarding to the backup PE is achieved with hierarchical
FIB or local-repair of BGP egress link providing fast re-route
to the backup BGP nhop PE.</t>
</section>
<section title="ABR node failure between right region and the core">
<t>F2 is not impacted.</t>
<t>F1 is impacted. We analyze the LoC for F1 for both
hierarchical FIB and BGP anycast.</t>
<t>LoC is sub-600msec thanks to hierarchical FIB.</t>
<t>The detection of the primary nhop failure (ABR down) is
performed by a multi-area IGP convergence.</t>
<t>First, the two (local) ABRs between the left and core
regions must complete the core IGP convergence. The analysis is
similar to the loss of PE2. We would thus expect that the core
convergence completes in ~260msec.</t>
<t>Second, the IGP convergence in the left region will cause all
AGN1 routers to detect the loss of the remote ABR. This second
IGP convergence is very similar to the first one (2 important
prefixes to remove) and hence should also complete in
~260msec.</t>
<t>Once an AGN1 has detected the loss of the remote ABR, thanks
to hierarchical FIB organization, in-place modification of
shared BGP path-list and pre-computation of BGP backup nhop, the
AGN1 reroutes flow F1 via the alternate remote ABR in a few
msec.</t>
<t>As a consequence, the LoC for F1 upon remote ABR failure is
expected to be &lt;600msec.</t>
<t>Provided that all the deployment considerations have been
met, LoC is sub-50msec with BGP Anycast.</t>
</section>
<section title="Link or node failure within the right aggregation region">
<t>F1 is impacted but LoC &lt;50msec thanks to LFA FRR. No uloop
will occur during the IGP convergence following the LFA
protection.</t>
<t>Note: if LFA is not available (other topology than case study
one) or if LFA is not enabled, then the LoC would be &lt; 1 second
as the number of impacted important IGP routes in a seamless
architecture is much smaller than 2960.</t>
<t>F2 is not impacted.</t>
</section>
<section title="AGN (connected to AN2) node failure">
<t>F1 is impacted but LoC &lt;50msec thanks to LFA FRR. No uloop
will occur during the IGP convergence following the LFA
protection.</t>
<t>Note: remember that AGN redistributes the static routes to
ANs within ISIS. The loss of an AGN on the IGP path to AN2 is
thus seen as an IGP route impact and hence LFA FRR is
applicable.</t>
<t>Note: if LFA is not available (other topology than case study
one) or if LFA is not enabled, then the LoC would be &lt; 1 second
as the number of impacted important IGP routes in a seamless
architecture is much smaller than 2960.</t>
<t>F2 is not impacted.</t>
</section>
<section title="AGN-AN2 link failure">
<t>F2 is not impacted.</t>
<t>F1 is impacted.</t>
<t>LoC is sub-300msec with IGP convergence as only one prefix
needs to be updated.</t>
<t>Sub-50msec could be guaranteed provided that the LFA
implementation supports a redistributed static as a native IGP
route.</t>
</section>
<section title="AN2 failure">
<t>F1 is impacted and the LoC lasts until the AN is
recovered.</t>
<t>F2 is not impacted.</t>
</section>
<section title="Summary - Loss of connectivity upon any failure">
<t>The Seamless MPLS architecture illustrated in deployment case
study 1 guarantees sub-50msec upon any link or node
failures.</t>
</section>
</section>
<section title="Network Resiliency and Simplicity">
<t>A fundamental aspect of the Seamless MPLS architecture is the
requirement for operational simplicity.</t>
<t>In a network with 10k of IGP/BGP nodes and 100k of MPLS-enabled
nodes, it is extremely important to provide a simple operational
process.</t>
<t>LFA FRR plays a key role in providing simplicity as it is an
automated behavior which does not require any configuration or
interoperability testing.</t>
<t>More specifically, <xref
target="I-D.filsfils-rtgwg-lfa-applicability"></xref> plays a key
role in the Seamless MPLS architecture as it describes simple
design guidelines which deterministically ensure LFA coverage for
any link and node in the aggregation regions of the network. This
is key as it provides for a simple &lt;50msec protection for the
vast majority of the node and link failures (>90% of the
IGP/BGP3107 footprint at least).</t>
<t>If the guidelines cannot be met, then the designer will
rely on either (1) augmenting native LFA coverage with RSVP, (2) a
full-mesh TE FRR model, or (3) IGP convergence. The first option
provides the same sub-50msec protection as LFA, but introduces
additional RSVP LSPs. The second option optimizes for sub-50msec
protection, but implies a more complex operational model. The
third option optimizes for simple operation but only provides
&lt;1sec protection. It is up to each designer to arbitrate between these
three options versus the possibility to engineer the topology for
native LFA protection.</t>
<t>A similar choice involves the protection against ABR node
failure and L3VPN PE node failure. The designer can either use
hierarchical FIB or Anycast BGP. It is up to each designer to assess
the trade-off between the value of sub-50msec instead of
sub-1sec versus additional operational considerations related to
Anycast BGP.</t>
</section>
<section title="Conclusion">
<t>The Seamless MPLS architecture illustrated in deployment case
study 1 guarantees sub-50msec for majority of link and node
failures by using LFA FRR, except ABR and L3PE node failures, and
PE-CE link failure.</t>
<t>L3VPN PE-CE link failure can be protected with sub-50msec
restoration, by using hierarchical FIB or local-repair
fast-reroute to the backup BGP nhop PE.</t>
<t>ABR and L3PE node failure can be protected with sub-50msec
restoration, by using BGP Anycast.</t>
<t>Alternatively, ABR and L3PE node failure can be protected with
sub-1sec restoration, by using hierarchical-FIB.</t>
</section>
</section>
<section title="Next-Hop Redundancy">
<t>An aggregation domain is connected to the core network using two
redundant area border routers, and MPLS hierarchy is applied on
these ABRs. MPLS hierarchy helps scale the FIB but introduces
additional complexity for the rerouting in case of ABR failure.
Indeed ABR failure requires a BGP convergence to update the inner MPLS
hierarchy, in addition to the IGP convergence to update the outer MPLS
hierarchy. This is also expected to take more time as BGP
convergence is performed after the IGP convergence and because the
number of prefixes to update in the FIB can be significant. This is
a drawback but the architecture allows for two "local" solutions
which restore the traffic before the BGP convergence takes
place.</t>
<t>One, called hierarchical FIB edge, would be required on all edge
LSRs involved in the inner (BGP) MPLS hierarchy, namely all routers
except the ANs, which are not involved in the inner MPLS hierarchy. It
involves pre-computing and pre-installing in the FIB the BGP backup
path. Such backup paths are activated when the IGP advertises the
failure of the primary path.</t>
<t>The other, called egress fast reroute, would be required on the egress
LSRs involved in the inner (BGP) MPLS hierarchy, namely TN and AGN
connected to ABR. It involves:</t>
<t><list>
<t>using an anycast loopback address shared by both nominal and
backup ABRs, advertised by both ABRs in the IGP and advertised as
BGP Next Hop by the nominal ABR;</t>
<t>activating IP FRR LFA on the (penultimate) hops, acting as
PLR for the anycast loopback;</t>
<t>using on the backup egress nodes (ABR2) an additional
contextual MPLS FIB populated by the labels upstream allocated
by the nominal egress node (ABR1).</t>
</list>Details can be found in <xref target="PEFRR"></xref> and
<xref target="ABRFRR"></xref>, and in the appendix of this draft.
Both solutions have their pros and cons, and the choice is left to
each Service Provider or deployment based on the different
requirements. The point is that the seamless MPLS architecture can
handle fast restoration time, even for ABR failures.</t>
</section>
</section>
<!-- End of Deployment Scenario #1 -->
<section title="Scalability Analysis">
<section title="Control and Data Plane State for Deployment Scenario #1">
<section title="Introduction">
<t>Let's call:</t>
<t><list style="symbols">
<t>#AN the number of Access Nodes (AN) in the seamless MPLS
domain</t>
<t>#AGN the number of AGgregation Nodes (AGN) in the seamless
MPLS domain</t>
<t>#Core the number of Core nodes (Core) in the core network</t>
<t>#Area the number of aggregation routing domains.</t>
</list>Let's take the following assumptions:</t>
<t><list style="symbols">
<t>Aggregation equipments are equally spread across
aggregation routing domains</t>
<t>the number of IGP links is three times the number of IGP
nodes</t>
<t>the number of IGP prefixes is five times the number of IGP
nodes (links prefixes + 2 loopbacks)</t>
<t>Access Nodes need to set up 1000 (1k) LSPs. 10% (100) are
FEC which are outside of their routing domain. Those 100
remote FEC are the same for all Access Nodes of a given
AGN.</t>
</list>The following sections roughly evaluate the scalability,
both in absolute numbers and relative to the number of Access
Nodes, which is the biggest scalability factor.</t>
</section>
<section title="Core Domain">
<t>The IGP &amp; LDP core domain are not affected by the number of
access nodes:</t>
<t hangText=""><list style="hanging">
<t hangText="IGP:"><list>
<t>node : #Core ~ o(1)</t>
<t>links : 3*#Core ~ o(1)</t>
<t>IP prefixes : 5*#Core ~ o(1)</t>
</list></t>
<t hangText="LDP FEC:"><list>
<t>#Core ~ o(1)</t>
</list></t>
</list>Core TN FIBs grow linearly with the number of nodes in
the core domain. In other words, they are not affected by AGN and
AN nodes:</t>
<t><list style="hanging">
<t hangText="Core TN:"><list>
<t>IP FIB : 5*#Core ~ o(1)</t>
<t>MPLS LFIB : #Core ~ o(1)</t>
</list></t>
</list>BGP carries all AN routes which is significant. However,
all AN routes are only needed in the control plane, possibly in a
dedicated BGP Route Reflector (just like for BGP/MPLS VPNs) and
not in the forwarding plane. The number of routes (100k) is
smaller than the number of routes in the Internet (300k
and rising) or in major VPN SP (>500k and rising) so the target
can be handled with current implementations. In addition, AN
routes are internal routes whose churn and instability is smaller
and more under control than external routes.</t>
<t><list style="hanging">
<t hangText="BGP Route Reflector (RR)"><list>
<t>NLRI : #AN ~ o(n)</t>
<t>path : 2*#AN ~ o(2n)</t>
</list></t>
</list>ABRs handle both the core and aggregation routes. They
do not depend on the total number of AN nodes, but only on the
number of AN in their aggregation domain.</t>
<t hangText="Core TN:"><list style="hanging">
<t hangText="ABR:"><list>
<t>IP FIB : 5*#Core + (5*#AGN + #AN) / #Area ~ o(#AN
/#Area)</t>
<t>MPLS LFIB : #Core + (#AGN + #AN) / #Area ~ o(#AN /
#Area)</t>
</list></t>
</list></t>
</section>
<section title="Aggregation Domain">
<t>In the aggregation domain, IGP &amp; LDP are not affected by
the number of access nodes outside of their domain. They are not
affected by the total number of AN nodes:</t>
<t><list style="hanging">
<t hangText="IGP:"><list>
<t>node : #AGN / #Area ~ o(1)</t>
<t>links : 3*#AGN / #Area ~ o(1)</t>
<t>IP prefixes : #Core + #Area + (5*#AGN + #AN) / #Area ~
o(#AN *5/ #Area)</t>
<t><list style="symbols">
<t>+ 1 loopback per core node + one aggregate per area
+ 5 prefixes per AGN in the area + 1 prefix per AN in
the area.</t>
</list></t>
</list></t>
</list><list style="hanging">
<t hangText="LDP FEC:"><list>
<t>#Core + (#AGN + #AN) / #Area ~ o(#AN / #Area)</t>
<t><list style="symbols">
<t>+ 1 loopback per core node + 1 loopback per AGN
&amp; AN node in the area.</t>
</list></t>
</list></t>
</list>AGN FIBs grow with the number of nodes in the core area,
in their aggregation area, plus the number of inter domain LSPs
required by the ANs attached to them. They do not depend on the
total number of AN nodes. In the BGP control plane, AGNs also need
to handle all the AN routes.</t>
<t><list style="hanging">
<t hangText="AGN:"><list>
<t>IP FIB : #Core + #Area + (5*#AGN + #AN) / #Area ~ o(#AN
*5/ #Area)</t>
<t>MPLS LFIB : #Core + (#AGN + #AN) / #Area + 100 ~ o(#AN
/ #Area)</t>
</list></t>
</list>AN FIBs grow with their connectivity requirements. They do
not depend on the number of AN, AGN, SN or any other nodes.</t>
<t><list style="hanging">
<t hangText="AN:"><list>
<t>IP RIB : 1 ~ o(1)</t>
<t>MPLS LIB : 1k ~ o(1)</t>
<t>IP FIB : 1 ~ o(1)</t>
<t>MPLS LFIB : 1k ~ o(1)</t>
</list></t>
</list></t>
</section>
<section title="Summary">
<t>AN requirements are kept minimal. BGP is not required and the
size of their FIB is limited to their own connectivity
requirements.</t>
<t>In the core area, IGP and LDP are not affected by the nodes in
the aggregation domains. In particular they do not grow with the
number of AGN or AN.</t>
<t>In the aggregation areas, IGP and LDP are affected by the
number of core nodes and the number of AGN and AN in their area.
They are not affected by the total number of AGN or AN in the
seamless MPLS domain.</t>
<t>No FIB of any node is required to handle the total number of
AGN or AN in the seamless MPLS domain. In other words, the number
of AGN and AN in the seamless MPLS domain is not limited, if the
number of areas can grow accordingly. The main limitation is the
MPLS connectivity requirements on the AN, i.e. mainly the number
of LSP needed on the AN. Another limitation may be the number of
different LSP needed by AN attached or behind an AGN. However,
given foreseen deployments and current AGN capabilities, this is
not expected to be a limitation.</t>
<t>In the control plane, BGP will typically handle all AN routes.
This is significant but target deployments are well under current
equipment capacities. In addition, if required, additional
techniques could be used to improve this scalability, based on the
experience gained with scaling BGP/MPLS VPN (e.g. route
partitioning between RR planes, route filtering (static or dynamic
with ORF or route refresh) between AN and on AGN to improve AGN
scalability).</t>
</section>
<section title="Numerical application for use case #1">
<t>As a recap, targets for deployment scenario 1 are:</t>
<t><list style="symbols">
<t>Number of Aggregation Domains 100</t>
<t>Number of Backbone Nodes 1 000</t>
<t>Number of AGgregation Nodes 10 000</t>
<t>Number of Access Nodes 100 000</t>
</list>This gives the following scaling numbers for each
category of nodes:</t>
<t><list style="symbols">
<t>AN IP FIB 1</t>
<t>AN MPLS LFIB 1 000</t>
<t>AGN IP FIB 2 600</t>
<t>AGN MPLS LFIB 2 200</t>
<t>ABR IP FIB 7 600</t>
<t>ABR MPLS LFIB 2 100</t>
<t>TN IP FIB 5 000</t>
<t>TN MPLS LFIB 1 000</t>
<t>RR BGP NLRI 100 000</t>
<t>RR BGP paths 200 000</t>
</list></t>
</section>
<section title="Numerical application for use case #2">
<t>As a recap, targets for deployment scenario 2 are:</t>
<t><list style="symbols">
<t>Number of Aggregation Domains 30</t>
<t>Number of Backbone Nodes 150</t>
<t>Number of AGgregation Nodes 1 500</t>
<t>Number of Access Nodes 40 000</t>
</list>This gives the following scaling numbers for each
category of nodes:</t>
<t><list style="symbols">
<t>AN IP FIB 1</t>
<t>AN MPLS LFIB 1 000</t>
<t>AGN IP FIB 1 700</t>
<t>AGN MPLS LFIB 1 800</t>
<t>ABR IP FIB 3 700</t>
<t>ABR MPLS LFIB 1 600</t>
<t>TN IP FIB 750</t>
<t>TN MPLS LFIB 150</t>
<t>RR BGP NLRI 40 000</t>
<t>RR BGP paths 80 000</t>
</list></t>
</section>
</section>
</section>
</section>
<section anchor="Acknowledgements" title="Acknowledgements">
<t>Many people contributed to this document. The authors would like to
thank Wim Henderickx, Clarence Filsfils, Thomas Beckhaus, Wilfried Maas,
Roger Wenner, Kireeti Kompella, Yakov Rekhter, Mark Tinka and Simon
DeLord for their suggestions and review.</t>
</section>
<!-- Possibly a 'Contributors' section ... -->
<section anchor="IANA" title="IANA Considerations">
<t>This memo includes no request to IANA.</t>
<t>All drafts are required to have an IANA considerations section (see
<xref target="I-D.narten-iana-considerations-rfc2434bis">the update of
RFC 2434</xref> for a guide). If the draft does not require IANA to do
anything, the section contains an explicit statement that this is the
case (as above). If there are no requirements for IANA, the section will
be removed during conversion into an RFC by the RFC Editor.</t>
</section>
<section anchor="security" title="Security Considerations">
<t>The Seamless MPLS Architecture is subject to similar security threats
as any MPLS LDP deployment. It is recommended that baseline security
measures are considered as described in the LDP specification <xref
target="RFC5036">RFC5036</xref> including ensuring authenticity and
integrity of LDP messages, as well as protection against spoofing and
Denial of Service attacks. Some deployments may require increased
measures of network security if a subset of Access Nodes are placed in
locations with lower levels of physical security e.g. street cabinets
(common practice for VDSL access). In such cases it is the
responsibility of the system designer to take into account the physical
security measures (environmental design, mechanical or electronic
access control, intrusion detection), as well as monitoring and
auditing measures (configuration and Operating System changes, reloads,
route advertisements). But even with all this in mind, the designer
still should consider network security risks and adequate measures
arising from the lower level of physical security of those
locations.</t>
<section title="Access Network Security">
<t>A detailed description for Access Network Security in Seamless MPLS
can be found in the LDP Downstream on Demand document <xref
target="I-D.ietf-mpls-ldp-dod"></xref>. </t>
</section>
<section title="Data Plane Security">
<t>Data plane security risks applicable to the access MPLS network are
listed below (a non-exhaustive list):</t>
<t><list style="letters">
<t>packets from a specific access node flow to an altered
transport layer or service layer destination.</t>
<t>packets belonging to undefined services flow to and from the
access network.</t>
<t>unlabelled packets destined to remote network nodes.</t>
</list>The following mechanisms should be considered to address the listed
data plane security risks:</t>
<t><list style="numbers">
<t>addressing (a) - Access and ABR LSRs SHOULD NOT accept labeled
packets over a particular data link, unless from the Access or ABR
LSR perspective this data link is known to attach to a trusted
system based on employed authentication mechanism(s), and the top
label has been distributed to the upstream neighbour by the
receiving Access or ABR LSR.</t>
<t>addressing (a) - ABR LSR MAY restrict network
reachability for access devices to a subset of remote network LSR,
based on authentication or other network security technologies
employed towards Access LSRs. Restricted reachability can be
enforced on the ABR LSR using local routing policies, and can be
distributed towards the core MPLS network using routing policies
associated with access MPLS FECs.</t>
<t>addressing (b) - labeled service routes (e.g. MPLS/VPN, tLDP)
are not accepted from unreliable routing peers. Detection of
unreliable routing peers is achieved by engaging routing protocol
detection and alarm mechanisms, and is out of scope of this
document.</t>
<t>addressing (a) and (b) - no successful attacks have been
mounted on the control plane and has been detected.</t>
<t>addressing (c) - ABR LSR MAY restrict IP network reachability
to and from the access LSR.</t>
</list></t>
</section>
<section title="Control Plane Security">
<t>Similarly to Inter-AS MPLS/VPN deployments <xref
target="RFC4364">RFC4364</xref>, the data plane security depends on
the security of the control plane. To ensure control plane security
access LDP DoD connections MUST only be made with LDP peers that are
considered trusted from the local LSR perspective, meaning they are
reachable over a data link that is known to attach to a trusted system
based on employed authentication mechanism(s) on the local LSR. The
TCP/IP MD5 authentication option <xref target="RFC5925">RFC5925</xref>
should be used with LDP as described in LDP specification <xref
target="RFC5036">RFC5036</xref>. If TCP/IP MD5 authentication is
considered not secure enough, the designer may consider using a more
elaborate and advanced TCP Authentication Option (TCP-AO <xref
target="RFC5925">RFC5925</xref>) for LDP session authentication.
Access IGP (if used) and any routing protocols used in access network
for signalling service routes SHOULD also be secured in a similar
manner. For an increased level of authentication in the control plane
security for a subset of access locations with lower physical
security, the designer could also consider using:</t>
<t><list style="symbols">
<t>different crypto keys for use in authentication procedures for
these locations.</t>
<t>stricter network protection mechanisms including DoS
protection, interface and session flap dampening.</t>
</list></t>
</section>
</section>
</middle>
<!-- *****BACK MATTER ***** -->
<back>
<!-- References split into informative and normative -->
<!-- There are 2 ways to insert reference entries from the citation libraries:
1. define an ENTITY at the top, and use "ampersand character"RFC2629; here (as shown)
2. simply use a PI "less than character"?rfc include="reference.RFC.2119.xml"?> here
(for I-Ds: include="reference.I-D.narten-iana-considerations-rfc2434bis.xml")
Both are cited textually in the same manner: by using xref elements.
If you use the PI option, xml2rfc will, by default, try to find included files in the same
directory as the including file. You can also define the XML_LIBRARY environment variable
with a value containing a set of directories to search. These can be either in the local
filing system or remote ones accessed by http (http://domain/dir/... ).-->
<references title="Normative References">
<!--?rfc include="http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml"?-->
&RFC2119;
</references>
<references title="Informative References">
<!-- Here we use entities that we defined at the beginning. -->
&RFC2629;
&RFC3107;
&RFC3031;
&RFC3209;
&RFC3353;
&RFC3552;
&RFC4090;
&RFC4364;
&RFC5036;
&RFC5283;
&RFC5286;
&RFC5332;
&RFC5925;
&I-D.kothari-henderickx-l2vpn-vpls-multihoming;
&I-D.narten-iana-considerations-rfc2434bis;
&I-D.ietf-bfd-v4v6-1hop;
&I-D.ietf-mpls-ldp-dod;
&I-D.filsfils-rtgwg-lfa-applicability;
&I-D.sajassi-l2vpn-rvpls-bgp;
&I-D.raggarwa-mac-vpn;
<reference anchor="PEFRR">
<front>
<title>Fast Reroute in MPLS L3VPN Networks - Towards CE-to-CE
Protection, MPLS 2006 Conference</title>
<author fullname="Le Roux, J.L." initials="J.L." surname="Le Roux">
<organization>Le</organization>
</author>
<author fullname="Decraene, B." initials="B." surname="Decraene">
<organization></organization>
<address>
<postal>
<street></street>
<city></city>
<region></region>
<code></code>
<country></country>
</postal>
<phone></phone>
<facsimile></facsimile>
<email></email>
<uri></uri>
</address>
</author>
<author fullname="Ahmad, Z." initials="Z." surname="Ahmad">
<organization></organization>
<address>
<postal>
<street></street>
<city></city>
<region></region>
<code></code>
<country></country>
</postal>
<phone></phone>
<facsimile></facsimile>
<email></email>
<uri></uri>
</address>
</author>
<date />
</front>
</reference>
<reference anchor="ABRFRR">
<front>
<title>Local Protection for LSP tail-end node failure, MPLS World
Congress 2009</title>
<author fullname="Rekhter, Y" initials="Y." surname="Rekhter">
<organization></organization>
</author>
<date />
</front>
</reference>
<reference anchor="BGPPIC">
<front>
<title>BGP PIC, Technical Report</title>
<author fullname="Cisco Systems">
<organization></organization>
</author>
<date month="November" year="2007" />
</front>
</reference>
<reference anchor="ACM01">
<front>
<title>Achieving sub-second IGP convergence in large IP networks,
ACM SIGCOMM Computer Communication Review, v.35 n.3</title>
<author fullname="Pierre Francois">
<organization></organization>
</author>
<author fullname="Clarence Filsfils">
<organization></organization>
<address>
<postal>
<street></street>
<city></city>
<region></region>
<code></code>
<country></country>
</postal>
<phone></phone>
<facsimile></facsimile>
<email></email>
<uri></uri>
</address>
</author>
<author fullname="John Evans">
<organization></organization>
<address>
<postal>
<street></street>
<city></city>
<region></region>
<code></code>
<country></country>
</postal>
<phone></phone>
<facsimile></facsimile>
<email></email>
<uri></uri>
</address>
</author>
<author fullname="Olivier Bonaventure">
<organization></organization>
<address>
<postal>
<street></street>
<city></city>
<region></region>
<code></code>
<country></country>
</postal>
<phone></phone>
<facsimile></facsimile>
<email></email>
<uri></uri>
</address>
</author>
<date month="July" year="2005" />
</front>
</reference>
<!-- A reference written by an organization not a person. -->
</references>
<!-- Change Log
v00 2006-03-15 EBD Initial version
v01 2006-04-03 EBD Moved PI location back to position 1 -
v3.1 of XMLmind is better with them at this location.
v02 2007-03-07 AH removed extraneous nested_list attribute,
other minor corrections
v03 2007-03-09 EBD Added comments on null IANA sections and fixed heading capitalization.
Modified comments around figure to reflect non-implementation of
figure indent control. Put in reference using anchor="DOMINATION".
Fixed up the date specification comments to reflect current truth.
v04 2007-03-09 AH Major changes: shortened discussion of PIs,
added discussion of rfc include.
v05 2007-03-10 EBD Added preamble to C program example to tell about ABNF and alternative
images. Removed meta-characters from comments (causes problems). -->
</back>
</rfc>
| PAFTECH AB 2003-2026 | 2026-04-22 08:43:51 |