http://stupid.domain.name/ietf/

One document matched: draft-ietf-mpls-seamless-mpls-01.xml
<?xml version="1.0" encoding="US-ASCII"?>
<!-- This template is for creating an Internet Draft using xml2rfc,
     which is available here: http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!-- One method to get references from the online citation libraries.
     There has to be one entity for each item to be referenced. 
     An alternate method (rfc include) is described in the references. -->
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC2629 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2629.xml">
<!ENTITY RFC3107 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3107.xml">
<!ENTITY RFC3031 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3031.xml">
<!ENTITY RFC3209 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3209.xml">
<!ENTITY RFC3353 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3353.xml">
<!ENTITY RFC3552 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3552.xml">
<!ENTITY RFC4090 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4090.xml">
<!ENTITY RFC4364 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4364.xml">
<!ENTITY RFC5036 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5036.xml">
<!ENTITY RFC5283 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5283.xml">
<!ENTITY RFC5286 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5286.xml">
<!ENTITY RFC5332 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5332.xml">
<!ENTITY RFC5925 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5925.xml">
<!ENTITY I-D.narten-iana-considerations-rfc2434bis SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.narten-iana-considerations-rfc2434bis.xml">
<!ENTITY I-D.kothari-henderickx-l2vpn-vpls-multihoming SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.kothari-henderickx-l2vpn-vpls-multihoming.xml">
<!ENTITY I-D.ietf-bfd-v4v6-1hop SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-bfd-v4v6-1hop.xml">
<!ENTITY I-D.filsfils-rtgwg-lfa-applicability SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.filsfils-rtgwg-lfa-applicability.xml">
<!ENTITY I-D.sajassi-l2vpn-rvpls-bgp SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.sajassi-l2vpn-rvpls-bgp.xml">
<!ENTITY I-D.raggarwa-mac-vpn SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.raggarwa-mac-vpn.xml">
<!ENTITY I-D.ietf-mpls-ldp-dod SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-mpls-ldp-dod.xml">
]>
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<!-- used by XSLT processors -->
<!-- For a complete list and description of processing instructions (PIs), 
     please see http://xml.resource.org/authoring/README.html. -->
<!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds might want to use.
     (Here they are set differently than their defaults in xml2rfc v1.32) -->
<?rfc strict="yes" ?>
<!-- give errors regarding ID-nits and DTD validation -->
<!-- control the table of contents (ToC) -->
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space 
     (using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->
<rfc category="info" docName="draft-ietf-mpls-seamless-mpls-01"
     ipr="trust200902">
  <!-- category values: std, bcp, info, exp, and historic
     ipr values: full3667, noModification3667, noDerivatives3667
     you can add the attributes updates="NNNN" and obsoletes="NNNN" 
     they will automatically be output with "(if approved)" -->

  <!-- ***** FRONT MATTER ***** -->

  <front>
    <!-- The abbreviated title is used in the page header - it is only necessary if the 
         full title is longer than 39 characters -->

    <title abbrev="Seamless MPLS">Seamless MPLS Architecture</title>

    <!-- add 'role="editor"' below for the editors if appropriate -->

    <!-- Another author who claims to be an editor -->

    <author fullname="Nicolai Leymann" initials="N.L." role="editor"
            surname="Leymann">
      <organization>Deutsche Telekom AG</organization>

      <address>
        <postal>
          <street>Winterfeldtstrasse 21</street>

          <!-- Reorder these if your country does things differently -->

          <city>Berlin</city>

          <code>10781</code>

          <country>DE</country>
        </postal>

        <phone>+49 30 8353-92761</phone>

        <email>n.leymann@telekom.de</email>

        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <author fullname="Bruno Decraene" initials="B.D." surname="Decraene">
      <organization>France Telecom</organization>

      <address>
        <postal>
          <street>38-40 rue du General Leclerc</street>

          <city>Issy Moulineaux cedex 9</city>

          <region></region>

          <code>92794</code>

          <country>FR</country>
        </postal>

        <phone></phone>

        <facsimile></facsimile>

        <email>bruno.decraene@orange-ftgroup.com</email>

        <uri></uri>
      </address>
    </author>

    <author fullname="Clarence Filsfils" initials="C.F." surname="Filsfils">
      <organization>Cisco Systems</organization>

      <address>
        <postal>
          <street></street>

          <city>Brussels</city>

          <region></region>

          <code></code>

          <country>Belgium</country>
        </postal>

        <phone></phone>

        <facsimile></facsimile>

        <email>cfilsfil@cisco.com</email>

        <uri></uri>
      </address>
    </author>

    <author fullname="Maciek Konstantynowicz" initials="M.K."
            surname="Konstantynowicz">
      <organization>Cisco Systems</organization>

      <address>
        <postal>
          <street></street>

          <city></city>

          <region></region>

          <code></code>

          <country></country>
        </postal>

        <phone></phone>

        <facsimile></facsimile>

        <email>maciek@cisco.com</email>

        <uri></uri>
      </address>
    </author>

    <author fullname="Dirk Steinberg" initials="D.S." surname="Steinberg">
      <organization>Steinberg Consulting</organization>

      <address>
        <postal>
          <street>Ringstrasse 2</street>

          <city>Buchholz</city>

          <code>53567</code>

          <country>DE</country>
        </postal>

        <email>dws@steinbergnet.net</email>
      </address>
    </author>

    <date day="12" month="March" year="2012" />

    <!-- If the month and year are both specified and are the current ones, xml2rfc will fill 
         in the current day for you. If only the current year is specified, xml2rfc will fill 
	 in the current day and month for you. If the year is not the current one, it is 
	 necessary to specify at least a month (xml2rfc assumes day="1" if not specified for the 
	 purpose of calculating the expiry date).  With drafts it is normally sufficient to 
	 specify just the year. -->

    <!-- Meta-data Declarations -->

    <area>Routing Area</area>

    <workgroup>MPLS Working Group</workgroup>

    <!-- WG name at the upperleft corner of the doc,
         IETF is fine for individual submissions.  
	 If this element is not present, the default is "Network Working Group",
         which is used by the RFC Editor as a nod to the history of the IETF. -->

    <keyword>Seamless MPLS, MPLS, access network, aggregation network, WAN,
    MAN, leymann, kompella, filsfils, hendrickx</keyword>

    <!-- Keywords will be incorporated into HTML output
         files in a meta tag but they have no effect on text or nroff
         output. If you submit your draft to the RFC Editor, the
         keywords will be used for the search engine. -->

    <abstract>
      <t>This document describes an architecture which can be used to extend
      MPLS networks to integrate access and aggregation networks into a single
      MPLS domain ("Seamless MPLS"). The Seamless MPLS approach is based on
      existing and well known protocols. It provides a highly flexible and
      scalable architecture and the possibility to integrate on the order of
      100.000 nodes.
      The separation of the service and transport plane is one of the key
      elements; Seamless MPLS provides end to end service independent
      transport. Therefore it removes the need for service specific
      configurations in network transport nodes (without end to end transport
      MPLS, some additional services nodes/configurations would be required to
      glue each transport domain). This draft defines a routing architecture
      using existing standardized protocols. It does not invent any new
      protocols or define extensions to existing protocols.</t>
    </abstract>
  </front>

  <middle>
    <section title="Introduction">
      <t>MPLS as a mature and well known technology is widely deployed in
      today's core and aggregation/metro area networks. Many metro area
      networks are already based on MPLS delivering Ethernet services to
      residential and business customers. Until now those deployments are
      usually done in different domains; e.g. core and metro area networks are
      handled as separate MPLS domains.</t>

      <t>Seamless MPLS extends the core domain and integrates aggregation and
      access domains into a single MPLS domain ("Seamless MPLS"). This enables
      a very flexible deployment of an end to end service delivery. In order
      to obtain a highly scalable architecture Seamless MPLS takes into
      account that typical access devices (DSLAMs, MSAN) are lacking some
      advanced MPLS features, and may have more scalability limitations. Hence
      access devices are kept as simple as possible.</t>

      <t>Seamless MPLS is not a new protocol suite but describes an
      architecture by deploying existing protocols like BGP, LDP and ISIS.
      Multiple options are possible and this document aims at defining a
      single architecture for the main function in order to ease
      implementation prioritization and deployments in multi vendor networks.
      Yet the architecture should be flexible enough to allow some level of
      personalization, depending on use cases, existing deployed base and
      requirements. Currently, this document focuses on end to end unicast
      LSPs.</t>

      <section title="Requirements Language">
        <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
        "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
        document are to be interpreted as described in <xref
        target="RFC2119">RFC 2119</xref>.</t>
      </section>

      <section title="Terminology">
        <t>This document uses the following terminology<list style="symbols">
            <t>Access Node (AN): An access node is a node which processes
            customers frames or packets at Layer 2 or above. This includes but
            is not limited to DSLAMs or OLTs (in case of (G)PON deployments).
            Access nodes have only limited MPLS functionalities in order to
            reduce complexity in the access network.</t>

            <t>Aggregation Node (AGN): An aggregation node (AGN) is a node
            which aggregates several access nodes (ANs).</t>

            <t>Area Border Router (ABR): Router between aggregation and core
            domain.</t>

            <t>Deployment Scenario: Describes an implementation of
            Seamless MPLS which fulfills the requirements derived from
            one or more use cases.</t>

            <t>Seamless MPLS Domain: A set of MPLS equipments which can set
            MPLS LSPs between them.</t>

            <t>Transport Node (TN): Transport nodes are used to connect access
            nodes to service nodes, and service nodes to service nodes.
            Transport nodes ideally have no customer or service state and are
            therefore decoupled from service creation.</t>

            <t>Seamless MPLS (S-MPLS): Used as a generic term to describe an
            architecture which integrates access, aggregation and core network
            in a single MPLS domain.</t>

            <t>Service Node (SN): A service node is used to create services
            for customers and is connected to one or more transport nodes.
            Typical examples include Broadband Network Gateways (BNGs) and
            video servers.</t>

            <t>Transport Pseudo Wire (T-PW): A transport pseudowire provides
            service independent transport mechanisms based on Pseudo-Wires
            within the Seamless MPLS architecture.</t>

            <t>Use Case: Describes a typical network including service
            creation points in order to describe the requirements, typical
            numbers etc. which need to be taken into account when applying the
            Seamless MPLS architecture.</t>
          </list></t>
      </section>
    </section>

    <section title="Motivation">
      <t>MPLS has been deployed in core and aggregation networks for several
      years and provides a mature and stable basis for large networks. In
      addition, MPLS is already used in access networks, e.g. for mobile or
      DSL backhaul. Today MPLS as a technology is being used on two different
      layers:</t>

      <t><list style="symbols">
          <t>the Transport Layer and</t>

          <t>the Service Layer (e.g. for MPLS VPNs)</t>
        </list>In both cases the protocols and the encapsulation are identical
      but the use of MPLS is different especially concerning the signalling,
      the control plane, the provisioning, the scalability and the frequency
      of updates. On the service layer only service specific information is
      exchanged; every service can potentially deploy its own architecture
      and individual protocols. The services are running on top of the
      transport layer. Nevertheless those deployments are usually isolated,
      focussed on a single use case and not integrated in an end-to-end
      manner.</t>

      <t>The motivation of Seamless MPLS is to provide an architecture which
      supports a wide variety of different services on a single MPLS platform
      fully integrating access, aggregation and core network. The architecture
      can be used for residential services, mobile backhaul, business services
      and supports fast reroute, redundancy and load balancing. Seamless MPLS
      provides the deployment of service creation points which can be
      virtually everywhere in the network. This provides network and service
      providers with flexible services and flexible service creation. Service
      creation can be done based on the existing requirements without the need
      for dedicated service creation areas at fixed locations. With the
      flexibility of Seamless MPLS the service creation can be done anywhere
      in the network and easily moved between different locations.</t>

      <section title="Why Seamless MPLS">
        <t>Multiple SPs plan to deploy networks with 10k to 100k MPLS nodes.
        This is typically at least one order of magnitude higher than typical
        deployments and may require a new architecture. Multiple options are
        possible and it makes sense for the industry (both vendors and SPs) to
        restrict the options in order to ease the first deployments (e.g.
        restrict the number of options to implement and/or scales for vendors,
        reduce interoperability and debugging issues for SPs).</t>

        <t>Many aggregation networks are already deploying MPLS but are
        limited to the use of MPLS per aggregation area. Those MPLS based
        aggregation domains are connected to a core network running MPLS as
        well. Nevertheless most of the services are not limited to an
        aggregation domain but running between several aggregation domains
        crossing the core network. In the past it was necessary to provide
        connectivity between the different domains and the core on a per
        service level and not based on MPLS (e.g. by deploying native
        IP-Routing or Ethernet based technologies between aggregation and
        core). In most cases service specific configurations on the border
        nodes between core and aggregation were required. New services led to
        additional configurations and changes in the provisioning tools (see
        <xref target="serviceConfig"></xref>).</t>

        <t>With Seamless MPLS there are no technology boundaries and no
        topology boundaries for the services. Network (or region) boundaries
        are for scaling and manageability, and do not affect the service
        layer, since the Transport Pseudowire that carries packets from the AN
        to the SN doesn't care whether it takes two hops or twenty, nor how
        many region boundaries it needs to cross. The network architecture is
        about network scaling, network resilience and network manageability;
        the service architecture is about optimal delivery: service scaling,
        service resilience (via replicated SNs) and service manageability. The
        two are decoupled: each can be managed separately and changed
        independently.</t>

        <t></t>

        <figure align="center" anchor="serviceConfig"
                title="Service Specific Configurations">
          <artwork><![CDATA[+--------------+         +--------------+         +--------------+
|  Aggregation |         |     Core     |         |  Aggregation |             
|   Domain #1  +---------+    Domain    +---------+   Domain #2  |
|     MPLS     | ^       |     MPLS     |       ^ |     MPLS     |
+--------------+ |       +--------------+       | +--------------+
                 |                              |
                 +------ service specific ------+
                          configuration
]]></artwork>
        </figure>

        <t></t>

        <t>One of the main motivations of Seamless MPLS is to get rid of
        service specific configurations between the different MPLS islands.
        Seamless MPLS connects all MPLS domains on the MPLS transport layer
        providing a single transport layer for all services - independent of
        the service itself. The Seamless MPLS architecture therefore decouples
        the service and transport layer and integrates access, aggregation and
        core into a single platform. One of the big advantages is that
        problems on the transport layer only need to be solved once (and the
        solutions are available to all services). With Seamless MPLS it is not
        necessary to use service specific configurations on intermediate
        nodes; all services can be deployed in an end to end manner.</t>
      </section>

      <section title="Use Case #1">
        <section title="Description">
          <t>In most cases at least residential and business services need to
          be supported by a network. This section describes a Seamless MPLS
          use case which supports such a scenario. The use case includes point
          to point services for business customers as well as typical service
          creation for residential customers.</t>

          <t></t>

          <t><figure align="center" anchor="serviceUseCase01"
              title="Use Case #1: Service Creation">
              <artwork><![CDATA[                    +-------------+    
                    |   Service   |
                    |  Creation   |
                    | Residential |
                    |  Customers  |
                    +------+------+
                           |
                           |
                           |
    PW1    +-------+   +---+---+   
   #########################   |   
   #    +--+ AGN11 +---+ AGN21 +  +------+
   #   /   |       |  /|       |\ |      |           +--------+
+--#-+/    +-------+\/ +-------+ \|      |           | remote |
| AN |              /\            + CORE +---......--+   AN   |     
+--#-+\    +-------+  \+-------+ /|      |         #######    |
   #   \   |       |   |       |/################### +--------+
   #    +--+ AGN12 +---+ AGN22 +##+------+  P2P Business Service
   ##############################
    PW2    +-------+   +-------+   
]]></artwork>
            </figure></t>

          <t></t>

          <t><xref target="serviceUseCase01"></xref> shows the different
          service creation points and the corresponding pseudowires between
          the access nodes and the service creation points. The use case does
          not show all PWs (e.g. not the PWs needed to support redundancy) in
          order to keep the figure simple. Node and link failures are handled
          by rerouting the PWs (based on standard mechanisms). End customers
          (either residential or business customers) are connected to the
          access nodes using a native technology like Ethernet. The access
          nodes terminate the PW(s) carrying the traffic for the end
          customers. The link between the access node (AN) and the aggregation
          node (AGN) is the first MPLS enabled link.</t>

          <t><list style="hanging">
              <t hangText="Residential Services:">The service creation for all
              residential customers connected to the Access Nodes in an
              aggregation domain is located on a Service Node connected to
              the AGN2x. The PW (PW1) originates at the AN and terminates at
              the AGN2. A second PW is deployed in the case where redundancy
              is needed on the AN (the figure shows redundancy but this might
              not be the case for all ANs in this Use Case). Additional PWs can
              be deployed as well in case more than a single service creation
              is needed for the residential service (e.g. one service creation
              point for Internet access and a second service creation point
              for IPTV services).</t>

              <t hangText="Business Services:">For business services the use
              case shows point to point connections between two access nodes.
              PW2 originates at the AN and terminates on the remote AN
              crossing two aggregation areas and the core network. If the
              access node needs connections to several remote ANs the
              corresponding number of PWs will be originated at the AN.
              Nevertheless taking the number of ports available and the number
              of business customers on a typical access node the number of PWs
              will be relatively small.</t>
            </list></t>

          <t></t>

          <figure align="center" anchor="redUseCase01"
                  title="Use Case #1: Redundancy">
            <artwork><![CDATA[           +-------+   +-------+   +------+   +------+
           |       |   |       |   |      |   |      |
        +--+ AGN11 +---+ AGN21 +---+ ABR1 +---+ LSR1 +--> to AGN
       /   |       |  /|       |   |      |   |      |
+----+/    +-------+\/ +-------+   +------+  /+------+
| AN |              /\                     \/
+----+\    +-------+  \+-------+   +------+/\ +------+
       \   |       |   |       |   |      |  \|      |
        +--+ AGN12 +---+ AGN22 +---+ ABR2 +---+ LSR2 +--> to AGN
           |       |   |       |   |      |   |      |
           +-------+   +-------+   +------+   +------+

static route     ISIS L1 LDP             ISIS L2 LDP

<-Access-><--Aggregation Domain--><---------Core---------> 
]]></artwork>
          </figure>

          <t></t>

          <t><xref target="redUseCase01"></xref> shows the redundancy at the
          access and aggregation network deploying a two stage aggregation
          network (AGN1x/AGN2x). Nevertheless redundancy is not a MUST in this
          use case. It is also possible to use non redundant connections
          between the ANs and AGN1 stage and/or between the AGN1 and AGN2
          stages. The AGN2x stage is used to aggregate traffic from several
          AGN1x pairs. In this use case an aggregation domain is not limited
          to the use of a single pair of AGN2x; the deployment of several AGN2
          pairs within the domain is also supported. As a design goal for the
          scalability of the routing and forwarding within the Seamless MPLS
          architecture the following numbers are used:</t>

          <t><list style="symbols">
              <t>Number of Aggregation Domains: 100</t>

              <t>Number of Backbone Nodes: 1.000</t>

              <t>Number of Aggregation Nodes: 10.000</t>

              <t>Number of Access Nodes: 100.000</t>
            </list>The access nodes (AN) are dual homed to two different
          aggregation nodes (AGN11 and AGN12) using static routing entries on
          the AN. The ANs are always source or sink nodes for MPLS traffic but
          not transit nodes. This allows a light MPLS implementation in order
          to reduce the complexity in the AN. The aggregation network consists
          of two stages with redundant connections between the stages (AGN11
          is connected to AGN21 and AGN22 as well as AGN12 to AGN21 and
          AGN22). The gateway between the aggregation and core network is
          realized using the Area Border Routers (ABR). From the perspective
          of the MPLS transport layer all systems are clearly identified using
          the loopback address of the system. An ingress node must be able to
          establish a service to an arbitrary egress system by using the
          corresponding MPLS transport label.</t>
        </section>

        <section title="Typical Numbers">
          <t><xref target="numUseCase01"></xref> shows typical numbers which
          are expected for Use Case #1 (access node).</t>

          <t></t>

          <texttable align="center" anchor="numUseCase01"
                     title="Use Case #1: Typical Numbers for Access Node">
            <ttcol>Parameter</ttcol>

            <ttcol>Typical Value</ttcol>

            <c>IGP Control Plane</c>

            <c>2</c>

            <c>IP FIB</c>

            <c>2</c>

            <c>LDP Control Plane</c>

            <c>200</c>

            <c>LDP FIB</c>

            <c>200</c>

            <c>BGP Control Plane</c>

            <c>0</c>

            <c>BGP FIB</c>

            <c>0</c>
          </texttable>

          <t></t>
        </section>
      </section>

      <section title="Use Case #2">
        <section title="Description">
          <t>In most cases, residential, wholesale and business services need
          to be supported by the network.</t>

          <t><figure align="center" anchor="usecase2" title="Use Case #2">
              <artwork><![CDATA[                      +-------------+
                      |   Service   |
                      |  platforms  |
                      |(VoIP, VoD..)|
                      | Residential |
                      |  Customers  |
                      +------+------+
                             |
                             |
        +---+    +-----+  +--+--+   +-----+
        |AN1|----+AGN11+--+AGN21+---+ ABR |
        +---+    +--+--+  +--+--+   +--+--+
                    |        |         |
        +---+    +--+--+     |         |          +----+
        |AN2|----+AGN12+     |         |        --+ PE |
        +---+    +--+--+     |         |          +----+
                    |        |         |
                    .        |         |
                    .        |         |
                    .        |         |
                    |        |         |
+---+   +---+    +--+--+  +--+--+   +--+--+
|AN4+---+AN3|----+AGN1x+--+AGN22+---+ ABR |
+---+   +---+    +-----+  +-----+   +-----+

     <-Access-><--Aggregation Domain--><---------Core--------->
]]></artwork>
            </figure></t>

          <t>The above topology (see <xref target="usecase2"></xref>) is
          subject to evolutions, depending on AN types and capacities (in
          terms of number of customers and/or aggregated bandwidth). For
          example, the AGN1x connection toward AGN2y currently forms a ring
          but may later evolve into a square or triangle topology; AGN2y nodes
          may not be present...</t>

          <t>Most access nodes (ANs) are single attached to one aggregation
          node using static routing entries on the AN and AGN. Some ANs are
          dual attached to two different AGNs using static routes. Some ANs
          are used as transit by some lower level ANs. Static routes are
          expected to be used between those ANs.</t>

          <t>IPv4, IPv6 and MPLS interconnection between the aggregation and
          core network is realized using the Area Border Routers (ABR). Any
          ingress node must be able to establish IPv4, IPv6 and MPLS
          connections to any egress node in the seamless MPLS domain.</t>

          <t>Regarding MPLS connectivity requirements, a full mesh of MPLS
          LSPs is required between the ANs of an aggregation area, at least
          for 6PE purposes. Some additional LSPs are needed between ANs and
          some PEs in the aggregation area or in the core area for access to
          services, wholesale and enterprise services. In short, a mesh of
          LSPs is required between the AGNs of the whole seamless MPLS domain.
          Finally, an LSP from any node to any other node should be
          possible.</t>

          <t>From a scalability standpoint, the following numbers are the
          targets:</t>

          <t><list style="symbols">
              <t>Number of Aggregation Domains: 30</t>

              <t>Number of Backbone Nodes: 150</t>

              <t>Number of Aggregation Nodes: 1.500</t>

              <t>Number of Access Nodes: 40.000</t>
            </list></t>
        </section>

        <section title="Typical Numbers">
          <t><xref target="numUseCase02"></xref> shows typical numbers which
          are expected for Use Case #2 for the purpose of establishing the
          transport LSPs. They do not take into account the services built in
          addition (e.g. 6PE will require additional IPv6 routes).</t>

          <texttable align="center" anchor="numUseCase02"
                     title="Use Case #2: Typical Numbers for Access Node">
            <ttcol>Parameter</ttcol>

            <ttcol>Typical Value</ttcol>

            <c>IGP Control Plane</c>

            <c>2</c>

            <c>IP FIB</c>

            <c>2</c>

            <c>LDP Control Plane</c>

            <c>1000</c>

            <c>LDP FIB</c>

            <c>1000</c>
          </texttable>

          <t></t>
        </section>
      </section>
    </section>

    <section title="Requirements">
      <t>The following section describes the overall requirements which need
      to be fulfilled by the Seamless MPLS architecture. Beside the general
      requirements of the architecture itself there are also certain
      requirements which are related to the different network nodes.</t>

      <t><list style="symbols">
          <t>End to End Transport LSP: MPLS based services (pseudowire based,
          L3-VPN or IP) SHALL be provided by the Seamless MPLS based
          infrastructure between any nodes.</t>

          <t>Scalability: The network SHALL be scalable to the minimum of
          100.000 nodes.</t>

          <t>Fast convergence (sub second resilience) SHALL be supported. Fast
          reroute (LFA) SHOULD be supported.</t>

          <t>Flexibility: The Seamless MPLS architecture SHALL be applied to a
          wide variety of existing MPLS deployments. It SHALL use a flexible
          approach deploying building blocks with the possibility to use
          certain features only if those features are needed (e.g. dual homing
          ANs or fast reroute mechanisms).</t>

          <t>Service independence: Service and transport layer SHALL be
          decoupled. The architecture SHALL remove the need for service
          specific configurations on intermediate nodes.</t>

          <t>Native Multicast support: P2MP MPLS LSPs SHOULD be supported by
          the Seamless MPLS architecture.</t>

          <t>Interoperable end to end OAM mechanisms SHALL be implemented.</t>
        </list></t>

      <section title="Overall">
        <section title="Access">
          <t>In respect of MPLS functionality the access network should be
          kept as simple as possible. Compared to the aggregation and/or core
          network within Seamless MPLS a typical access node is less powerful.
          The control plane and the forwarding should be as simple as
          possible. To reduce the complexity and the costs of an access node
          not the full MPLS functionality needs to be supported (control and
          data plane). The use of an IGP should be avoided. Static routing
          should be sufficient. Required functionality to reach the required
          scalability should be moved out of the access node. The number of
          access nodes can be very high. The support of load balancing for
          layer 2 services should be implemented.</t>
        </section>

        <section title="Aggregation">
          <t>The aggregation network aggregates traffic from access nodes. The
          aggregation Node must have functionalities that enlarge the
          scalability of the simple access nodes that are connected. The IGP
          must be link state based. Each aggregation area must be a separated
          area. All routes that are interarea should use an EGP to keep the
          IGP small. The aggregation node must have the full scalability
          concerning control plane and forwarding. The support of load
          balancing for layer 2 services must be implemented.</t>
        </section>

        <section title="Core">
          <t>The core connects the aggregation areas. The core network
          elements must have the full scalability concerning control plane and
          forwarding. The IGP must be link state based. The core area must not
          include routes from aggregation areas. All routes that are interarea
          should use an EGP to keep the IGP small. Each area of the link state
          based IGP should have less than 2000 routes. The support of load
          balancing for layer 2 services must be implemented.</t>
        </section>
      </section>

      <section title="Multicast">
        <t>Compared with unicast connectivity Multicast is more dynamic. User
        generated messages - like joining or leaving multicast groups - are
        interacting directly with network components in the access and
        aggregation network (in order to build the corresponding forwarding
        states). This leads to the need for a highly dynamic handling of
        messages on access and aggregation nodes. Nevertheless the core
        network SHOULD be stable and state changes triggered by user generated
        messages SHOULD be minimized. This raises the need for a hierarchy for
        the P2MP support in Seamless MPLS hiding the dynamic behaviour of the
        access and aggregation nodes.</t>

        <t><list style="symbols">
            <t>mLDP</t>

            <t>P2MP RSVP-TE</t>
          </list></t>
      </section>

      <section title="Availability">
        <t>All network elements should be highly available (99.999%
        availability). Outage times should be as low as possible. A repair
        time of 50 milliseconds or less should be guaranteed at all nodes and
        lines in the network that are redundant. Fast convergence features
        SHOULD be used in all control plane protocols. Local Repair functions
        SHOULD be used wherever possible. Full redundancy is required at all
        equipment that is shared in a network element.</t>

        <t><list style="symbols">
            <t>Power Supply</t>

            <t>Switch Fabric</t>

            <t>Routing Processor</t>
          </list>A change from an active component to a standby component
        SHOULD happen without affecting customer traffic. The influence on
        customer traffic MUST be as low as possible.</t>
      </section>

      <section title="Scalability">
        <t>The network must be highly scalable. As a minimum requirement the
        following scalability figures should be met:</t>

        <t><list style="symbols">
            <t>Number of aggregation domains: 100</t>

            <t>Number of backbone nodes: 1.000</t>

            <t>Number of aggregation nodes: 10.000</t>

            <t>Number of access nodes: 100.000</t>
          </list></t>
      </section>

      <section title="Stability">
        <t><list style="symbols">
            <t>The platform should be stable under certain circumstances (e.g.
            misconfiguration within one area should not cause instability in
            other areas).</t>

            <t>Differentiate between "All Loopbacks and Link addresses
            should be pingable from everywhere." vs. "Link addresses
            are not necessarily pingable from everywhere".</t>
          </list></t>
      </section>
    </section>

    <!-- This PI places the pagebreak correctly (before the section title) in the text output. -->

    <?rfc needLines="8" ?>

    <section title="Architecture">
      <section title="Overall">
        <t>One of the key questions that emerge when designing an architecture
        for a seamless MPLS network is how to handle the sheer size of the
        necessary routing and MPLS label information control plane and
        forwarding plane state resulting from the stated scalability goals
        especially with respect to the total number of access nodes. This
        needs to be done without overwhelming the technical scaling limits of
        any of the involved nodes in the network (access, aggregation and
        core) and without introducing too much complexity in the design of the
        network while at the same time still maintaining good convergence
        properties to allow for quick MPLS transport and service restoration
        in case of network failures.</t>
      </section>

      <section title="Multi-Domain MPLS networks">
        <t>The key design paradigm that leads to a sound and scalable solution
        is the divide and conquer approach, whereby the large problem is
        decomposed into many smaller problems for which the solution can be
        found using well-known standard architectures.</t>

        <t>In the specific case of seamless MPLS the overall MPLS network
        SHOULD be decomposed into multiple MPLS domains, each well within the
        scaling limits of well-known architectures and network node
        implementations. From an organizational and operational point of view
        it MAY make sense to define the boundaries of such domains along the
        pre-existing boundaries of aggregation networks and the core
        network.</t>

        <t>Examples of how networks can be decomposed include using IGP areas
        as well as using multiple BGP autonomous systems.</t>
      </section>

      <section title="Hierarchy">
        <t>These MPLS domains SHOULD then be connected into an MPLS
        multi-domain network in a hierarchical fashion that enables the
        seamless exchange of loopback addresses and MPLS label bindings for
        transport LSPs across the entire MPLS internetwork while at the same
        time preventing the flooding of unnecessary routing and label binding
        information into domains or parts of the network that do not need
        them. Such a hierarchical routing and forwarding concept allows a
        scalability in different dimensions and allows to hide the complexity
        and size of the aggregation and access networks.</t>
      </section>

      <section title="Intra-Domain Routing">
        <t>The intra-domain routing within each of the MPLS domains (i.e.
        aggregation domains and core) SHOULD utilize standard IGP protocols
        like OSPF or ISIS. By definition, each of these domains is small
        enough so that there are no relevant scaling limits within each IGP
        domain, given well-known state-of-the-art IGP design principles and
        recent router technology.</t>

        <t>The intra-domain MPLS LSP setup and label distribution SHOULD
        utilize standard protocols like LDP or RSVP.</t>
      </section>

      <section title="Inter-Domain Routing">
        <t>The inter-domain routing is responsible for establishing
        connectivity between and across all MPLS domains. The inter-domain
        routing SHOULD establish a routing and forwarding hierarchy in order
        to achieve the scaling goals of seamless MPLS. Note that the IP
        aggregation usually performed between regions (IGP areas/AS) in IP
        routing does not work for MPLS as MPLS is not capable of aggregating
        FEC (because MPLS forwarding uses an exact match lookup, while IP uses
        longest match).</t>

        <t>Therefore it is RECOMMENDED to utilize protocols that support
        indirect next-hops (like BGP with MPLS labels "labeled
        BGP/SAFI4" <xref target="RFC3107"></xref>).</t>
      </section>

      <section title="Access">
        <t>Compared to the aggregation and core parts of the Seamless MPLS
        network the access part is special in two respects:</t>

        <t><list style="symbols">
            <t>The number of nodes in the access is at least one order of
            magnitude higher than in any other part of the network.</t>

            <t>Because of the large quantity of access nodes, the cost of
            these nodes is extremely relevant for the overall costs of the
            entire network, i.e. access nodes are very cost sensitive.</t>
          </list>This makes it desirable to design the architecture such that
        the AN functionality can be kept as simple as possible. This should
        always be kept in mind when evaluating different seamless MPLS
        architectures. The goal is to limit both the number of different
        protocols needed on the AN as well as the scale to which each protocol
        must perform to the absolute minimum.</t>
      </section>
    </section>

    <!-- This PI places the pagebreak correctly (before the section
         title) in the text output. -->

    <?rfc needLines="8" ?>

    <section title="Deployment Scenarios">
      <t>This section describes the deployment scenarios based on the use
      cases and the generic architecture above.</t>

      <section title="Deployment Scenario #1">
        <t>Section describing the Seamless MPLS implementation of a large
        European ISP.</t>

        <section title="Overview">
          <t>This deployment scenario describes one way to implement a
          seamless MPLS architecture. Specific to this implementation is the
          choice of intra- and inter-domain routing and label distribution
          protocols, as well as the details of the interworking of these
          protocols to achieve the overall scalable hierarchical
          architecture.</t>
        </section>

        <section title="General Network Topology">
          <t>There are multiple aggregation domains (in the order of up to
          100) connected to the core in a star topology, i.e. aggregation
          domains are never connected among themselves, but only to the core.
          The core has its own domain.</t>

          <figure align="center" anchor="deploy01"
                  title="Deployment Scenario #1">
            <artwork><![CDATA[
           +-------+   +-------+   +------+   +------+
           |       |   |       |   |      |   |      |
        +--+ AGN11 +---+ AGN21 +---+ ABR1 +---+ LSR1 +--> to AGN
       /   |       |  /|       |   |      |   |      |
+----+/    +-------+\/ +-------+   +------+  /+------+
| AN |              /\                     \/     |
+----+\    +-------+  \+-------+   +------+/\ +------+
       \   |       |   |       |   |      |  \|      |
        +--+ AGN12 +---+ AGN22 +---+ ABR2 +---+ LSR2 +--> to AGN
           |       |   |       |   |      |   |      |
           +-------+   +-------+   +------+   +------+

static route     ISIS L1 LDP             ISIS L2 LDP

<-Access-><--Aggregation Domain--><---------Core---------> 
]]></artwork>
          </figure>

          <t>As shown in <xref target="deploy01"></xref>, the access nodes
          (AN) are connected to the aggregation network via aggregation nodes
          called AGN1x, either to a single AGN1x or redundantly to two AGN1x.
          Each AGN1x has redundant uplinks to a pair of second-level
          aggregation nodes called AGN2x.</t>

          <!-- [###dws: do we assume the presence of this link???]
	       <t>The AGS2 pair is also connected via a direct
	       link.</t> -->

          <t>Each aggregation domain is connected to the core via exactly two
          border routers (ABR) on the core side. There can be multiple AGN2
          pairs per aggregation domain, but only one ABR pair for each
          aggregation domain. Each of the AGN2 in an AGN2 pair connects to one
          of the ABRs in the ABR pair responsible for that aggregation
          domain.</t>

          <!-- [###dws: do we assume the presence of redundant AGS2 to
	       ABR uplinks???] -->

          <t>The ABRs on the core side have redundant connections to a pair of
          LSR routers.</t>

          <!-- [###dws: do we assume the presence of this link???] -->

          <t>The LSR pair is also connected via a direct link.</t>

          <t>The core LSR are connected to other core LSR in a partly meshed
          topology so that there are disjoint, redundant paths from each LSR
          to each other LSR.</t>
        </section>

        <section title="Hierarchy">
          <t>As explained before, hierarchy is the key to a scalable seamless
          MPLS architecture. The hierarchy in this implementation is achieved
          by forming different MPLS domains for aggregation domains and core,
          where within each of these domains a fairly common MPLS deployment
          using ISIS as intradomain link-state routing protocol and using LDP
          for MPLS label distribution is used.</t>

          <t>These MPLS domains are mapped to ISIS areas as follows:
          Aggregation domains are mapped to ISIS L1 areas. The core is
          configured as ISIS L2. The border routers connecting aggregation and
          core are ISIS L1L2 and are referred to as ABRs. From a technical and
          operational point of view these ABRs are part of the core, although
          they also belong to the respective aggregation domain purely from a
          routing protocol point of view.</t>

          <t>For the inter-domain routing, BGP with MPLS labels is deployed
          ("labeled BGP/SAFI4" <xref target="RFC3107"></xref>).</t>
        </section>

        <section title="Intra-Area Routing">
          <section title="Core">
            <t>The core uses ISIS L2 to distribute routing information for the
            loopback addresses of all core nodes. The border routers (ABR)
            that connect to the aggregation domains are also part of the
            respective aggregation ISIS L1 area and hence ISIS L1L2.</t>

            <t>LDP is used to distribute MPLS label binding information for
            the loopback addresses of all core nodes.</t>
          </section>

          <section title="Aggregation">
            <t>The aggregation domains use ISIS L1 as intra-domain routing
            protocol. All AGN loopback addresses are carried in ISIS.</t>

            <t>As in the core, the aggregation also uses LDP to distribute
            MPLS label bindings for the loopback addresses.</t>
          </section>
        </section>

        <section title="Access">
          <t>Access nodes do not have their own domain or IGP area. Instead,
          they directly connect to the AGN1 nodes in the aggregation domain.
          To keep access devices as simple as possible, ANs do not participate
          in ISIS.</t>

          <t>Instead, each AN has two static default routes pointing to each
          of the AGN1 it is connected to. Appropriate techniques SHOULD be
          deployed to make sure that a given default route is invalidated when
          the link to an AGN1 or that node itself fails. Examples of such
          techniques include monitoring the physical link state for loss of
          light/loss of frame, or using Ethernet link OAM or BFD <xref
          target="I-D.ietf-bfd-v4v6-1hop"></xref>.</t>

          <t>The AGN1 MUST have a configured static route to the loopback
          address of each of the ANs it is connected to, because it cannot
          learn the AN loopback address in any other way. These static routes
          have to be monitored and invalidated if necessary using the same
          techniques as described above for the static default routes on the
          AN.</t>

          <t>The AGN1 redistributes these routes into ISIS for intra-domain
          reachability of all AN loopback addresses.</t>

          <t>LDP is used for MPLS label distribution between AGN1 and AN. In
          order to keep the AN control plane as lightweight as possible, and
          to avoid the necessity for the AN to store 100.000 MPLS label
          bindings for each upstream AGN1 peer, LDP is deployed in
          downstream-on-demand (DoD) mode, described below.</t>

          <t>To allow the label bindings received via LDP DoD to be installed
          into the LFIB on the AN without having the specific host route to
          the destination loopback address, but only a default route, use of
          the LDP Extension for Inter-Area Label Switched Paths <xref
          target="RFC5283"> </xref> is made.</t>

          <section title="LDP Downstream-on-Demand (DoD)">
            <t>LDP downstream-on-demand mode is specified in <xref
            target="RFC5036"></xref>. Although it was originally intended to
            be used with ATM switch hardware, there is nothing from a protocol
            perspective preventing its use in a regular MPLS frame-based
            environment. In this mode the upstream LSR will explicitly ask the
            downstream LSR for a label binding for a particular FEC when
            needed.</t>

            <t>The assumption is that a given AN will only have a limited
            number of services configured to an even more limited number of
            destinations, or egress LER. Instead of learning and storing all
            label bindings for all possible loopback addresses within the
            entire Seamless MPLS network, the AN will use LDP DoD to only
            request the label bindings for the FECs corresponding to the
            loopback addresses of those egress nodes to which it has services
            configured.</t>

            <t>For LDP DoD the AGN1 MUST also ask the AN for label bindings
            for specific FECs. FECs are necessary for all pseudowire
            destinations at the AN. Preferably, this pseudowire
            destination is the LSR-ID of the AN. Depending on the AN
            implementation and architecture multiple pseudowire destination
            addresses and associated FECs could be needed. This results in
            the following requirements:</t>

            <t><list style="symbols">
                <t>The AGN1 MUST ask the AN for label bindings for all
                potential pseudowire destination addresses on the AN. Because
                the AGN (at least in many cases) does not take part in the
                pseudowire signaling an independent way of receiving the AN
                FEC is necessary on the AGN. These potential pseudowire
                destinations MUST be known on the AGN1, by configuration or
                otherwise. These are typically the loopback addresses of the
                AN, to which a static route has been configured anyway on the
                AGN1, as explained above. In addition to these static routes,
                the AGN1 SHOULD be configured statically to request MPLS label
                bindings for these loopback addresses via LDP DoD.</t>

                <t>Optionally an automatism that asks for a FEC for the LSR-ID
                COULD be implemented. A configuration switch that disables
                this option must be implemented. The label is necessary. The
                way of initiating the DoD-signaling of the label could be done
                with both methods (configuration/automatism).</t>

                <t>The AN knows by configuration to which destination a
                pseudowire is set up. The AN is always the endpoint of the
                pseudowire. Before signalling a pseudowire the AN MUST ask
                (via LDP DoD) the AGN for a FEC. Because of this an
                independent preconfiguration is not necessary on the AN.</t>

                <t>The following are the triggers for ANs to request a
                label:</t>

                <t><list style="symbols">
                    <t>When a control session (targeted LDP) to a target has
                    to be established</t>

                    <t>When a service label has been received by a control
                    session (e.g. pseudo wire label)</t>
                  </list><!-- also label withdraw mechanisms should be specified. How
		   is a lable withdrawn (protocol issue?). Upstream nodes should be
		   able to optimize FIB in case a label is not needed any more. --></t>
              </list></t>
          </section>
        </section>

        <section title="Inter-Area Routing">
          <t>The inter-domain MPLS connectivity from the aggregation domains
          to and across the core domain is realized primarily using BGP with
          MPLS labels ("labeled BGP/SAFI4" <xref target="RFC3107"></xref>). A
          very limited amount of route leaking from ISIS L2 into L1 is also
          used.</t>

          <t>All ABR and PE nodes in the core are part of the labeled iBGP
          mesh, which can be either full mesh or based on route reflectors.
          These nodes advertise their respective loopback addresses (which are
          also carried in ISIS L2) into labeled BGP.</t>

          <t>Each ABR node has labeled iBGP sessions with all AGN1 nodes
          inside the aggregation domain that they connect to the core. Since
          there are two ABR nodes per aggregation domain, this leads to each
          AGN1 node having an iBGP session with each of the two ABRs. Note
          that the use of iBGP implies that the entire seamless MPLS
          internetwork is just a single AS to which all core and aggregation
          nodes belong. The AGN1 nodes advertise their own loopback addresses
          into labeled BGP, in addition to these loopbacks also being in ISIS
          L1.</t>

          <t>Additionally the AGN1 nodes also redistribute all the statically
          configured routes to the AN loopback addresses into labeled BGP.
          Note that as stated above, the AGN1 MUST ask the AN for label
          bindings for the AN loopback FECs via LDP DoD in order to have a
          valid labeled route with a non-null label.</t>

          <t>This architecture results in carrying all loopbacks of all nodes
          except pure P nodes (AN, AGN, ABR and core PE) in labeled BGP, e.g.
          there will be in the order of 100.000 routes in labeled BGP when
          approaching the stated scalability goal. Note that this only affects
          the BGP RIB size and does not necessarily imply that any node needs
          to actually have active forwarding state (LFIB) in the same order of
          magnitude. In fact, as will be discussed in the scalability
          analysis, no single node needs to install all labeled BGP routes
          into the LFIB, but each node only needs a small percentage of the
          RIB as active forwarding state in the LFIB. And from a RIB point of
          view, BGP is known to scale to hundreds of thousands of routes.</t>
        </section>

        <section title="Labeled iBGP next-hop handling">
          <t>The ABR nodes run labeled iBGP both to the core mesh as well as
          to the AGN1 nodes of their respective aggregation domains. Therefore
          they operate as iBGP route reflectors, reflecting labeled routes
          from the aggregation into the core and vice versa.</t>

          <t>When reflecting routes from the core into the aggregation domain,
          the ABR SHOULD NOT change the BGP NEXT-HOP addresses
          (next-hop-unchanged). This is the usual behaviour for iBGP route
          reflection. In order to make these routes resolvable to the AGN1
          nodes inside the aggregation domain, the ABR MUST leak all other ABR
          and core PE loopback addresses from ISIS L2 into ISIS L1 of the
          aggregation domain. Note that the number of leaked addresses is
          limited so that the overall scalability of the seamless MPLS
          architecture is not impacted. In the worst case all core loopback
          addresses COULD be leaked into ISIS L1, but even that would not be a
          scalability problem.</t>

          <t>When reflecting routes from the aggregation into the core, the
          ABR MUST set the BGP NEXT-HOP to its own loopback addresses
          (next-hop-self). This is not the default behaviour for iBGP route
          reflection, but requires special configuration on the ABR. Note that
          this also implies that the ABR MUST allocate a new local MPLS label
          for each labeled iBGP FEC that it reflects from the aggregation into
          the core. This special next-hop handling is essential for the
          scalability of the overall seamless MPLS architecture since it
          creates the required hierarchy and enables the hiding of all
          aggregation and access addresses behind the ABRs from an IGP point
          of view. Leaking of aggregation ISIS L1 loopback addresses into ISIS
          L2 is not necessary and MUST NOT be allowed.</t>

          <t>The resulting hierarchical inter-domain MPLS routing structure is
          similar to the one described in <xref target="RFC4364"></xref>
          section 10c, only that we use one AS with route reflection instead
          of using multiple ASes.</t>
        </section>

        <section title="Network Availability and Simplicity">
          <t>The seamless MPLS architecture illustrated in deployment case
          study 1 guarantees a sub-second loss of connectivity upon any link
          or node failures. Furthermore, in the vast majority of cases, the
          loss of connectivity is limited to sub-50msec.</t>

          <t>These network availability properties are provided without any
          degradation on scale and simplicity. This is a key achievement of
          the design.</t>

          <t>In the remainder of this section, we first introduce the
          different network availability technologies and then review their
          applicability for each possible failure scenario.</t>

          <section title="IGP Convergence">
            <t>IGP convergence can be modelled as a linear process with an
            initial delay and a linear FIB update <xref
            target="ACM01"></xref>.</t>

            <t>The initial delay could conservatively be assumed to be
            260msec: 50msec to detect failures with BFD (most failures would
            be detected faster with loss of light for example or with faster
            BFD timers), 50msec to throttle the LSP generation, 150msec to
            throttle the SPF computation (making sure that all the required
            LSPs are received even in case of SRLG failures) and 10msec
            for shortest-path-first tree computation.</t>

            <t>Assuming 250usec per update (conservative), this allows for
            (1000-260)/0.250 = 2960 prefix updates within a second following
            the outage. More precisely, this allows for 2960 important IGP
            prefix updates. Important prefixes are automatically classified
            by the router implementation through a simple heuristic (/32 is more
            important than non-/32).</t>

            <t>The number of IGP important routes (loopbacks) in deployment
            case study 1 is much smaller than 2960, and hence sub-second IGP
            convergence is conservative.</t>

            <t>IGP convergence is a simple technology for the operator
            provided that the router vendor optimizes the default IGP behavior
            (no need to tune any arcane knob).</t>
          </section>

          <section title="Per-Prefix LFA FRR">
            <t>A per-prefix LFA for a destination D is a precomputed backup
            IGP nexthop for that destination. This backup IGP nexthop can be
            link protecting or node protecting <xref
            target="RFC5286"></xref>.</t>

            <t>The analysis of the applicability of Per-Prefix LFA in the
            deployment model 1 of Seamless MPLS architecture is
            straightforward thanks to <xref
            target="I-D.filsfils-rtgwg-lfa-applicability"></xref>.</t>

            <t>In deployment model 1, each aggregation network either follows
            the triangle or full-mesh topology. Furthermore, the backbone
            region implements a dual-plane. As a consequence, the failure of
            any link or node within an aggregation domain is protected by LFA
            FRR (sub-50msec) for all impacted IGP prefixes, whether intra-area
            or inter-area. No uloop may form as a result of these failures
            <xref target="I-D.filsfils-rtgwg-lfa-applicability"></xref>.</t>

            <t>Per-Prefix LFA FRR is generally assessed as a simple technology
            for the operator <xref
            target="I-D.filsfils-rtgwg-lfa-applicability"></xref>. It
            certainly is in the context of deployment case study 1 as the
            designer enforced triangle and full-mesh topologies in the
            aggregation network as well as a dual-plane core network.</t>
          </section>

          <section title="Hierarchical Dataplane and BGP Prefix Independent Convergence">
            <t>In a hierarchical dataplane, the FIB used by the packet
            processing engine reflects the recursions between routes. For
            example, a BGP route B recursing on IGP route I whose best path is
            via interface O is encoded as a FIB entry B pointing to a FIB
            entry I pointing to a FIB entry O.</t>

            <t>Hierarchical FIB <xref target="BGPPIC"></xref> extends the
            hierarchical dataplane with the concept of a BGP Path-List. A BGP
            path-list may be abstracted as a set of primary multipath nhops
            and a backup nhop. When the primary set is empty, packets destined
            to the BGP destinations are rerouted via the backup nhop.</t>

            <t>With hierarchical FIB and hierarchical dataplane, a FIB entry
            representing a BGP route points to a FIB entry representing a BGP
            Path-List. This entry may either point again to another BGP Path
            list entry (BGP over BGP recursion) or more likely points to a FIB
            entry representing an IGP route.</t>

            <t>A BGP Path-list may be computed automatically by the router and
            does not require any operator involvement. Specifically, the
            automated computation adapts to any routing policy (this is key to
            understand the simplicity of hierarchical FIB and the ability to
            enable it as a default router behavior). There is no constraint at
            all on the operator design. Any policy is supported (multipath,
            primary/backup between neighboring domains or via alternate
            domains).</t>

            <t>The BGP backup nhop is computed in advance of any failure (i.e.,
            a second bestpath computation after excluding the primary
            nhops).</t>

            <t>Hierarchical dataplane and hierarchical FIB provide two
            important routing availability properties.</t>

            <t>First, upon IGP convergence, recursive BGP routes immediately
            benefit from the updated IGP paths thanks to the dataplane
            indirection. This is key as most of the traffic is destined to BGP
            routes, not to IGP routes.</t>

            <t>Second, upon loss of the primary BGP nhop, the dataplane can
            immediately reroute the packets towards the pre-computed backup
            nhop. This redirection is said to be prefix independent as the
            only entries that need to be modified are the BGP path-lists.
            These entries are shared across all the BGP prefixes with the same
            primary and backup next-hops. This scale independence is key. In
            the context of deployment model 1, while there might be 100k BGP
            routes, we only expect on the order of 200 BGP path-lists.
            Assuming 10usec in-place modification per BGP path-list, we see
            that the router can enable the backup path for 100k BGP
            destinations in less than 2msec (less than 200 * 10usec).</t>

            <t>The detection of the loss of the primary BGP nhop (and hence
            the need to enable the pre-computed backup BGP nhop) can be local
            (a local link failing between an edge device and a single-hop eBGP
            peer) or involves an IGP convergence (a remote border router goes
            down).</t>

            <t>These hierarchical FIB properties benefit any BGP routes:
            Internet, L3VPN, 3107, IPv4 or IPv6. Future evolution of VPLS will
            also benefit from such properties <xref
            target="I-D.raggarwa-mac-vpn"></xref>, <xref
            target="I-D.sajassi-l2vpn-rvpls-bgp"></xref>.</t>

            <t>Hierarchical forwarding and hierarchical FIB are very simple
            technology to operate. Their ability to adapt to any topology, any
            routing policy and any BGP address family allows router vendors to
            enable this behavior by default.</t>
          </section>

          <section title="Local Protection using Anycast BGP">
            <t></t>

            <section title="Anycast BGP applied to ABR node failure">
              <t>In this section we describe a mechanism that provides local
              protection for area border router (ABR) failures. To illustrate
              this mechanism consider an example shown in <xref
              target="RoutingAndTrafficFlow"></xref>.<figure
                  anchor="RoutingAndTrafficFlow"
                  title="Routing and Traffic Flow">
                  <artwork align="center"><![CDATA[                              +-------+
                              |       |
                           vl0+ ABR 1 |
                             /|       |
  +----------+    +-------+ / +-------+
  |          |    |       |/
  | PE / LER +-..-+  PLR  |
  |          |    |       |\
  +----------+    +-------+ \ +-------+
                             \|       |
                           vl0+ ABR 2 |
                              |       |
                              +-------+


 +-------+     +-------+     +-------+
 | LDP-L +-----+ LDP-L +-----+ LDP-L |
 +-------+     +-------+     +-------+
 | BGP-L +-------------------+ BGP-L |
 +-------+                   +-------+

--------------- traffic ---------------->
<----- routing + label distribution -----
]]></artwork>
                </figure></t>

              <t>The core router adjacent to ABR1 and ABR2 acts as a point of
              local repair (PLR). When the PLR detects ABR1 failure, the PLR
              re-routes to ABR2 the traffic that the PLR used to forward to
              ABR1, with ABR2 providing the subsequent forwarding for this
              traffic. To accomplish this ABR1, ABR2, and the PLR employ the
              following procedures.</t>

              <t>ABR1, in addition to its own loopback, is provisioned with
              another IP address (vl0). This IP address is used to identify
              the forwarding state/context on ABR1 that is subject to the
              local protection mechanism outlined in this section. We refer to
              this IP address, vl0, as the "context identifier". ABR1
              advertises its context identifier in ISIS and LDP. As ABR1
              re-advertises to its core peers the BGP routes it receives from
              its peers in the aggregation domain(s), ABR1 sets the BGP Next
              Hop on these routes to its context identifier (this creates an
              association between the forwarding state/context created by
              these routes and the context identifier).</t>

              <t>ABR2, acting as a protector for ABR1, is configured with
              ABR1's context identifier. ABR2 advertises this context
              identifier into LDP and ISIS. The LDP advertisement is done with
              no PHP and a non-null label, and the ISIS advertisement is done
              with a very high metric. As a result, the PLR would have an LFA
              route/LSP to this context identifier with ABR2 as the next hop.
              When the PLR detects ABR1's failure, the LFA procedures on the
              PLR would result in sending to ABR2 the traffic that the PLR
              used to forward to ABR1. Moreover, since ABR2 advertises into
              LDP a non-null label for ABR1's context identifier, this
              label would enable ABR2 to identify such traffic (as we'll see
              further down the ability to identify such traffic is essential
              in order for ABR2 to correctly forward this traffic).</t>

              <t><figure align="center" anchor="ABRFailureScenarios"
                  title="ABR Failure Scenarios">
                  <artwork><![CDATA[                 +-----------------+-----------+-----------+
                 | FEC 10.0.1.1/32 | Label 200 | NH AGN2-1 |
                 +-----------------+-----------+-----------+
                 | FEC 10.0.1.2/32 | Label 233 | NH AGN2-1 | ABR1
                 +-----------------+-----------+-----------+
                 | FEC 10.0.1.3/32 | Label 313 | NH AGN2-1 |
                 +-----------------+-----------+-----------+

                    +------+    +-------+
                    |      |    |       |    +------------------+
                 vl0+ ABR1 +----+ AGN21 +----+ AGN11:10.0.1.1/32|
                   /|      |    |       |\  /+------------------+
                  / +------+\  /+-------+ \/
 +----+   +-----+/           \/         \ /\ +------------------+
 | PE +---+ PLR |            /\          X  X+ AGN12:10.0.1.2/32|
 +----+   +-----+\          /  \        / \/ +------------------+
                  \ +------+    +-------+ /\
                   \|      |    |       |/  \+------------------+
                 vl0+ ABR2 +----+ AGN22 +----+ AGN13:10.0.1.3/32|
                    |      |    |       |    +------------------+
                    +------+    +-------+


                 +----------------------------------------+
                 |      native forwarding context         |
                 +-----------------+-----------+----------+
                 | FEC 10.0.1.1/32 | Label 100 | NH AGN21 |
                 +-----------------+-----------+----------+
                 | FEC 10.0.1.2/32 | Label 107 | NH AGN21 | ABR2
                 +-----------------+-----------+----------+
                 | FEC 10.0.1.3/32 | Label 152 | NH AGN21 |
                 +-----------------+-----------+----------+
                                 |     |     |
                                 V     V     V
                 +----------------------------------------+
                 |      backup forwarding context         |
                 +-----------------+-----------+----------+
                 | FEC 10.0.1.1/32 | Label 200 | NH AGN21 |
                 +-----------------+-----------+----------+
                 | FEC 10.0.1.2/32 | Label 233 | NH AGN21 | ABR2
                 +-----------------+-----------+----------+
                 | FEC 10.0.1.3/32 | Label 313 | NH AGN21 |
                 +-----------------+-----------+----------+
                  (ABR2 acting as backup for ABR1)
]]></artwork>
                </figure></t>

              <t>ABR2, acting as a protector for the forwarding context of
              ABR1, has to have the &lt;FEC-&gt;label&gt; mapping for the FECs
              present in that forwarding context, and should use this mapping
              to create the forwarding state it would use when forwarding the
              traffic received from the PLR. <xref
              target="ABRFailureScenarios"> </xref> shows the
              &lt;FEC-&gt;label&gt; mapping on ABR1 and ABR2. Note that the
              backup forwarding context on ABR2 is a mirror image of the
              forwarding context on ABR1. This backup forwarding context is
              populated using the routes that have been re-advertised by ABR1
              to its core peers (as ABR2 is a BGP core peer of ABR1). The
              label that ABR2 advertises into LDP for ABR1's context
              identifier points to the backup context. This way, ABR2 forwards
              all the traffic received with this label using not its native
              forwarding context, but the backup forwarding context.</t>

              <t>Note that whether the PLR could rely on the basic LFA to
              re-route to ABR2 the traffic that the PLR used to forward to
              ABR1 depends on the LFA coverage. Since the basic LFA does not
              guarantee 100% coverage in all topologies, relying on basic LFA
              may not be sufficient, in which case the basic LFA would need to
              be augmented to provide 100% coverage.</t>

              <t>The procedures outlined above provide local protection upon
              ABR node failure. By virtue of being local protection, the
              actions required to restore connectivity upon the failure
              detection are fully localized to the router closest to the
              failure - the router directly connected to the failed ABR. This
              enables delivery of sub-50msec connectivity recovery time in
              the presence of ABR failure. These actions do not depend on
              propagating failure information in ISIS, thus providing
              connectivity recovery time that is independent of the ISIS
              routing convergence time. In contrast, a combination of
              hierarchical FIB organization and ISIS routing convergence,
              being a global protection mechanism, does rely on the ISIS
              routing convergence time, as the prefix-independent switch-over
              on the pre-computed backup next hop occurs upon IGP convergence
              (deletion of the IGP route to the remote ABR), and thus would
              have several 100s msec connectivity recovery time.</t>
            </section>

            <section title="Extensions to support ABRs connected to different aggregation regions">
              <t>Note that for the purpose of identifying the forwarding
              context ABR1's forwarding state could be partitioned, with each
              partition being assigned its own IP address (its own context
              identifier). ABR1 would advertise all these identifiers into
              ISIS and LDP. This may be useful in the scenario where ABR1 is
              connected to more than one aggregation domain (more than one L1
              area), in which case each context identifier would identify the
              ABR1's forwarding state associated with a single aggregation
              domain.</t>

              <t>One could further refine the above scheme by implementing
              protector functionality that would allow a single protector to
              protect multiple forwarding contexts, with each forwarding
              context being associated with all the forwarding state
              maintained by a given (protected) ABR. Such functionality could
              be implemented either on a separate router, or could be
              co-located with an existing ABR. Details of this are outside the
              scope of this document.</t>
            </section>

            <section title="Anycast BGP applied to a L3VPN PE">
              <t>BGP Anycast is also used to protect against L3VPN PE
              failures. In general a given VPN site can be multi-homed
              (connected to several L3VPN PEs). Moreover, multi-homed sites
              may be non-congruent with each other - different multi-homed
              sites connected to a given PE may have their other connection(s)
              to different other PEs. BGP Anycast scheme, utilizing the
              construct of Protector PE, provides forwarding context
              protection for multiple egress PEs in the presence of
              non-congruent multi-homed sites.</t>

              <t>Protector PE function is enhanced from the basic BGP Anycast
              1:1 mirroring procedures described for ABR protection, by
              supporting multiple backup forwarding contexts, one per
              protected egress PE. Each backup forwarding context on the
              Protector PE is identified by the context identifier of the
              associated protected egress PE.</t>

              <t>Protector PE advertises these context identifiers into IGP
              with a large metric and into LDP with no PHP and a non-null
              label. This results in PLR of each egress PE having an LFA
              route/LSP (or bypass LSP if no native LFA coverage for specific
              topology) to the associated context identifier with Protector PE
              as the next hop. Protector PE creates a backup forwarding
              context per protected egress PE based on BGP advertisements from
              this egress PE and other egress PEs with the same multi-homed
              customer networks.</t>

              <t>Similarly to the ABR case described earlier, in case of
              specific protected egress PE failure, PLR will follow standard
              LFA procedure (or local protection to bypass LSP) and forward
              affected flows to Protector PE. Those flows will arrive to
              Protector PE on the LSP associated with the context identifier
              for the failed egress PE, the backup forwarding context will be
              identified by this LSP, and flows will be switched to
              alternative egress PE(s).</t>
            </section>
          </section>

          <section title="Assessing loss of connectivity upon any failure">
            <t><list style="empty">
                <t>We select two typical traffic flows and analyze the loss of
                connectivity (LoC) upon each possible failure.</t>

                <t>Flow F1 starts from an AN1 in a left aggregation region and
                ends on an AN2 in a right aggregation region. Each AN is
                dual-homed to two AGNs.</t>

                <t>Flow F2 starts from an L3VPN PE1 in the core and ends at an
                L3VPN PE2 in the core.</t>
              </list></t>

            <t>Note that due to the symmetric network topology in case study
            1, uni-directional flows F1' and F2', associated with F1 and F2
            and forwarded in the reversed direction (AN2 to AN1 right-to-left
            and PE2 to PE1, respectively), take advantage of the same failure
            restoration mechanisms as F1 and F2.</t>

            <section title="AN1-AGN link failure or AGN node failure">
              <t>F1 is impacted but LoC &lt;50msec is possible assuming fast
              BFD detection and fast-switchover implementation on the AN. F2
              is not impacted.</t>
            </section>

            <section title="Link or node failure within the left aggregation region">
              <t>F1 is impacted but LoC &lt;50msec thanks to LFA FRR. No uloop
              will occur during the IGP convergence following the LFA
              protection. Note: if LFA is not available (other topology than
              case study one) or if LFA is not enabled, then the LoC would be
              &lt; 1 second as the number of impacted important IGP routes in a
              seamless architecture is much smaller than 2960.</t>

              <t>F2 is not impacted.</t>
            </section>

            <section title="ABR node failure between left region and the core">
              <t>F1 is impacted but LoC &lt;50msec thanks to LFA FRR. No uloop
              will occur during the IGP convergence following the LFA
              protection.</t>

              <t>Note: This case is also called "Local ABR
              failure" as the ABR which fails is the one connected to
              the aggregation region at the source of flow F1.</t>

              <t>Note: remember that the left region receives the routes to
              all the remote ABRs and that the labelled BGP routes are
              reflected from the core to the left region with next-hop
              unchanged. This ensures that the loss of the (local) ABR between
              the left region and the core is seen as an IGP route impact and
              hence can be addressed by LFA.</t>

              <t>Note: if LFA is not available (other topology than case study
              one) or if LFA is not enabled, then the LoC would be &lt; 1 second
              as the number of impacted important IGP routes in a seamless
              architecture is much smaller than 2960.</t>

              <t>F2 is not impacted.</t>
            </section>

            <section title="Link or node failure within the core region">
              <t>F1 and F2 are impacted but LoC &lt;50msec thanks to LFA
              FRR.</t>

              <t>This is specific to the particular core topology used in
              deployment case study 1. The core topology has been optimized
              <xref target="I-D.filsfils-rtgwg-lfa-applicability"></xref> for
              LFA applicability.</t>

              <t>As explained in <xref
              target="I-D.filsfils-rtgwg-lfa-applicability"></xref>, another
              alternative to provide &lt;50msec in this case consists in using
              an MPLS-TE full-mesh and MPLS-TE FRR. This is required when the
              designer is not able or does not want to optimize the topology
              for LFA applicability and he wants to achieve &lt;50msec
              protection.</t>

              <t>Alternatively, simple IGP convergence would ensure a LoC &lt;
              1 second as the number of impacted important IGP routes in a
              seamless architecture is much smaller than 2960.</t>
            </section>

            <section title="PE2 failure">
              <t>F1 is not impacted.</t>

              <t>F2 is impacted and the LoC is sub-300msec thanks to IGP
              convergence and hierarchical FIB.</t>

              <t>The detection of the primary nhop failure (PE2 down) is
              performed by a single-area IGP convergence.</t>

              <t>In this specific case, the convergence should complete in much
              less than 1 second as very few prefixes are impacted upon an edge
              node failure. Reusing the introduction on IGP convergence presented
              in an earlier section and assuming 2 important impacted prefixes
              (two loopbacks per edge node), one would expect that PE2's
              failure is detected in 260msec + 2*0.250msec.</t>

              <t>In a hierarchical FIB organization on the ingress PE, once
              the loss of an egress PE is detected, all the impacted BGP
              Path-Lists associated with that egress PE need to be updated,
              and the impacted traffic gets re-routed to the pre-computed
              backup PEs. The time it takes to complete this operation is not
              constant, but is proportional to the number of unique BGP
              Path-Lists affected by the egress PE failure. Number of such
              affected BGP Path-Lists is equal to the number of
              "non-congruent" multi-homed sites connected to the egress PE,
              where the number of non-congruent sites is defined as the number
              of other PEs that these sites are connected to (note that in
              defining the term "non-congruent" we refer to sites, rather than
              to CEs, as a given multi-homed site can use multiple CEs).
              Furthermore, per CE BGP policies (e.g. single-path vs.
              multi-path) may further increase number of BGP Path-Lists
              involved.</t>

              <t>The LoC for BGP/VPN traffic upon PE2 failure is thus expected
              to be &lt;300msec.</t>

              <t>Provided that all the deployment considerations have been
              met, LoC is sub-50msec with BGP Anycast.</t>
            </section>

            <section title="PE2's PE-CE link failure">
              <t>F1 is not impacted.</t>

              <t>F2 is impacted and the LoC is sub-50msec thanks to local
              interface failure detection and local forwarding to the backup
              PE. Forwarding to the backup PE is achieved with hierarchical
              FIB or local-repair of BGP egress link providing fast re-route
              to the backup BGP nhop PE.</t>
            </section>

            <section title="ABR node failure between right region and the core">
              <t>F2 is not impacted.</t>

              <t>F1 is impacted. We analyze the LoC for F1 for both
              hierarchical FIB and BGP anycast.</t>

              <t>LoC is sub-600msec thanks to hierarchical FIB.</t>

              <t>The detection of the primary nhop failure (ABR down) is
              performed by a multi-area IGP convergence.</t>

              <t>First, the two (local) ABRs between the left and core
              regions must complete the core IGP convergence. The analysis is
              similar to the loss of PE2. We would thus expect that the core
              convergence completes in ~260msec.</t>

              <t>Second, the IGP convergence in the left region will cause all
              AGN1 routers to detect the loss of the remote ABR. This second
              IGP convergence is very similar to the first one (2 important
              prefixes to remove) and hence should also complete in
              ~260msec.</t>

              <t>Once an AGN1 has detected the loss of the remote ABR, thanks
              to hierarchical FIB organization, in-place modification of
              shared BGP path-list and pre-computation of BGP backup nhop, the
              AGN1 reroutes flow F1 via the alternate remote ABR in a few
              msec.</t>

              <t>As a consequence, the LoC for F1 upon remote ABR failure is
              thus expected to be &lt;600msec.</t>

              <t>Provided that all the deployment considerations have been
              met, LoC is sub-50msec with BGP Anycast.</t>
            </section>

            <section title="Link or node failure within the right aggregation region">
              <t>F1 is impacted but LoC &lt;50msec thanks to LFA FRR. No uloop
              will occur during the IGP convergence following the LFA
              protection.</t>

              <t>Note: if LFA is not available (other topology than case study
              one) or if LFA is not enabled, then the LoC would be &lt; 1 second
              as the number of impacted important IGP routes in a seamless
              architecture is much smaller than 2960.</t>

              <t>F2 is not impacted.</t>
            </section>

            <section title="AGN (connected to AN2) node failure">
              <t>F1 is impacted but LoC &lt;50msec thanks to LFA FRR. No uloop
              will occur during the IGP convergence following the LFA
              protection.</t>

              <t>Note: remember that AGN redistributes the static routes to
              ANs within ISIS. The loss of an AGN on the IGP path to AN2 is
              thus seen as an IGP route impact and hence LFA FRR is
              applicable.</t>

              <t>Note: if LFA is not available (other topology than case study
              one) or if LFA is not enabled, then the LoC would be &lt; 1 second
              as the number of impacted important IGP routes in a seamless
              architecture is much smaller than 2960.</t>

              <t>F2 is not impacted.</t>
            </section>

            <section title="AGN-AN2 link failure">
              <t>F2 is not impacted.</t>

              <t>F1 is impacted.</t>

              <t>LoC is sub-300msec with IGP convergence as only one prefix
              needs to be updated.</t>

              <t>Sub-50msec could be guaranteed provided that the LFA
              implementation supports a redistributed static as a native IGP
              route.</t>
            </section>

            <section title="AN2 failure">
              <t>F1 is impacted and the LoC lasts until the AN is
              recovered.</t>

              <t>F2 is not impacted.</t>
            </section>

            <section title="Summary - Loss of connectivity upon any failure">
              <t>The Seamless MPLS architecture illustrated in deployment case
              study 1 guarantees sub-50msec upon any link or node
              failures.</t>
            </section>
          </section>

          <section title="Network Resiliency and Simplicity">
            <t>A fundamental aspect of the Seamless MPLS architecture is the
            requirement for operational simplicity.</t>

            <t>In a network with 10k of IGP/BGP nodes and 100k of MPLS-enabled
            nodes, it is extremely important to provide a simple operational
            process.</t>

            <t>LFA FRR plays a key role in providing simplicity as it is an
            automated behavior which does not require any configuration or
            interoperability testing.</t>

            <t>More specifically, <xref
            target="I-D.filsfils-rtgwg-lfa-applicability"></xref> plays a key
            role in the Seamless MPLS architecture as it describes simple
            design guidelines which deterministically ensure LFA coverage for
            any link and node in the aggregation regions of the network. This
            is key as it provides for a simple &lt;50msec protection for the
            vast majority of the node and link failures (&gt;90% of the
            IGP/BGP3107 footprint at least).</t>

            <t>If the guidelines cannot be met, then either the designer will
            rely on (1) augmenting native LFA coverage with RSVP, or (2) a
            full-mesh TE FRR model, or (3) IGP convergence. The first option
            provides the same sub-50msec protection as LFA, but introduces
            additional RSVP LSPs. The second option optimizes for sub-50msec
            protection, but implies a more complex operational model. The
            third option optimizes for simple operation but only provides
            &lt;1sec protection. It is up to each designer to arbitrate between
            these three options versus the possibility to engineer the topology
            for native LFA protection.</t>

            <t>A similar choice involves the protection against ABR node
            failure and L3VPN PE node failure. The designer can either use
            hierarchical FIB or Anycast BGP. It is up to each designer to assess
            the trade-off between the valuation of sub-50msec instead of
            sub-1sec versus additional operational considerations related to
            Anycast BGP.</t>
          </section>

          <section title="Conclusion">
            <t>The Seamless MPLS architecture illustrated in deployment case
            study 1 guarantees sub-50msec for majority of link and node
            failures by using LFA FRR, except ABR and L3PE node failures, and
            PE-CE link failure.</t>

            <t>L3VPN PE-CE link failure can be protected with sub-50msec
            restoration, by using hierarchical FIB or local-repair
            fast-reroute to the backup BGP nhop PE.</t>

            <t>ABR and L3PE node failure can be protected with sub-50msec
            restoration, by using BGP Anycast.</t>

            <t>Alternatively, ABR and L3PE node failure can be protected with
            sub-1sec restoration, by using hierarchical-FIB.</t>
          </section>
        </section>

        <section title="Next-Hop Redundancy">
          <t>An aggregation domain is connected to the core network using two
          redundant area border routers, and MPLS hierarchy is applied on
          these ABRs. MPLS hierarchy helps scale the FIB but introduces
          additional complexity for the rerouting in case of ABR failure.
          Indeed ABR failure requires a BGP convergence to update the inner MPLS
          hierarchy, in addition to the IGP convergence to update the outer MPLS
          hierarchy. This is also expected to take more time as BGP
          convergence is performed after the IGP convergence and because the
          number of prefixes to update in the FIB can be significant. This is
          a drawback but the architecture allows for two "local" solutions
          which restore the traffic before the BGP convergence takes
          place.</t>

          <t>One, called hierarchical FIB edge, would be required on all edge
          LSRs involved in the inner (BGP) MPLS hierarchy, namely all routers
          except the ANs, which are not involved in the inner MPLS hierarchy. It
          involves pre-computing and pre-installing in the FIB the BGP backup
          path. Such backup paths are activated when the IGP advertises the
          failure of the primary path.</t>

          <t>The other, called egress fast reroute, would be required on the
          egress LSRs involved in the inner (BGP) MPLS hierarchy, namely TN and
          AGN connected to ABR. It involves:</t>

          <t><list>
              <t>using an anycast loopback address shared by both nominal and
              backup ABRs, advertised by both ABRs in the IGP and advertised as
              BGP Next Hop by the nominal ABR;</t>

              <t>activating IP FRR LFA on the (penultimate) hops, acting as
              PLR for the anycast loopback;</t>

              <t>using on the backup egress nodes (ABR2) an additional
              contextual MPLS FIB populated by the labels upstream allocated
              by the nominal egress node (ABR1).</t>
            </list>Details can be found in <xref target="PEFRR"></xref> and
          <xref target="ABRFRR"></xref>, and in the appendix of this draft.
          Both solutions have their pros and cons, and the choice is left to
          each Service Provider or deployment based on the different
          requirements. The point is that the seamless MPLS architecture can
          handle fast restoration time, even for ABR failures.</t>
        </section>
      </section>

      <!-- End of Deployment Scenario #1 -->

      <section title="Scalability Analysis">
        <section title="Control and Data Plane State for Deployment Scenario #1">
          <section title="Introduction">
            <t>Let's call:</t>

            <t><list style="symbols">
                <t>#AN the number of Access Nodes (AN) in the seamless MPLS
                domain</t>

                <t>#AGN the number of AGgregation Nodes (AGN) in the seamless
                MPLS domain</t>

                <t>#Core the number of core nodes (Core) in the core network</t>

                <t>#Area the number of aggregation routing domains.</t>
              </list>Let's take the following assumptions:</t>

            <t><list style="symbols">
                <t>Aggregation equipment is equally spread across
                aggregation routing domains</t>

                <t>the number of IGP links is three times the number of IGP
                nodes</t>

                <t>the number of IGP prefixes is five times the number of IGP
                nodes (links prefixes + 2 loopbacks)</t>

                <t>Access Nodes need to set up 1000 (1k) LSPs. 10% (100) are
                FEC which are outside of their routing domain. Those 100
                remote FEC are the same for all Access Nodes of a given
                AGN.</t>
              </list>The following sections roughly evaluate the scalability,
            both in absolute numbers and relative to the number of Access
            Nodes, which is the biggest scalability factor.</t>
          </section>

          <section title="Core Domain">
            <t>The IGP &amp; LDP core domain are not affected by the number of
            access nodes:</t>

            <t hangText=""><list style="hanging">
                <t hangText="IGP:"><list>
                    <t>node : #Core ~ o(1)</t>

                    <t>links : 3*#Core ~ o(1)</t>

                    <t>IP prefixes : 5*#Core ~ o(1)</t>
                  </list></t>

                <t hangText="LDP FEC:"><list>
                    <t>#Core ~ o(1)</t>
                  </list></t>
              </list>Core TN FIBs grow linearly with the number of nodes in
            the core domain. In other words, they are not affected by AGN and
            AN nodes:</t>

            <t><list style="hanging">
                <t hangText="Core TN:"><list>
                    <t>IP FIB : 5*#Core ~ o(1)</t>

                    <t>MPLS LFIB : #Core ~ o(1)</t>
                  </list></t>
              </list>BGP carries all AN routes which is significant. However,
            all AN routes are only needed in the control plane, possibly in a
            dedicated BGP Route Reflector (just like for BGP/MPLS VPNs) and
            not in the forwarding plane. The number of routes (100k) is
            smaller than the number of routes in the Internet (300k
            and rising) or in major VPN SP (>500k and rising) so the target
            can be handled with current implementations. In addition, AN
            routes are internal routes whose churn and instability are smaller
            and more under control than external routes.</t>

            <t><list style="hanging">
                <t hangText="BGP Route Reflector (RR)"><list>
                    <t>NLRI : #AN ~ o(n)</t>

                    <t>path : 2*#AN ~ o(2n)</t>
                  </list></t>
              </list>ABRs handle both the core and aggregation routes. They
            do not depend on the total number of AN nodes, but only on the
            number of AN in their aggregation domain.</t>

            <t><list style="hanging">
                <t hangText="ABR:"><list>
                    <t>IP FIB : 5*#Core + (5*#AGN + #AN) / #Area ~ o(#AN
                    /#Area)</t>

                    <t>MPLS LFIB : #Core + (#AGN + #AN) / #Area ~ o(#AN /
                    #Area)</t>
                  </list></t>
              </list></t>
          </section>

          <section title="Aggregation Domain">
            <t>In the aggregation domain, IGP & LDP are not affected by
            the number of access nodes outside of their domain. They are not
            affected by the total number of AN nodes:</t>

            <t><list style="hanging">
                <t hangText="IGP:"><list>
                    <t>node : #AGN / #Area ~ o(1)</t>

                    <t>links : 3*#AGN / #Area ~ o(1)</t>

                    <t>IP prefixes : #Core + #Area + (5*#AGN + #AN) / #Area ~
                    o(#AN *5/ #Area)</t>

                    <t><list style="symbols">
                        <t>+ 1 loopback per core node + one aggregate per area
                        + 5 prefixes per AGN in the area + 1 prefix per AN in
                        the area.</t>
                      </list></t>
                  </list></t>
              </list><list style="hanging">
                <t hangText="LDP FEC:"><list>
                    <t>#Core + (#AGN + #AN) / #Area ~ o(#AN / #Area)</t>

                    <t><list style="symbols">
                        <t>+ 1 loopback per core node + 1 loopback per AGN
                        & AN node in the area.</t>
                      </list></t>
                  </list></t>
              </list>AGN FIBs grow with the number of nodes in the core area
            and in their aggregation area, plus the number of inter-domain
            LSPs required by the ANs attached to them. They do not depend on
            the total number of AN nodes. In the BGP control plane, AGNs also
            need to handle all the AN routes.</t>

            <t><list style="hanging">
                <t hangText="AGN:"><list>
                    <t>IP FIB : #Core + #Area + (5*#AGN + #AN) / #Area ~ o(#AN
                    *5/ #Area)</t>

                    <t>MPLS LFIB : #Core + (#AGN + #AN) / #Area + 100 ~ o(#AN
                    / #Area)</t>
                  </list></t>
              </list>AN FIBs grow with their connectivity requirements. They
            do not depend on the number of AN, AGN, SN or any other nodes.</t>

            <t><list style="hanging">
                <t hangText="AN:"><list>
                    <t>IP RIB : 1 ~ o(1)</t>

                    <t>MPLS LIB : 1k ~ o(1)</t>

                    <t>IP FIB : 1 ~ o(1)</t>

                    <t>MPLS LFIB : 1k ~ o(1)</t>
                  </list></t>
              </list></t>
          </section>

          <section title="Summary">
            <t>AN requirements are kept minimal. BGP is not required and the
            size of their FIB is limited to their own connectivity
            requirements.</t>

            <t>In the core area, IGP and LDP are not affected by the nodes in
            the aggregation domains. In particular they do not grow with the
            number of AGN or AN.</t>

            <t>In the aggregation areas, IGP and LDP are affected by the
            number of core nodes and the number of AGN and AN in their area.
            They are not affected by the total number of AGN or AN in the
            seamless MPLS domain.</t>

            <t>No FIB of any node is required to handle the total number of
            AGN or AN in the seamless MPLS domain. In other words, the number
            of AGN and AN in the seamless MPLS domain is not limited, if the
            number of areas can grow accordingly. The main limitation is the
            MPLS connectivity requirements on the AN, i.e. mainly the number
            of LSPs needed on the AN. Another limitation may be the number of
            different LSPs needed by the ANs attached to or behind an AGN.
            However, given foreseen deployments and current AGN capabilities,
            this is not expected to be a limitation.</t>

            <t>In the control plane, BGP will typically handle all AN routes.
            This is significant but target deployments are well under current
            equipment capacities. In addition, if required, additional
            techniques could be used to improve this scalability, based on the
            experience gained with scaling BGP/MPLS VPN (e.g. route
            partitioning between RR planes, route filtering (static or dynamic
            with ORF or route refresh) between AN and on AGN to improve AGN
            scalability).</t>
          </section>

          <section title="Numerical application for use case #1">
            <t>As a recap, targets for deployment scenario 1 are:</t>

            <t><list style="symbols">
                <t>Number of Aggregation Domains 100</t>

                <t>Number of Backbone Nodes 1.000</t>

                <t>Number of AGgregation Nodes 10.000</t>

                <t>Number of Access Nodes 100.000</t>
              </list>This gives the following scaling numbers for each
            category of nodes:</t>

            <t><list style="symbols">
                <t>AN IP FIB 1</t>

                <t>AN MPLS LFIB 1 000</t>

                <t>AGN IP FIB 2 600</t>

                <t>AGN MPLS LFIB 2 200</t>

                <t>ABR IP FIB 7 600</t>

                <t>ABR MPLS LFIB 2 100</t>

                <t>TN IP FIB 5 000</t>

                <t>TN MPLS LFIB 1 000</t>

                <t>RR BGP NLRI 100 000</t>

                <t>RR BGP paths 200 000</t>
              </list></t>
          </section>

          <section title="Numerical application for use case #2">
            <t>As a recap, targets for deployment scenario 2 are:</t>

            <t><list style="symbols">
                <t>Number of Aggregation Domains 30</t>

                <t>Number of Backbone Nodes 150</t>

                <t>Number of AGgregation Nodes 1.500</t>

                <t>Number of Access Nodes 40.000</t>
              </list>This gives the following scaling numbers for each
            category of nodes:</t>

            <t><list style="symbols">
                <t>AN IP FIB 1</t>

                <t>AN MPLS LFIB 1 000</t>

                <t>AGN IP FIB 1 700</t>

                <t>AGN MPLS LFIB 1 800</t>

                <t>ABR IP FIB 3 700</t>

                <t>ABR MPLS LFIB 1 600</t>

                <t>TN IP FIB 750</t>

                <t>TN MPLS LFIB 150</t>

                <t>RR BGP NLRI 40 000</t>

                <t>RR BGP paths 80 000</t>
              </list></t>
          </section>
        </section>
      </section>
    </section>

    <section anchor="Acknowledgements" title="Acknowledgements">
      <t>Many people contributed to this document. The authors would like to
      thank Wim Henderickx, Clarence Filsfils, Thomas Beckhaus, Wilfried Maas,
      Roger Wenner, Kireeti Kompella, Yakov Rekhter, Mark Tinka and Simon
      DeLord for their suggestions and review.</t>
    </section>

    <!-- Possibly a 'Contributors' section ... -->

    <section anchor="IANA" title="IANA Considerations">
      <t>This memo includes no request to IANA.</t>

      <t>All drafts are required to have an IANA considerations section (see
      <xref target="I-D.narten-iana-considerations-rfc2434bis">the update of
      RFC 2434</xref> for a guide). If the draft does not require IANA to do
      anything, the section contains an explicit statement that this is the
      case (as above). If there are no requirements for IANA, the section will
      be removed during conversion into an RFC by the RFC Editor.</t>
    </section>

    <section anchor="security" title="Security Considerations">
      <t>The Seamless MPLS Architecture is subject to similar security threats
      as any MPLS LDP deployment. It is recommended that baseline security
      measures are considered as described in the LDP specification <xref
      target="RFC5036">RFC5036</xref> including ensuring authenticity and
      integrity of LDP messages, as well as protection against spoofing and
      Denial of Service attacks. Some deployments may require increased
      measures of network security if a subset of Access Nodes are placed in
      locations with lower levels of physical security, e.g. street cabinets
      (common practice for VDSL access). In such cases it is the
      responsibility of the system designer to take into account the physical
      security measures (environmental design, mechanical or electronic
      access control, intrusion detection), as well as monitoring and
      auditing measures (configuration and Operating System changes, reloads,
      route advertisements). But even with all this in mind, the designer
      still should consider network security risks and adequate measures
      arising from the lower level of physical security of those
      locations.</t>

      <section title="Access Network Security">
        <t>A detailed description for Access Network Security in Seamless MPLS
        can be found in the LDP Downstream on Demand document <xref
        target="I-D.ietf-mpls-ldp-dod"></xref>. </t>
      </section>

      <section title="Data Plane Security">
        <t>Data plane security risks applicable to the access MPLS network are
        listed below (a non-exhaustive list):</t>

        <t><list style="letters">
            <t>packets from a specific access node flow to an altered
            transport layer or service layer destination.</t>

            <t>packets belonging to undefined services flow to and from the
            access network.</t>

            <t>unlabelled packets destined to remote network nodes.</t>
          </list>The following mechanisms should be considered to address the
        listed data plane security risks:</t>

        <t><list style="numbers">
            <t>addressing (a) - Access and ABR LSRs SHOULD NOT accept labeled
            packets over a particular data link, unless from the Access or ABR
            LSR perspective this data link is known to attach to a trusted
            system based on employed authentication mechanism(s), and the top
            label has been distributed to the upstream neighbour by the
            receiving Access or ABR LSR.</t>

            <t>addressing (a) - ABR LSR MAY restrict network
            reachability for access devices to a subset of remote network LSR,
            based on authentication or other network security technologies
            employed towards Access LSRs. Restricted reachability can be
            enforced on the ABR LSR using local routing policies, and can be
            distributed towards the core MPLS network using routing policies
            associated with access MPLS FECs.</t>

            <t>addressing (b) - labeled service routes (e.g. MPLS/VPN, tLDP)
            are not accepted from unreliable routing peers. Detection of
            unreliable routing peers is achieved by engaging routing protocol
            detection and alarm mechanisms, and is out of scope of this
            document.</t>

            <t>addressing (a) and (b) - no successful attacks have been
            mounted on the control plane and has been detected.</t>

            <t>addressing (c) - ABR LSR MAY restrict IP network reachability
            to and from the access LSR.</t>
          </list></t>
      </section>

      <section title="Control Plane Security">
        <t>Similarly to Inter-AS MPLS/VPN deployments <xref
        target="RFC4364">RFC4364</xref>, the data plane security depends on
        the security of the control plane. To ensure control plane security
        access LDP DoD connections MUST only be made with LDP peers that are
        considered trusted from the local LSR perspective, meaning they are
        reachable over a data link that is known to attach to a trusted system
        based on employed authentication mechanism(s) on the local LSR. The
        TCP/IP MD5 authentication option <xref target="RFC5925">RFC5925</xref>
        should be used with LDP as described in LDP specification <xref
        target="RFC5036">RFC5036</xref>. If TCP/IP MD5 authentication is
        considered not secure enough, the designer may consider using a more
        elaborate and advanced TCP Authentication Option (TCP-AO <xref
        target="RFC5925">RFC5925</xref>) for LDP session authentication.
        Access IGP (if used) and any routing protocols used in access network
        for signalling service routes SHOULD also be secured in a similar
        manner. For an increased level of authentication in the control plane
        security for a subset of access locations with lower physical
        security, the designer could also consider using:</t>

        <t><list style="symbols">
            <t>different crypto keys for use in authentication procedures for
            these locations.</t>

            <t>stricter network protection mechanisms including DoS
            protection, interface and session flap dampening.</t>
          </list></t>
      </section>
    </section>
  </middle>

  <!--  *****BACK MATTER ***** -->

  <back>
    <!-- References split into informative and normative -->

    <!-- There are 2 ways to insert reference entries from the citation libraries:
     1. define an ENTITY at the top, and use "ampersand character"RFC2629; here (as shown)
     2. simply use a PI "less than character"?rfc include="reference.RFC.2119.xml"?> here
        (for I-Ds: include="reference.I-D.narten-iana-considerations-rfc2434bis.xml")

     Both are cited textually in the same manner: by using xref elements.
     If you use the PI option, xml2rfc will, by default, try to find included files in the same
     directory as the including file. You can also define the XML_LIBRARY environment variable
     with a value containing a set of directories to search.  These can be either in the local
     filing system or remote ones accessed by http (http://domain/dir/... ).-->

    <references title="Normative References">
      <!--?rfc include="http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml"?-->

      &RFC2119;
    </references>

    <references title="Informative References">
      <!-- Here we use entities that we defined at the beginning. -->

      &RFC2629;

      &RFC3107;

      &RFC3031;

      &RFC3209;

      &RFC3353;

      &RFC3552;

      &RFC4090;

      &RFC4364;

      &RFC5036;

      &RFC5283;

      &RFC5286;

      &RFC5332;

      &RFC5925;

      &I-D.kothari-henderickx-l2vpn-vpls-multihoming;

      &I-D.narten-iana-considerations-rfc2434bis;

      &I-D.ietf-bfd-v4v6-1hop;

      &I-D.ietf-mpls-ldp-dod;

      &I-D.filsfils-rtgwg-lfa-applicability;

      &I-D.sajassi-l2vpn-rvpls-bgp;

      &I-D.raggarwa-mac-vpn;

      <reference anchor="PEFRR">
        <front>
          <title>Fast Reroute in MPLS L3VPN Networks - Towards CE-to-CE
          Protection, MPLS 2006 Conference</title>

          <author fullname="Le Roux, J.L." initials="J.L." surname="Le Roux">
            <organization></organization>
          </author>

          <author fullname="Decraene, B." initials="B." surname="Decraene">
            <organization></organization>

            <address>
              <postal>
                <street></street>

                <city></city>

                <region></region>

                <code></code>

                <country></country>
              </postal>

              <phone></phone>

              <facsimile></facsimile>

              <email></email>

              <uri></uri>
            </address>
          </author>

          <author fullname="Ahmad, Z." initials="Z." surname="Ahmad">
            <organization></organization>

            <address>
              <postal>
                <street></street>

                <city></city>

                <region></region>

                <code></code>

                <country></country>
              </postal>

              <phone></phone>

              <facsimile></facsimile>

              <email></email>

              <uri></uri>
            </address>
          </author>

          <date />
        </front>
      </reference>

      <reference anchor="ABRFRR">
        <front>
          <title>Local Protection for LSP tail-end node failure, MPLS World
          Congress 2009</title>

          <author fullname="Rekhter, Y" initials="Y." surname="Rekhter">
            <organization></organization>
          </author>

          <date />
        </front>
      </reference>

      <reference anchor="BGPPIC">
        <front>
          <title>BGP PIC, Technical Report</title>

          <author fullname="Cisco Systems">
            <organization></organization>
          </author>

          <date month="November" year="2007" />
        </front>
      </reference>

      <reference anchor="ACM01">
        <front>
          <title>Achieving sub-second IGP convergence in large IP networks,
          ACM SIGCOMM Computer Communication Review, v.35 n.3</title>

          <author fullname="Pierre Francois">
            <organization></organization>
          </author>

          <author fullname="Clarence Filsfils">
            <organization></organization>

            <address>
              <postal>
                <street></street>

                <city></city>

                <region></region>

                <code></code>

                <country></country>
              </postal>

              <phone></phone>

              <facsimile></facsimile>

              <email></email>

              <uri></uri>
            </address>
          </author>

          <author fullname="John Evans">
            <organization></organization>

            <address>
              <postal>
                <street></street>

                <city></city>

                <region></region>

                <code></code>

                <country></country>
              </postal>

              <phone></phone>

              <facsimile></facsimile>

              <email></email>

              <uri></uri>
            </address>
          </author>

          <author fullname="Olivier Bonaventure">
            <organization></organization>

            <address>
              <postal>
                <street></street>

                <city></city>

                <region></region>

                <code></code>

                <country></country>
              </postal>

              <phone></phone>

              <facsimile></facsimile>

              <email></email>

              <uri></uri>
            </address>
          </author>

          <date month="July" year="2005" />
        </front>
      </reference>

      <!-- A reference written by an organization not a person. -->
    </references>

    <!-- Change Log

v00 2006-03-15  EBD   Initial version

v01 2006-04-03  EBD   Moved PI location back to position 1 -
                      v3.1 of XMLmind is better with them at this location.
v02 2007-03-07  AH    removed extraneous nested_list attribute,
                      other minor corrections
v03 2007-03-09  EBD   Added comments on null IANA sections and fixed heading capitalization.
                      Modified comments around figure to reflect non-implementation of
                      figure indent control.  Put in reference using anchor="DOMINATION".
                      Fixed up the date specification comments to reflect current truth.
v04 2007-03-09 AH     Major changes: shortened discussion of PIs,
                      added discussion of rfc include.
v05 2007-03-10 EBD    Added preamble to C program example to tell about ABNF and alternative 
                      images. Removed meta-characters from comments (causes problems).  -->
  </back>
</rfc>
<!-- PAFTECH AB 2003-2026
2026-04-22 08:43:51 -->