One document matched: draft-ietf-rtgwg-mrt-frr-architecture-02.xml


<?xml version="1.0" encoding="US-ASCII"?>
<!-- This template is for creating an Internet Draft using xml2rfc,
     which is available here: http://xml.resource.org. -->



<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!-- One method to get references from the online citation libraries.
     There has to be one entity for each item to be referenced. 
     An alternate method (rfc include) is described in the references. -->

<!ENTITY I-D.ietf-rtgwg-remote-lfa SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-rtgwg-remote-lfa.xml">
<!ENTITY RFC5714 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5714.xml">
<!ENTITY RFC5384 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5384.xml">
<!ENTITY RFC5715 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5715.xml">
<!ENTITY RFC5286 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5286.xml">
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY I-D.ietf-rtgwg-ipfrr-notvia-addresses SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-rtgwg-ipfrr-notvia-addresses.xml">
<!ENTITY I-D.ietf-rtgwg-lfa-applicability SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-rtgwg-lfa-applicability.xml">
<!ENTITY I-D.ietf-rtgwg-ordered-fib SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-rtgwg-ordered-fib.xml">

<!ENTITY I-D.enyedi-rtgwg-mrt-frr-algorithm SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.enyedi-rtgwg-mrt-frr-algorithm.xml">
<!ENTITY I-D.atlas-rtgwg-mrt-mc-arch SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.atlas-rtgwg-mrt-mc-arch.xml">
<!ENTITY I-D.ietf-pim-mtid SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-pim-mtid.xml">
<!ENTITY I-D.ietf-mpls-ldp-multi-topology SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-mpls-ldp-multi-topology.xml">

]>





<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<!-- used by XSLT processors -->
<!-- For a complete list and description of processing instructions (PIs), 
     please see http://xml.resource.org/authoring/README.html. -->
<!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds might want to use.
     (Here they are set differently than their defaults in xml2rfc v1.32) -->
<?rfc strict="no" ?>
<!-- give errors regarding ID-nits and DTD validation -->
<!-- control the table of contents (ToC) -->
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space 
     (using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->
<rfc category="std" docName="draft-ietf-rtgwg-mrt-frr-architecture-02" ipr="trust200902">
  <!-- category values: std, bcp, info, exp, and historic
     ipr values: full3667, noModification3667, noDerivatives3667
     you can add the attributes updates="NNNN" and obsoletes="NNNN" 
     they will automatically be output with "(if approved)" -->





  <!-- ***** FRONT MATTER ***** -->

  <front>
    <!-- The abbreviated title is used in the page header - it is only necessary if the 
         full title is longer than 39 characters -->

    <title abbrev="MRT Unicast FRR Architecture">An Architecture for IP/LDP Fast-Reroute Using Maximally Redundant Trees</title>

    <!-- add 'role="editor"' below for the editors if appropriate -->

    <!-- Another author who claims to be an editor -->

    <author fullname="Alia Atlas" initials="A.K.A." role="editor" surname="Atlas">
     <organization>Juniper Networks</organization>
     <address>
       <postal>
         <street>10 Technology Park Drive</street>
         <city>Westford</city>
         <region>MA</region>
         <code>01886</code>
         <country>USA</country>
       </postal>
       <email>akatlas@juniper.net</email>
      </address>
    </author>

    <author fullname="Robert Kebler" initials="R.K." surname="Kebler">
     <organization>Juniper Networks</organization>
     <address>
       <postal>
         <street>10 Technology Park Drive</street>
         <city>Westford</city>
         <region>MA</region>
         <code>01886</code>
         <country>USA</country>
       </postal>
       <email>rkebler@juniper.net</email>
      </address>
    </author>

    <author fullname="G&#225;bor S&#225;ndor Enyedi" initials="G.S.E." surname="Enyedi">
      <organization>Ericsson</organization>
      <address>
        <postal>
          <street>Konyves Kalman krt 11.</street>
          <city>Budapest</city>
          <country>Hungary</country>
          <code>1097</code>
        </postal>
        <email>Gabor.Sandor.Enyedi@ericsson.com</email>
     </address>
    </author>

    <author fullname="Andr&#225;s Cs&#225;sz&#225;r" initials="A.C." surname="Cs&#225;sz&#225;r">
      <organization>Ericsson</organization>
      <address>
        <postal>
          <street>Konyves Kalman krt 11</street>
          <city>Budapest</city>
          <country>Hungary</country>
          <code>1097</code>
        </postal>
        <email>Andras.Csaszar@ericsson.com</email>
     </address>
    </author>

   <author fullname="Jeff Tantsura" initials="J.T." surname="Tantsura">
   <organization>Ericsson</organization>
   <address>
     <postal>
       <street>300 Holger Way</street>
       <city>San Jose</city>
         <region>CA</region>
         <code>95134</code>
         <country>USA</country>       
     </postal>
     <email>jeff.tantsura@ericsson.com</email>
   </address>
   </author>

    <author fullname="Maciek Konstantynowicz" initials="M.K." surname="Konstantynowicz">
     <organization>Cisco Systems</organization>
     <address>
       <email>maciek@bgp.nu</email>
      </address>
    </author>

    <author fullname="Russ White" initials="R.W." surname="White">
      <organization>Verisign</organization>
      <address>
        <postal>
           <street>12061 Bluemont Way</street>
           <city>Reston</city>
           <region>VA</region>
           <code>20190</code>
           <country>USA</country>       
        </postal>
        <email>riwhite@verisign.com</email>
      </address>
    </author>

    <author fullname="Mike Shand" initials="M.S." surname="Shand">
      <address>
        <email>mike@mshand.org.uk</email>
      </address>
    </author>

    <date year="2013" />

    <!-- If the month and year are both specified and are the current ones, xml2rfc will fill 
         in the current day for you. If only the current year is specified, xml2rfc will fill 
	 in the current day and month for you. If the year is not the current one, it is 
	 necessary to specify at least a month (xml2rfc assumes day="1" if not specified for the 
	 purpose of calculating the expiry date).  With drafts it is normally sufficient to 
	 specify just the year. -->

    <!-- Meta-data Declarations -->

    <area>Routing</area>

    <workgroup>Routing Area Working Group</workgroup>

    <abstract>

      <t>As IP and LDP Fast-Reroute are increasingly deployed, the
      coverage limitations of Loop-Free Alternates are seen as a
      problem that requires a straightforward and consistent solution
      for IP and LDP, for unicast and multicast.  This draft describes
      an architecture based on redundant backup trees where a single
      failure can cut a point-of-local-repair from the destination
      only on one of the pair of redundant trees.</t>

      <t>One innovative algorithm to compute such topologies is maximally
      disjoint backup trees.  Each router can compute its next-hops
      for each pair of maximally disjoint trees rooted at each node in
      the IGP area with computational complexity similar to that
      required by Dijkstra.</t>
      
      <t>The additional state, address and computation requirements are
      believed to be significantly less than the Not-Via architecture
      requires.</t>
    </abstract>
  </front>










  <middle>
    <section title="Introduction">
      <t>There is still work required to completely provide IP and LDP
      Fast-Reroute <xref target="RFC5714"/> for unicast and multicast
      traffic.  This draft proposes an architecture to provide 100%
      coverage for unicast traffic.  The associated multicast
      architecture is described in <xref
      target="I-D.atlas-rtgwg-mrt-mc-arch"/>.</t>

      <t>Loop-free alternates (LFAs) <xref target="RFC5286"/> provide a
      useful mechanism for link and node protection but getting
      complete coverage is quite hard.  <xref target="LFARevisited"/>
      defines sufficient conditions to determine if a network provides
      link-protecting LFAs and also proves that augmenting a network
      to provide better coverage is NP-hard. <xref
      target="I-D.ietf-rtgwg-lfa-applicability"/> discusses the
      applicability of LFA to different topologies with a focus on
      common PoP architectures.</t>

      <t>While Not-Via <xref
      target="I-D.ietf-rtgwg-ipfrr-notvia-addresses"/> is defined as
      an architecture, in practice, it has proved too complicated and
      stateful to spark substantial interest in implementation or
      deployment.  Academic implementations <xref
      target="LightweightNotVia"/> exist and have found the address
      management complexity high (but no standardization has been done
      to reduce this).</t>

      <t>A different approach is needed and that is what is described
      here.  It is based on the idea of using disjoint backup
      topologies as realized by Maximally Redundant Trees (described in
      <xref target="LightweightNotVia"/>); the general
      architecture can also apply to future improved redundant tree
      algorithms.</t>

<section title="Goals for Extending IP Fast-Reroute coverage beyond LFA">

<t>Any scheme proposed for extending IPFRR network topology coverage
beyond LFA, apart from attaining basic IPFRR properties, should also
aim to achieve the following usability goals:</t>

<t><list style="symbols">

<t>ensure maximum physically feasible link and node disjointness
regardless of topology,</t>

<t>automatically compute backup next-hops based on the topology
information distributed by link-state IGP,</t>

<t>do not require any signaling in the case of failure and use
pre-programmed backup next-hops for forwarding,</t>

<t>introduce minimal amount of additional addressing and state on
routers,</t>

<t>enable gradual introduction of the new scheme and backward
compatibility,</t>

<t>and do not impose requirements for external computation.</t>
</list></t>

</section>

    </section><!-- End of Introduction !-->






<section title="Terminology">
<t><list style="hanging">

     <t hangText="2-connected: ">A graph that has no cut-vertices.
     This is a graph that requires two nodes to be removed before the
     network is partitioned.</t>

     <t hangText="2-connected cluster: ">A maximal set of nodes that
     are 2-connected.</t>

     <t hangText="2-edge-connected: ">A network graph where at least
     two links must be removed to partition the network.</t>

     <t hangText="ADAG: ">Almost Directed Acyclic Graph - a graph
     that, if all links incoming to the root were removed, would be a
     DAG.</t>

     <t hangText="block: ">Either a 2-connected cluster, a cut-edge,
     or an isolated vertex.</t>

     <t hangText="cut-link: ">A link whose removal partitions the
     network.  A cut-link by definition must be connected between two
     cut-vertices.  If there are multiple parallel links, then they
     are referred to as cut-links in this document if removing the set
     of parallel links would partition the network. </t>

     <t hangText="cut-vertex: ">A vertex whose removal partitions the
     network.</t>

     <t hangText="DAG: ">Directed Acyclic Graph - a graph where all
     links are directed and there are no cycles in it.</t>

     <t hangText="GADAG: ">Generalized ADAG - a graph that is
     the combination of the ADAGs of all blocks.</t>

     <t hangText="Maximally Redundant Trees (MRT): ">A pair of trees
     where the path from any node X to the root R along the first tree
     and the path from the same node X to the root along the second
     tree share the minimum number of nodes and the minimum number of
     links.  Each such shared node is a cut-vertex.  Any shared links
     are cut-links.  Any RT is an MRT but many MRTs are not RTs.</t>

     <t hangText="network graph: ">A graph that reflects the network
     topology where all links connect exactly two nodes and broadcast
     links have been transformed into the standard pseudo-node
     representation.</t>

     <t hangText="Redundant Trees (RT): ">A pair of trees where the
     path from any node X to the root R along the first tree is
     node-disjoint with the path from the same node X to the root
     along the second tree.  These can be computed in 2-connected
     graphs.</t>

</list></t>


</section>
    <section title="Maximally Redundant Trees (MRT)">

<t>In the last few years, there's been substantial research on how to
compute and use redundant trees. Redundant trees are directed spanning
trees that provide disjoint paths towards their common root.  These
redundant trees only exist and provide link protection if the network
is 2-edge-connected and node protection if the network is 2-connected.
Such connectivity may not be present in real networks, either due
to architecture or due to a previous failure.  The work on maximally
redundant trees has added two useful pieces that make them ready for
use in a real network.</t>

<t><list style="symbols">

<t>Computable regardless of network topology: The maximally redundant
trees are computed so that only the cut-edges or cut-vertices are
shared between the multiple trees.</t>

<t>Computationally practical: The algorithm is based on a common network
topology database.  Algorithm variants can compute in O(e) or O(e + n
log n), as given in <xref target="I-D.enyedi-rtgwg-mrt-frr-algorithm"/>.</t>

</list></t>

<t>There is, of course, significantly more in the literature related to
redundant trees and even fast-reroute, but the formulation of the
Maximally Redundant Trees (MRT) algorithm makes it very well suited to
use in routers.</t>

<t>A known disadvantage of MRT, and redundant trees in general, is
that the trees do not necessarily provide shortest detour paths.  The
use of the shortest-path-first algorithm in tree-building and
including all links in the network as possibilities for one path or
another should improve this.  Modeling is underway to investigate and
compare the MRT alternates to the optimal <xref
target="I-D.enyedi-rtgwg-mrt-frr-algorithm"/>.  Providing shortest
detour paths would require failure-specific detour paths to the
destinations, but the state-reduction advantage of MRT lies in the
detour being established per destination (root) instead of per
destination AND per failure.</t>

<t>The specific algorithm to compute MRTs, the logic behind that
algorithm, and alternative computational approaches are given in
detail in <xref target="I-D.enyedi-rtgwg-mrt-frr-algorithm"/>.  Those
interested are highly recommended to read that document.  This
document describes how the MRTs can be used and not how to compute
them.</t>

<t>The most important thing to understand about MRTs is that for each
pair of destination-rooted MRTs, there is a path from every node X to
the destination D on the Blue MRT that is as disjoint as possible from
the path on the Red MRT.  The two paths along the two MRTs to a given
destination-root of a 2-connected graph are node-disjoint and
link-disjoint, while in any non-2-connected graph, only the
cut-vertices and cut-edges can be contained by both of the paths.</t>

<t>For example, in <xref target="fig_example_2_connected"/>, there is
a network graph that is 2-connected in (a) and associated MRTs in (b)
and (c).  One can consider the paths from B to R; on the Blue MRT, the
paths are B->F->D->E->R or B->C->D->E->R.  On
the Red MRT, the path is B->A->R.  These are clearly link and
node-disjoint.  These MRTs are redundant trees because the paths are
disjoint.</t>

<figure anchor="fig_example_2_connected" title="A 2-connected Network" align="center">
<artwork align="center"><![CDATA[
[E]---[D]---|           [E]<--[D]<--|                [E]-->[D]---|
 |     |    |            |     ^    |                       |    |
 |     |    |            V     |    |                       V    V
[R]   [F]  [C]          [R]   [F]  [C]               [R]   [F]  [C]
 |     |    |                  ^    ^                 ^     |    |
 |     |    |                  |    |                 |     V    |
[A]---[B]---|           [A]-->[B]---|                [A]---[B]<--|

      (a)                     (b)                         (c)
a 2-connected graph     Blue MRT towards R          Red MRT towards R
]]></artwork>
</figure>

<t>By contrast, in <xref target="Non-2-connected_Network_Example"/>,
the network in (a) is not 2-connected.  If F, G or the link F&lt;-&gt;G
failed, then the network would be partitioned.  It is clearly
impossible to have two link-disjoint or node-disjoint paths from G, I
or J to R.  The MRTs given in (b) and (c) offer paths that are as
disjoint as possible.  For instance, the paths from B to R are the
same as in <xref target="fig_example_2_connected"/> and the path from
G to R on the Blue MRT is G->F->D->E->R and on the Red MRT
is G->F->B->A->R.</t>

<figure anchor="Non-2-connected_Network_Example"
  title="A non-2-connected network" align="center">
<artwork align="center"><![CDATA[

                 [E]---[D]---|
                  |     |    |     |----[I]
                  |     |    |     |     |
                 [R]---[C]  [F]---[G]    |
                  |     |    |     |     |
                  |     |    |     |----[J]
                 [A]---[B]---|
    
                             (a)
                   a non-2-connected graph

  [E]<--[D]<--|                        [E]-->[D]---|                
   |     ^    |          [I]                  |    |          [I]    
   V     |    |           ^                   V    V           |     
  [R]<--[C]  [F]<--[G]    |            [R]---[C]  [F]<--[G]    |     
         ^    ^     |     |             ^     |    |     ^     V     
         |    |     |--->[J]            |     V    |     |----[J]     
  [A]-->[B]---|                        [A]<--[B]<--|                 

              (b)                                    (c)
       Blue MRT towards R                    Red MRT towards R

]]></artwork>
</figure>

</section>

<section anchor="mrt_and_frr" title="Maximally Redundant Trees (MRT) and Fast-Reroute">

<t>In normal IGP routing, each router has its shortest-path-tree to
all destinations.  From the perspective of a particular destination,
D, this looks like a reverse SPT (rSPT).  To use maximally redundant
trees, in addition, each destination D has two MRTs associated with
it; by convention these will be called the blue and red MRTs.</t>

<t>Any IP/LDP fast-reroute technique beyond LFA requires an additional
dataplane procedure, such as an additional forwarding mechanism.  The
well-known options are tunneling (e.g. <xref
target="I-D.ietf-rtgwg-ipfrr-notvia-addresses"/> or <xref
target="I-D.ietf-rtgwg-remote-lfa"/>), per-interface forwarding
(e.g. Loop-Free Failure Insensitive Routing in <xref
target="EnyediThesis"/>), and multi-topology forwarding.  MRT is
realized by using multi-topology forwarding.  There is a Blue MRT
forwarding topology and a Red MRT forwarding topology.</t>

<t>MRTs are practical to maintain redundancy even after a single link
or node failure. If a pair of MRTs is computed rooted at each
destination, all the destinations remain reachable along one of the
MRTs in the case of a single link or node failure.</t>

<t>When there is a link or node failure affecting the rSPT, each node
will still have at least one path via one of the MRTs to reach the
destination D.  For example, in <xref
target="Non-2-connected_Network_Example"/>, C would normally forward
traffic to R across the C&lt;-&gt;R link.  If that C&lt;-&gt;R link
fails, then C could use either the Blue MRT path C->D->E->R
or the Red MRT path C->B->A->R.</t>

<t>As is always the case with fast-reroute technologies, forwarding
does not change until a local failure is detected.  Packets are
forwarded along the shortest path.  The appropriate alternate to use
is pre-computed.  <xref target="I-D.enyedi-rtgwg-mrt-frr-algorithm"/>
describes exactly how to determine whether the Blue MRT next-hops or
the Red MRT next-hops should be the MRT alternate next-hops for a
particular primary next-hop N to a particular destination D.</t>

<t>MRT alternates are always available to use, unless the network has
been partitioned.  It is a local decision whether to use an MRT
alternate, a Loop-Free Alternate or some other type of alternate.
When a network needs to use a micro-loop prevention mechanism <xref
target="RFC5715"/> such as Ordered FIB <xref
target="I-D.ietf-rtgwg-ordered-fib"/> or Farside Tunneling <xref
target="RFC5715"/>, then the whole IGP area needs to have alternates
available so that the micro-loop prevention mechanism, which requires
slower network convergence, can take the necessary time without
impacting traffic badly.</t>

<t>As described in <xref target="RFC5286"/>, when a worse failure than
is anticipated happens, using LFAs that are not downstream neighbors
can cause micro-looping.  An example is given of link-protecting
alternates causing a loop on node failure.  Even if a worse failure
than anticipated happened, the use of MRT alternates will not cause
looping.  Therefore, while node-protecting LFAs may be preferred,
the certainty that no alternate-induced looping will occur is an
advantage of using MRT alternates when the available node-protecting
LFA is not a downstream path.</t>

</section>
     
<section anchor="sec_uni_forwarding" title="Unicast Forwarding with MRT Fast-Reroute">
<t>With LFA, there is no need to tunnel unicast traffic, whether IP or
LDP.  The traffic is simply sent to an alternate.  As mentioned
earlier in <xref target="mrt_and_frr"/>, MRT needs multi-topology
forwarding.  Unfortunately, neither IP nor LDP provides extra bits for
a packet to indicate its topology.</t>

<t>Once the MRTs are computed, the two sets of MRTs are seen by the
forwarding plane as essentially two additional topologies.  The same
considerations apply for forwarding along the MRTs as for handling
multiple topologies.</t>  


<section anchor="sec_ldp_uni_forward" title="LDP Unicast Forwarding - Avoid Tunneling">
<t>For LDP, it is very desirable to avoid tunneling because, for at
least node protection, tunneling requires knowledge of remote LDP
label mappings and thus requires targeted LDP sessions and the
associated management complexity.  There are two different mechanisms
that can be used.</t>

<t><list style="numbers">

   <t> Option A - Encode MT-ID in Labels: In addition to sending a
   single label for a FEC, a router would provide two additional
   labels with the MT-IDs associated with the Blue MRT or Red MRT
   forwarding topologies.  This is very simple for hardware
   support. It does reduce the label space for other uses.  It also
   increases the memory to store the labels and the communication
   required by LDP.</t>

   <t>Option B - Create Topology-Identification Labels: Use the
   label-stacking ability of MPLS and specify only two additional
   labels - one for each associated MRT color - by a new FEC type.
   When sending a packet onto an MRT, first swap the LDP label and
   then push the topology-identification label for that MRT color.
   When receiving a packet with a topology-identification label, pop
   it and use it to guide the next-hop selection in combination with
   the next label in the stack; then swap the remaining label, if
   appropriate, and push the topology-identification label for the
   next-hop.  This has minimal usage of additional labels, memory and
   LDP communication.  It does increase the size of packets and the
   complexity of the required label operations and look-ups.  This can
   use the same mechanisms as are needed for context-aware label
   spaces.</t> </list></t>

<t>Note that with LDP unicast forwarding, regardless of whether
topology-identification label or encoding topology in label is used,
no additional loopbacks per router are required. This is because LDP
labels are used on a hop-by-hop basis to identify MRT-blue and MRT-red
forwarding topologies.</t>

<t>For greatest hardware compatibility, routers implementing MRT LDP
fast-reroute MUST support Option A of encoding the MT-ID in the
labels.  The extensions to indicate an MT-ID for a FEC are described
in Section 3.2.1 of <xref target="I-D.ietf-mpls-ldp-multi-topology"/>.</t>

</section>

<section title="IP Unicast Traffic">

<t>For IP, there is no currently practical alternative except
tunneling.  The tunnel egress could be the original destination in the
area, the next-next-hop, etc.  If the tunnel egress is the original
destination router, then the traffic remains on the redundant tree
with sub-optimal routing.  If the tunnel egress is the next-next-hop,
then protection of multi-homed prefixes and node-failure for ABRs is
not available.  Selection of the tunnel egress is a router-local
decision.</t>

<t>There are three options available for marking IP packets with the
MRT on which they should be forwarded.</t>

<t><list style="numbers"> 

<t>Tunnel IP packets via an LDP LSP. This has the advantage that more
installed routers can do line-rate encapsulation and decapsulation.
Also, no additional IP addresses would need to be allocated or
signaled.

<list style="letters"> 

<t>Option A - LDP Destination-Topology Label: Use a label that
indicates both destination and MRT.  This method allows easy tunneling
to the next-next-hop as well as to the IGP-area destination.  For a
proxy-node, the destination to use is the non-proxy-node immediately
before the proxy-node on that particular color MRT.</t>

<t>Option B - LDP Topology Label: Use a Topology-Identifier label on
top of the IP packet.  This is very simple.  If tunneling to a
next-next-hop is desired, then a two-deep label stack can be used with
[ Topology-ID label, Next-Next-Hop Label ].</t> </list></t>

<t>Tunnel IP packets in IP.  Each router supporting this option would
announce two additional loopback addresses and their associated MRT
color. Those addresses are used as destination addresses for MRT-blue
and MRT-red IP tunnels respectively. They allow the transit nodes to
identify the traffic as being forwarded along either MRT-blue or
MRT-red tree topology to reach the tunnel destination. Announcements
of these two additional loopback addresses per router with their MRT
color require IGP extensions.</t>
</list></t>

<t>For greatest hardware compatibility and ease in removing the
MRT-topology marking at area/level boundaries, routers that support
MPLS and implement IP MRT fast-reroute SHOULD support Option A - using
an LDP label that indicates the destination and MT-ID.</t>

<t> For proxy-nodes associated with one or more multi-homed prefixes,
there is no router associated with the proxy-node, so its loopbacks
can't be known or used.  Instead, the loopback addresses of the
routers that are attached to the proxy-node can be used.  One of those
routers will be on the Red MRT and the other on the Blue MRT.  The
MRT-red loopback of the first router would be used to reach the router
on the Red MRT and similarly the MRT-blue loopback of the second
router would be used.  The routers connected to the proxy-node are the
end of the area/level and can decapsulate the traffic and properly
forward it into the next area.</t>

</section>

</section>

<section title="Protocol Extensions and Considerations: OSPF and ISIS">

<t>There are two possible approaches to what additional information to
distribute in the IGP.  The first is to allow full flexibility in all
information and distribute whichever values and combinations are
desired.  The second is to simply distribute flags indicating a
particular well-known profile is supported.  Thus the MRT Island
Creation process is trivial.  The profile approach is recommended,
with the added flexibility of being able to specify more specific
information if necessary and supported.</t>

<t> For example, a simple profile "metric-insensitive MRT unicast
fast-reroute via LDP" could specify:
<list style="hanging">
  <t hangText="MRT Island Creation: ">Only include other routers
  advertising this profile.</t>

  <t hangText="MRT Algorithm ID: ">The MRT Lowpoint algorithm defined
  in <xref target="I-D.enyedi-rtgwg-mrt-frr-algorithm"/>.</t>

  <t hangText="Red MRT MT-ID: ">The Red MRT MT-ID is the single
  well-known value allocated by IANA from the OSPF, ISIS, LDP and PIM
  MT-ID spaces.</t>

  <t hangText="Blue MRT MT-ID: ">The Blue MRT MT-ID is the single
  well-known value allocated by IANA from the OSPF, ISIS, LDP and PIM
  MT-ID spaces.</t>

  <t hangText="GADAG Root Election Priority: ">Pick the router with
  the lowest router ID to be the GADAG root.</t>

  <t hangText="Forwarding Mechanisms for IP: ">Use IP-in-LDP.</t>

  <t hangText="MRT Capabilities: ">Computes MRTs, IP Fast-Reroute, LDP
  Fast-Reroute</t>
</list></t>

<t>The following captures an initial understanding of the aspects that
must be considered to fully form a profile to advertise.  For some
profiles, associated information may need to be distributed, such as
GADAG Root Election Priority, Red MRT Loopback Address, Blue MRT
Loopback Address, or MRT Algorithm ID.</t>

<t><list style="hanging">

  <t hangText="MRT Island Creation ID: ">This identifies the process
  that the router uses to form an MRT Island.  By advertising an ID
  for the process, it is possible to have different processes in the
  future.  It may be desirable to advertise a list ordered by
  preference to allow transitions.</t>

  <t hangText="MRT Algorithm ID: ">This identifies the particular MRT
  algorithm used by the router.  By having an Algorithm ID, it is
  possible to change the algorithm used or use different ones in
  different networks.  It may be desirable to advertise a list ordered
  by preference to allow transitions.</t>

  <t hangText="Red MRT MT-ID: ">This specifies the MT-ID to be
  associated with the Red MRT forwarding topology.  It is needed for
  use in signaling.  All routers in the MRT Island MUST agree on a value.</t>

  <t hangText="Blue MRT MT-ID: ">This specifies the MT-ID to be
  associated with the Blue MRT forwarding topology.  It is needed for
  use in signaling.  All routers in the MRT Island MUST agree on a value.</t>

  <t hangText="GADAG Root Election Priority: ">This specifies the
  priority of the router for being used as the GADAG root of its
  island.  A GADAG root is elected from the set of routers with the
  highest priority; ties are broken based upon highest Router ID.  The
  sensitivity of the MRT Algorithms to GADAG root selection is still
  being evaluated.  This provides the network operator with a knob to
  force particular GADAG root selection.</t>

  <t hangText="Forwarding Mechanism for IP: ">This specifies which
  forwarding mechanisms the router supports for IP traffic.  An MRT
  island must support a common set of forwarding mechanisms, which may
  be less than the full set advertised.  Multiple forwarding
  mechanisms may be specified, such as IP-in-IPv4, IP-in-IPv6 or
  IP-in-LDP Label.  None is also an option.</t>

  <t hangText="Red MRT Loopback Address: ">This provides the router's
  loopback address to reach the router via the Red MRT forwarding
  topology.  It can, of course, be specified for both IPv4 and IPv6.</t>

  <t hangText="Blue MRT Loopback Address: ">This provides the router's
  loopback address to reach the router via the Blue MRT forwarding
  topology.  It can, of course, be specified for both IPv4 and IPv6.</t>

  <t hangText="MRT Capabilities Available: ">This is the set of
  capabilities that the router is configured to support.</t>

  <t hangText="MRT Capabilities Required: ">This is the set of
  capabilities that other routers must have available to be added into
  the MRT island.</t>

  <t hangText="MRT Capability: Computes MRTs:  ">The router can compute MRTs.</t>

  <t hangText="MRT Capability: IP Fast-Reroute: ">The router can use the
  computed MRTs for IP fast-reroute.</t>

  <t hangText="MRT Capability: LDP Fast-Reroute: ">The router can use the
  computed MRTs for LDP fast-reroute.</t>

  <t hangText="MRT Capability: PIM Fast-Reroute: ">The router can use
  the computed MRTs for PIM fast-reroute.</t>

  <t hangText="MRT Capability: mLDP Fast-Reroute: ">The router can use
  the computed MRTs for mLDP fast-reroute.</t>

  <t hangText="MRT Capability: PIM Global Protection: ">The router can use
  the computed MRTs for PIM Global Protection 1+1.</t>

  <t hangText="MRT Capability: mLDP Global Protection: ">The router can use
  the computed MRTs for mLDP Global Protection 1+1.</t>
</list></t>

<t>The assumption is that a router will form an MRT island, compute
MRTs within that island, and then use those MRTs for the purposes
specified in the profile.  If multiple profiles are supported with
different purposes (e.g. mLDP Global Protection), then the router may
use a different profile and associated MRT island to be used for the
purposes in that different profile.  If a router wanted to form
multiple MRT islands for different application purposes, that could be
done by specifying different Red MRT MT-IDs and Blue MRT MT-IDs.</t>

<t>As with LFA, it is expected that OSPF Virtual Links will not be supported.</t>
</section>

<section title="Protocol Extensions and considerations: LDP">

<t>Capability negotiation in LDP is needed to indicate support for
MRT; having this explicit allows the use of MRT-specific signaling
extensions.  A router also needs to indicate, via FEC advertisement,
whether it supports LDP Destination-Topology Labels, LDP Topology
Labels, or both.  Since the label or labels are swapped at each LSR,
consistency across the network is not required.</t>

<t>If both mechanisms are supported, then if a Destination-Topology
label is provided for a FEC, that should be used so that an ABR/LBR
can indicate the appropriate labels, as discussed in <xref
target="sec_abr_forwarding"/>.</t>

</section>

<section title="Multi-homed Prefixes">

<t>One advantage of LFAs that is necessary to preserve is the ability
to protect multi-homed prefixes against ABR failure.  For instance, if
a prefix from the backbone is available via both ABR A and ABR B, if A
fails, then the traffic should be redirected to B. This can also be
done for backups via MRT.</t>

<t>This generalizes to any multi-homed prefix.  A multi-homed prefix
could be:
<list style="symbols">
<t>An out-of-area prefix announced by more than one ABR,</t>
<t>An AS-External route announced by 2 or more ASBRs,</t>
<t>A prefix with iBGP multipath to different ASBRs,</t>
<t>etc.</t>
</list></t>

<t>For each prefix, the attached ABRs are selected and a proxy-node is
created connected to those ABRs.  If there exist multiple multi-homed
prefixes that share the same connectivity and costs to each of those
ABRs, then a single proxy-node can be used to represent the set.  An
example of this is shown in <xref target="fig_abr_mhp"/>.</t>

<figure anchor="fig_abr_mhp" 
title="Prefixes Advertised by Multiple ABRs" align="center">
<artwork align="center"><![CDATA[
        2    2                     2     2
      A----B----C                A----B----C          
    2 |         | 2            2 |         | 2
      |         |                |         |           
    [ABR1]    [ABR2]           [ABR1]    [ABR2]
      |         |                |         |
     p,10      p,15           10 |---[P]---| 15

    (a) Initial topology         (b)with proxy-node

    A<---B<---C                 A--->B--->C          
    |         ^                 ^         | 
    V         |                 |         V           
  [ABR1]    [ABR2]            [ABR1]    [ABR2]
    |                                     |
    |-->[P]                         [P]<--| 

    (c) Blue MRT                (d) Red MRT

]]></artwork>
</figure>

<t>The proxy-nodes and associated links are added to the network
topology after all real links have been assigned to a direction and
before the actual MRTs are computed. Proxy-nodes cannot be transited
when computing the MRTs.  In addition to computing the pair of MRTs
associated with each router destination D in the area, a pair of MRTs
can be computed for each such proxy-node to fully protect against ABR
failure. </t>

<t>Each ABR or attaching router must remove the MRT marking (see <xref
target="sec_uni_forwarding"/>) and then forward the traffic outside of
the area (or island of MRT-fast-reroute-supporting routers).</t>

<t>If ASBR protection is desired, this has additional complexities if
the ASBRs are in different areas.  Similarly, protecting labeled BGP
traffic in the event of an ASBR failure has additional complexities
due to the per-ASBR label spaces involved.</t>

</section>

<section anchor= "sec_abr_forwarding" title="Inter-Area and ABR Forwarding Behavior">

<t>In regular forwarding, packets destined outside the area arrive at
the ABR and the ABR forwards them into the other area because the
next-hops from the area with the best route (according to tie-breaking
rules) are used by the ABR.  The question is then what to do with
packets marked with an MRT that are received by the ABR.</t>

<t>For unicast fast-reroute, the need to stay on an MRT forwarding
topology terminates at the ABR/LBR whose best route is via a different
area/level.  It is highly desirable to go back to the default
forwarding topology when leaving an area/level.  There are three basic
reasons for this.  First, the default topology uses shortest paths;
the packet will thus take the shortest possible route to the
destination.  Second, this allows failures that might appear in
multiple areas (e.g. ABR/LBR failures) to be separately identified and
repaired around.  Third, the packet can be fast-rerouted again, if
necessary, due to a failure in a different area.</t>

<t>An ABR/LBR that receives a packet marked with an MRT towards a
destination in another area/level should forward the MRT marked packet
in the area/level with the best route along its associated MRT.  If
the packet came from that area/level, this correctly avoids the
failure.</t>

<t>How does an ABR/LBR ensure that MRT-marked packets do not arrive at
the ABR/LBR?  There are two different mechanisms depending upon the
forwarding mechanism being used.</t>

<t>If the LDP label encodes the MT-ID as well as the destination, then
the ABR/LBR is responsible for advertising a particular label to each
neighbor.  Additionally, an LDP label is associated with an MT-ID due
to the MT FEC that was used and not due to any intrinsic particular
value for the label.  Assume that an ABR/LBR has allocated three
labels for a particular destination; those labels are L_primary,
L_blue, and L_red.  When the ABR/LBR advertises label bindings to
routers in the area with the best route to the destination, the
ABR/LBR provides L_primary for the default topology, L_blue for the
Blue MRT MT-ID and L_red for the Red MRT MT-ID, exactly as expected.
However, when the ABR/LBR advertises label bindings to routers in
other areas, the ABR/LBR advertises L_primary for the default
topology, for the Blue MRT MT-ID, and for the Red MRT MT-ID.  The
ABR/LBR installs next-hops from the best area for L_primary based on
the default topology, for L_blue based on the Blue MRT forwarding
topology, and for L_red based on the Red MRT forwarding topology.
Therefore, packets from the non-best area will arrive at the ABR/LBR
with a label L_primary and will be forwarded into the best area along
the default topology.  By controlling what labels are advertised, the
ABR/LBR can thus enforce that packets exiting the area do so on the
shortest-path default topology.</t>

<t>If IP-in-IP forwarding is used, then the ABR/LBR behavior is
dependent upon the outermost IP address.  If the outermost IP address
is an MRT loopback address of the ABR/LBR, then the packet is
decapsulated and forwarded based upon the inner IP address, which
should go on the default SPT topology.  If the outermost IP address is
not an MRT loopback address of the ABR/LBR, then the packet is simply
forwarded along the associated forwarding topology.  A PLR sending
traffic to a destination outside its local area/level will pick the
MRT and use the associated MRT loopback address of the ABR/LBR
immediately before the proxy-node on that MRT.</t>

<t>Thus, regardless of which of these two forwarding mechanisms is
used, there is no need for additional computation or per-area
forwarding state.</t>

<figure anchor="fig_abr_mrt" title="ABR Forwarding Behavior and MRTs"
align="center">
<artwork align="center"><![CDATA[
        
    +----[C]----     --[D]--[E]                --[D]--[E]
    |           \   /         \               /         \
p--[A] Area 10 [ABR1]  Area 0 [H]--p   +-[ABR1]  Area 0 [H]-+
    |           /   \         /        |      \         /   |
    +----[B]----     --[F]--[G]        |       --[F]--[G]   |
                                       |                    |
                                       | other              |
                                       +----------[p]-------+
                                         area 

      (a) Example topology        (b) Proxy node view in Area 0 nodes


                +----[C]<---       [D]->[E]
                V           \             \
             +-[A] Area 10 [ABR1]  Area 0 [H]-+
             |  ^           /             /   |
             |  +----[B]<---       [F]->[G]   V
             |                                |
             +------------->[p]<--------------+

               (c) rSPT towards destination p



          ->[D]->[E]                         -<[D]<-[E]
         /          \                       /         \
    [ABR1]  Area 0 [H]-+             +-[ABR1]         [H]
                   /   |             |      \         
            [F]->[G]   V             V       -<[F]<-[G]
                       |             |
                       |             |
             [p]<------+             +--------->[p]

  (d) Blue MRT in Area 0           (e) Red MRT in Area 0

]]></artwork>
</figure>

<t>The other potential forwarding mechanisms require additional
computation by the penultimate router along the in-local-area MRT
immediately before the ABR/LBR is reached.  The penultimate router can
determine that the ABR/LBR will forward the packet out of area/level
and, in that case, the penultimate router can remove the MRT marking
but still forward the packet along the MRT next-hop to reach the ABR.
For instance, in <xref target="fig_abr_mrt"/>, if node H fails, node E
has to put traffic towards prefix p onto the red MRT.  But since node
D knows that ABR1 will use a best route from another area, it is safe for D
to remove the MRT marking and just send the packet to ABR1 still on
the red MRT but unmarked.  ABR1 will use the shortest path in Area 10.
</t>

<t>In all cases for ISIS and most cases for OSPF, the penultimate
router can determine what decision the adjacent ABR will make.  The
one case where it can't be determined is when two ASBRs are in
different non-backbone areas attached to the same ABR; in that case the
ASBR's Area ID may be needed for tie-breaking (prefer the route with the
largest OSPF area ID) and the Area ID isn't announced as part of the
ASBR link-state advertisement (LSA).  In this one case, suboptimal
forwarding along the MRT in the other area would happen.  If this is a
realistic deployment scenario, OSPF extensions could be
considered.</t>

</section>

<section title="Issues with Area Abstraction">

<t>MRT fast-reroute provides complete coverage in an area that is
2-connected.  Where a failure would partition the network, of course,
no alternate can protect against that failure.  Similarly, there are
ways of connecting multi-homed prefixes that make it impractical to
protect them without excessive complexity.</t>

<figure anchor="fig_mhp_areas" title="AS external prefixes in different areas">
<artwork align="center"><![CDATA[
       50
     |----[ASBR Y]---[B]---[ABR 2]---[C]      Backbone Area 0:
     |                                |           ABR 1, ABR 2, C, D
     |                                |
     |                                |       Area 20:  A, ASBR X
     |                                |
     p ---[ASBR X]---[A]---[ABR 1]---[D]      Area 10: B, ASBR Y
        5                                  p is a Type 1 AS-external

]]></artwork>
</figure>

<t>Consider the network in <xref target="fig_mhp_areas"/> and assume
there is a richer connective topology that isn't shown, where the same
prefix is announced by ASBR X and ASBR Y which are in different
non-backbone areas.  If the link from A to ASBR X fails, then an MRT
alternate could forward the packet to ABR 1 and ABR 1 could forward it
to D, but then D would find the shortest route is back via ABR 1 to
Area 20.  The only real way to get it from A to ASBR Y is to
explicitly tunnel it to ASBR Y.</t>

<t>Tunnelling to the backup ASBR is for future consideration.  The
previously proposed PHP approach needs to have an exception if BGP
policies (e.g. BGP local preference) determine which ASBR to use.
Consider the case in <xref target="fig_mhp_areas_bgp_pref"/>.  If the
link between A and ASBR X (the preferred border router) fails, A can
put the packets to p onto an MRT alternate, even tunnel it towards
ASBR Y.  Node B, however, must not remove the MRT marking in this
case, as nodes in Area 0, including ASBR Y itself, would not know that
their preferred ASBR is down.</t>

<figure anchor="fig_mhp_areas_bgp_pref" 
title="Failure of path towards ASBR preferred by BGP">

<artwork align="center"><![CDATA[

              Area 20                    BB Area 0  
  p ---[ASBR X]-X-[A]---[B]---[ABR 1]---[D]---[ASBR Y]--- p

              BGP prefers ASBR X for prefix p

]]></artwork>
</figure>


<t>The fine details of how to solve multi-area external prefix cases,
or of identifying certain cases as too unlikely and too complex to
protect, are for further consideration.</t>

</section>

<section title="Partial Deployment and Islands of Compatible MRT FRR routers">
<t>A natural concern with new functionality is how to have it be
useful when it is not deployed across an entire IGP area.  In the case
of MRT FRR, where it provides alternates when appropriate LFAs aren't
available, there are also deployment scenarios where it may make sense
to only enable some routers in an area with MRT FRR.  A simple example
of such a scenario would be a ring of 6 or more routers that is
connected via two routers to the rest of the area.</t>

<t>First, a computing router S must determine its local island of
compatible MRT fast-reroute routers.  A router that has a common
profile flag and is connected either to S or to another router already
determined to be in S's local island can be added to S's local
island.</t>

<t>Destinations inside the local island can obviously use MRT
alternates.  Destinations outside the local island can be treated like
a multi-homed prefix with caveats to avoid looping.  For LDP labels
including both destination and topology, the routers at the borders of
the local island need to originate labels for the original FEC and the
associated MRT-specific labels.  Packets sent to an LDP label marked
as blue or red MRT to a destination outside the local island will have
the last router in the local island swap the label to one for the
destination and forward the packet along the outgoing interface on the
MRT towards a router outside the local island that was represented by
the proxy-node.</t>

<t>For IP in IP encapsulations, remote destinations' loopback
addresses for the MRTs cannot be used, even if they were available.
Instead, the MRT loopback address of the router attached to a
proxy-node, which represents destinations outside the local island,
can be used. Packets sent to the router's MRT loopback address will
have their outer IP header removed and will need to be explicitly
forwarded along the outgoing interface on the MRT towards a router
outside the local island that was represented by the proxy-node.  This
behavior requires essentially remembering the MT-ID indicated by the
outer IP address.  An alternate option would be to advertise different
loopback addresses to be associated with the proxy-node; the outer IP
address would still be removed but it would indicate the outgoing
interface to use and no lookup would be necessary on the internal IP
address while maintaining MT-ID context.</t>

<t>A key question is which routers outside the MRT island can packets
be forwarded to so that they are not forwarded back into the MRT
island.  An example of the necessary network graph transformations is
given in <xref target="fig_island_ext_dest"/>.  There are two parts to
the computation.  First, the MRT island is collapsed into a single
node; this assumes that the cost of transiting the MRT island is
nothing and is pessimistic but allows for simpler computation.  Then,
for each destination (other than the MRT island), the routers adjacent
to the MRT island are checked to see if they are loop-free with
respect to the MRT island and the destination.  The loop-free
neighbors of the MRT island that are closest to the destination are
selected.  Then, a graph of just the MRT island is augmented with
proxy-nodes that are attached via the outgoing interfaces to the
selected loop-free neighbors.  Finally, the MRTs rooted at each
proxy-node are computed on that augmented MRT island graph.
Essentially, the MRT island must have a loop-free neighbor to be able
to have an alternate.</t>

<figure anchor="fig_island_ext_dest" 
title="Computing alternates to destinations outside the MRT Island">

<artwork align="center"><![CDATA[

      [G]---[E]---(B)---(C)---(D)    
       | \   |     |           | 
       |  \  |     |           |
       |   \ |     |           |
      [H]---[F]---(A)---(S)----|

   (1) Network Graph with Partial Deployment

     [E],[F],[G],[H] :  No support for MRT-FRR
     (A),(B),(C),(D),(S):  MRT Island - supports MRT-FRR


 [G]---[E]----|                     |---(B)---(C)---(D)
  | \   |     |                     |    |           | 
  |  \  |  ( MRT Island )      [ proxy ] |           | 
  |   \ |     |                     |    |           | 
 [H]---[F]----|                     |---(A)---(S)----|

  (2) Graph for determining    (3) Graph for MRT computation
      loop-free neighbors

]]></artwork>
</figure>

<t>Naturally, there are more complicated options to improve coverage,
such as connecting multiple MRT islands across tunnels, but it is not
clear that the additional complexity is necessary.</t>


<!-- Andras: loop avoidance issues not yet solved? What if the packet
     after getting out of the island is forwarded back?
-->


<!-- Andras: 

<t>In an MRT island, a MHP proxy node should only be connected to
those MRT island nodes from which a shortest path that does not loop
back to the island leads to the destination prefix. If MRT deployment
is sparse, it may easily happen that such nodes do not exist in the
current MRT island.  In this case, the task is to tunnel the packet to
an outside node, a "candidate", which can forward it on the shortest
path.  We need to find redundant paths to these nodes.  This can be
done by creating an extended island by virtually connecting the
candidate to node in the island and by finding a pair of MRTs in this
extended area rooted to the candidate.  These candidate nodes should
be virtually added to the MRT as proxy nodes, but they should be
connected to MRT island nodes from which the remote repair node is
reachable on the shortest path without looping back to the island.</t>


<t>Observe that we need to discuss what the MRT island border node
needs to do, when the packet is leaving the island but should be
forwarded to an out-of-island candidate.  Recall, that such a packet
was forwarded along an MRT, and we need to switch back to the shortest
path when leaving the MRT island.  Therefore, the MRT island border
node must change the destination address or MPLS label to the one
describing forwarding along shortest path. This swapping is simple,
for LDP.  If however we are using pure IP, we need to change the
destination IP address in the outer encapsulation header to the
default address of the candidate.</t>

<t>Moreover, it is possible, that the candidate is not an MRT capable
node, so it does not have two extra IP addresses automatically.  In
this case the operator is may configure two additional IP addresses
for this legacy node, so that MRT capable routers consistently select
one as the default address, one as the address on the blue tree, and
one as the address on the red tree.  E.g., besides the default address
the operator configures two IP additional address that are higher than
the default. In this case, the smallest IP address is the default
address, the medium can be the red address while the highest can be
the blue address ? the main point is that each router should handle
these addresses consistently.</t>



<t>In order to further improve failure coverage, we can connect MRT
capable islands explicitly with tunnels.  This means that border
routers of islands may put the packet into an IP-in-IP tunnel or push
an MPLS label, and send them to another island.  The concept is
depicted in <xref target="fig_multi-island"/>.  There we have X and Y,
two MRT capable nodes, which can reach each other along the shortest
paths, using only non-MRT capable nodes in both directions.  In this
case we can build up a tunnel between them and consider them as
immediate neighbors.  This, trivially, unifies the two MRT capable
islands.  Uniting MRT capable islands gives us the possibility to
treat them as a single super island, and in this way give us
relatively high coverage.  Since now it is not important if the MRT
tunnell egress is in the same MRT capable island or not, if it is MRT
capable, we can find paths along the MRTs in the super island.</t>

<figure anchor="fig_multi-island" title="Unifying multiple MRT islands">
<artwork align="center"><![CDATA[
.....                                             ...........
    .....    Tunnell or LSP connecting two     ...          . ...
        .   MRT capable island border routers  .      MRT
MRT     .<====================================>.      island
island [X]- - - - -[ ]- - - - - -[ ]- - - - -[Y]      #2
#1       .                                    .
        .                                     .
........                                       .....
                                                   ..............

]]></artwork>
</figure>
 
<t>Observe, however, that this kind of connection has its drawbacks. 
First, applying such tunnels adds another header to the packets, which
is an overhead; second, realizing such tunneling requires using
targeted LDP (or labeled iBGP) between island border routers to tell
other nodes which label the tunnell endpoint needs to put packets back
to the MRT LSP.  On the above figure, X needs the label used by Y for
forwarding packets not along the original LSP, but along one of the
MRTs.</t>

-->

</section>

<section title="Network Convergence and Preparing for the Next Failure">

<t>After a failure, MRT detours ensure that packets reach their
intended destination while the IGP has not reconverged onto the new
topology.  As link-state updates reach the routers, the IGP process
calculates the new shortest paths. Two things need attention:
micro-loop prevention and MRT re-calculation.</t>

<section title="Micro-forwarding loop prevention and MRTs">

<t>As is well known <xref target="RFC5715"/>, micro-loops can occur
during IGP convergence; such loops can be local to the failure or
remote from the failure.  Managing micro-loops is an orthogonal issue
to having alternates for local repair, such as MRT fast-reroute
provides.</t>

<t>There are two possible micro-loop prevention mechanisms discussed in
<xref target="RFC5715"/>.  The first is Ordered FIB <xref
target="I-D.ietf-rtgwg-ordered-fib"/>.  The second is Farside
Tunneling which requires tunnels or an alternate topology to reach
routers on the farside of the failure.</t>

<t>Since MRTs provide an alternate topology through which traffic can
be sent and which can be manipulated separately from the SPT, it is
possible that MRTs could be used to support Farside Tunneling.
Details of how to do so are outside of this document.</t>

</section>

<section title="MRT Recalculation">

<t>When a failure event happens, traffic is put by the PLRs onto the
MRT topologies.  After that, each router recomputes its shortest path
tree (SPT) and moves traffic over to that.  Only after all the PLRs
have switched to using their SPTs and traffic has drained from the MRT
topologies should each router install the recomputed MRTs into the
FIBs.</t>

<t>At each router, therefore, the sequence is as follows:
<list style="numbers">
<t>Receive failure notification</t>
<t>Recompute SPT</t>
<t>Install new SPT</t>
<t>Recompute MRTs</t>
<t>Wait configured period for all routers to be using their SPTs and
traffic to drain from the MRTs.</t>
<t>Install new MRTs.</t>
</list></t>

<t>While the recomputed MRTs are not installed in the FIB, protection
coverage is lowered.  Therefore, it is important to recalculate the
MRTs and install them quickly.</t>

</section>

</section>


<section anchor="Acknowledgements" title="Acknowledgements">
  <t>The authors would like to thank Hannes Gredler, Jeff Tantsura,
  Ted Qian, Kishore Tiruveedhula, Santosh Esale, Nitin Bahadur, Harish
  Sitaraman and Raveendra Torvi for their suggestions and review.</t>

</section>

    <!-- Possibly a 'Contributors' section ... -->

    <section anchor="IANA" title="IANA Considerations">
      <t>This document includes no request to IANA.</t>
    </section>

    <section anchor="Security" title="Security Considerations">
      <t>This architecture is not currently believed to introduce new security concerns.</t>
    </section>
  </middle>

  <!--  *****BACK MATTER ***** -->

  <back>


    <!-- References split into informative and normative -->

    <!-- There are 2 ways to insert reference entries from the citation libraries:
     1. define an ENTITY at the top, and use "ampersand character"RFC2629; here (as shown)
     2. simply use a PI "less than character"?rfc include="reference.RFC.2119.xml"?> here
        (for I-Ds: include="reference.I-D.narten-iana-considerations-rfc2434bis.xml")

     Both are cited textually in the same manner: by using xref elements.
     If you use the PI option, xml2rfc will, by default, try to find included files in the same
     directory as the including file. You can also define the XML_LIBRARY environment variable
     with a value containing a set of directories to search.  These can be either in the local
     filing system or remote ones accessed by http (http://domain/dir/... ).-->

    <references title="Normative References">
    &RFC5714;
    &RFC5286;
    &I-D.enyedi-rtgwg-mrt-frr-algorithm;

    </references>

    <references title="Informative References">
    &I-D.ietf-rtgwg-ipfrr-notvia-addresses;
    &I-D.ietf-rtgwg-lfa-applicability;
    &RFC5715;
    &I-D.ietf-rtgwg-ordered-fib;
    &I-D.atlas-rtgwg-mrt-mc-arch;
    &I-D.ietf-mpls-ldp-multi-topology;
    &I-D.ietf-rtgwg-remote-lfa;
      <reference anchor="LightweightNotVia"
                 target="http://mycite.omikk.bme.hu/doc/71691.pdf">
       <front>
          <title>IP Fast ReRoute: Lightweight Not-Via without Additional Addresses</title>
          <author fullname="G&#225;bor S&#225;ndor Enyedi" initials="G.S.E." surname="Enyedi"/>
          <author fullname="Gabor Retvari" initials="G.R." surname="Retvari"/>
          <author fullname="Peter Szilagyi" initials="P.S." surname="Szilagyi"/>
          <author fullname="Andr&#225;s Cs&#225;sz&#225;r" initials="A.C." surname="Cs&#225;sz&#225;r"/>
          <date year="2009" />
       </front>
        <seriesInfo name="Proceedings of IEEE INFOCOM" value=""/>
        <format type='PDF' target="http://mycite.omikk.bme.hu/doc/71691.pdf"/>
      </reference>

      <reference anchor="LFARevisited"
                 target="http://opti.tmit.bme.hu/~tapolcai/papers/retvari2011lfa_infocom.pdf">
       <front>
          <title>IP Fast ReRoute: Loop Free Alternates Revisited</title>
          <author fullname="Gabor Retvari" initials="G.R." surname="Retvari"/>
          <author fullname="Janos Tapolcai" initials="J.T." surname="Tapolcai"/>
          <author fullname="G&#225;bor S&#225;ndor Enyedi" initials="G.S.E." surname="Enyedi"/>
          <author fullname="Andr&#225;s Cs&#225;sz&#225;r" initials="A.C." surname="Cs&#225;sz&#225;r"/>
          <date year="2011" />
        </front>
        <seriesInfo name="Proceedings of IEEE INFOCOM" value=""/>
        <format type='PDF' target="http://opti.tmit.bme.hu/~tapolcai/papers/retvari2011lfa_infocom.pdf"/>
      </reference>

    <reference anchor="EnyediThesis"
               target="http://timon.tmit.bme.hu/theses/thesis_book.pdf">
     <front>
       <title>Novel Algorithms for IP Fast Reroute</title>
       <author fullname="G&#225;bor S&#225;ndor Enyedi" initials="G.S.E." surname="Enyedi"/>
       <date month="February" year="2011"/>
       </front>
        <seriesInfo name="Department of Telecommunications and Media Informatics, Budapest University of Technology and Economics" value="Ph.D. Thesis"/>
        <format type='PDF' target="http://www.omikk.bme.hu/collections/phd/Villamosmernoki_es_Informatikai_Kar/2011/Enyedi_Gabor/ertekezes.pdf" />
      </reference>
    </references>

    <!-- Change Log

v00 2011-06-28  AKA   Initial version
v01 2011-07-13  RWK   Multicast Changes
v02 2012-01-18  AKA   First WG version - removed multicast
v03 2012-03-08 AKA    Second WG version - added more details for
              inter-area, IGP signaling, and phased deployment.
v04 2013-02-23 AKA    WG-03 - added profile flags, LDP signaling, removed
              restriction to only 2 nodes for proxy-nodes.  -->

  </back>
</rfc>

PAFTECH AB 2003-20262026-04-22 16:56:05