One document matched: draft-ietf-rtgwg-mrt-frr-architecture-00.xml


<?xml version="1.0" encoding="UTF-8"?>
<!-- This template is for creating an Internet Draft using xml2rfc,
     which is available here: http://xml.resource.org. -->



<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!-- One method to get references from the online citation libraries.
     There has to be one entity for each item to be referenced. 
     An alternate method (rfc include) is described in the references. -->

<!ENTITY I-D.karan-mofrr SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.karan-mofrr.xml">
<!ENTITY RFC5714 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5714.xml">
<!ENTITY RFC5384 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5384.xml">
<!ENTITY RFC5715 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5715.xml">
<!ENTITY RFC5286 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5286.xml">
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY I-D.ietf-rtgwg-ipfrr-notvia-addresses SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-rtgwg-ipfrr-notvia-addresses.xml">
<!ENTITY I-D.ietf-rtgwg-lfa-applicability SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-rtgwg-lfa-applicability.xml">
<!ENTITY I-D.ietf-mpls-ldp-p2mp SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-mpls-ldp-p2mp.xml">
<!ENTITY I-D.ietf-rtgwg-ordered-fib SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-rtgwg-ordered-fib.xml">

<!ENTITY I-D.enyedi-rtgwg-mrt-frr-algorithm SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.enyedi-rtgwg-mrt-frr-algorithm.xml">
<!ENTITY I-D.ietf-pim-mtid SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-pim-mtid.xml">
]>




<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<!-- used by XSLT processors -->
<!-- For a complete list and description of processing instructions (PIs), 
     please see http://xml.resource.org/authoring/README.html. -->
<!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds might want to use.
     (Here they are set differently than their defaults in xml2rfc v1.32) -->
<?rfc strict="no" ?>
<!-- give errors regarding ID-nits and DTD validation -->
<!-- control the table of contents (ToC) -->
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space 
     (using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->
<rfc category="std" docName="draft-ietf-rtgwg-mrt-frr-architecture-00" ipr="trust200902">
  <!-- category values: std, bcp, info, exp, and historic
     ipr values: full3667, noModification3667, noDerivatives3667
     you can add the attributes updates="NNNN" and obsoletes="NNNN" 
     they will automatically be output with "(if approved)" -->





  <!-- ***** FRONT MATTER ***** -->

  <front>
    <!-- The abbreviated title is used in the page header - it is only necessary if the 
         full title is longer than 39 characters -->

    <title abbrev="MRT FRR Architecture">An Architecture for IP/LDP Fast-Reroute Using Maximally Redundant Trees</title>

    <!-- add 'role="editor"' below for the editors if appropriate -->

    <!-- Another author who claims to be an editor -->

    <author fullname="Alia Atlas" initials="A.K.A." role="editor" surname="Atlas">
     <organization>Juniper Networks</organization>
     <address>
       <postal>
         <street>10 Technology Park Drive</street>
         <city>Westford</city>
         <region>MA</region>
         <code>01886</code>
         <country>USA</country>
       </postal>
       <email>akatlas@juniper.net</email>
      </address>
    </author>

    <author fullname="Robert Kebler" initials="R.K." surname="Kebler">
     <organization>Juniper Networks</organization>
     <address>
       <postal>
         <street>10 Technology Park Drive</street>
         <city>Westford</city>
         <region>MA</region>
         <code>01886</code>
         <country>USA</country>
       </postal>
       <email>rkebler@juniper.net</email>
      </address>
    </author>

    <author fullname="Maciek Konstantynowicz" initials="M.K." surname="Konstantynowicz">
     <organization>Juniper Networks</organization>
     <address>
       <email>maciek@juniper.net</email>
      </address>
    </author>

    <author fullname="Gábor Sándor Enyedi" initials="G.S.E." surname="Enyedi">
      <organization>Ericsson</organization>
      <address>
        <postal>
          <street>Konyves Kalman krt 11.</street>
          <city>Budapest</city>
          <country>Hungary</country>
          <code>1097</code>
        </postal>
        <email>Gabor.Sandor.Enyedi@ericsson.com</email>
     </address>
    </author>

    <author fullname="András Császár" initials="A.C." surname="Császár">
      <organization>Ericsson</organization>
      <address>
        <postal>
          <street>Konyves Kalman krt 11</street>
          <city>Budapest</city>
          <country>Hungary</country>
          <code>1097</code>
        </postal>
        <email>Andras.Csaszar@ericsson.com</email>
     </address>
    </author>

    <author fullname="Russ White" initials="R.W." surname="White">
      <organization>Cisco Systems</organization>
      <address>
        <email>russwh@cisco.com</email>
      </address>
    </author>

    <author fullname="Mike Shand" initials="M.S." surname="Shand">
      <address>
        <email>mike@mshand.org.uk</email>
      </address>
    </author>

    <date year="2012" />

    <!-- If the month and year are both specified and are the current ones, xml2rfc will fill 
         in the current day for you. If only the current year is specified, xml2rfc will fill 
	 in the current day and month for you. If the year is not the current one, it is 
	 necessary to specify at least a month (xml2rfc assumes day="1" if not specified for the 
	 purpose of calculating the expiry date).  With drafts it is normally sufficient to 
	 specify just the year. -->

    <!-- Meta-data Declarations -->

    <area>Routing</area>

    <workgroup>Routing Area Working Group</workgroup>

    <abstract>

      <t>As IP and LDP Fast-Reroute are increasingly deployed, the
      coverage limitations of Loop-Free Alternates are seen as a
      problem that requires a straightforward and consistent solution
      for IP and LDP, for unicast and multicast.  This draft describes
      an architecture based on redundant backup trees where a single
      failure can cut a point-of-local-repair from the destination
      only on one of the pair of redundant trees.</t>

      <t>One innovative algorithm to compute such topologies is maximally
      disjoint backup trees.  Each router can compute its next-hops
      for each pair of maximally disjoint trees rooted at each node in
      the IGP area with computational complexity similar to that
      required by Dijkstra.</t>
      
      <t>The additional state, address and computation requirements are
      believed to be significantly less than the Not-Via architecture
      requires.</t>
    </abstract>
  </front>










  <middle>
    <section title="Introduction">
      <t>There is still work required to completely provide IP and LDP
      Fast-Reroute<xref target="RFC5714"/> for unicast and multicast traffic.  This draft
      proposes an architecture to provide 100% coverage.</t>

      <t>Loop-free alternates (LFAs)<xref target="RFC5286"/> provide a
      useful mechanism for link and node protection but getting
      complete coverage is quite hard.  <xref target="LFARevisited"/>
      defines sufficient conditions to determine if a network provides
      link-protecting LFAs and also proves that augmenting a network
      to provide better coverage is NP-hard. <xref
      target="I-D.ietf-rtgwg-lfa-applicability"/> discusses the
      applicability of LFA to different topologies with a focus on
      common PoP architectures.</t>

      <t>While Not-Via <xref
      target="I-D.ietf-rtgwg-ipfrr-notvia-addresses"/> is defined as
      an architecture, in practice, it has proved too complicated and
      stateful to spark substantial interest in implementation or
      deployment.  Academic implementations <xref
      target="LightweightNotVia"/> exist and have found the address
      management complexity high (but no standardization has been done
      to reduce this).</t>

      <t>A different approach is needed and that is what is described
      here.  It is based on the idea of using disjoint backup
      topologies as realized by Maximally Redundant Trees (described in
      <xref target="LightweightNotVia"/>); the general
      architecture could also apply to future improved redundant tree
      algorithms.</t>

<section title="Goals for Extending IP Fast-Reroute coverage beyond LFA">

<t>Any scheme proposed for extending IPFRR network topology coverage
beyond LFA, apart from attaining basic IPFRR properties, should also
aim to achieve the following usability goals:</t>

<t><list style="symbols">

<t>ensure maximum physically feasible link and node disjointness
regardless of topology,</t>

<t>automatically compute backup next-hops based on the topology
information distributed by link-state IGP,</t>

<t>do not require any signaling in the case of failure and use
pre-programmed backup next-hops for forwarding,</t>

<t>introduce minimal amount of additional addressing and state on
routers,</t>

<t>enable gradual introduction of the new scheme and backward
compatibility,</t>

<t>and do not impose requirements for external computation.</t>
</list></t>

</section>

    </section><!-- End of Introduction !-->






<section title="Terminology">
<t><list style="hanging">

     <t hangText="2-connected: ">A graph that has no cut-vertices.
     This is a graph that requires two nodes to be removed before the
     network is partitioned.</t>

     <t hangText="2-connected cluster: ">A maximal set of nodes that
     are 2-connected.</t>

     <t hangText="2-edge-connected: ">A network graph where at least
     two links must be removed to partition the network.</t>

     <t hangText="ADAG: ">Almost Directed Acyclic Graph - a graph
     that, if all links incoming to the root were removed, would be a
     DAG.</t>

     <t hangText="block: ">Either a 2-connected cluster, a cut-edge,
     or an isolated vertex.</t>

     <t hangText="cut-link: ">A link whose removal partitions the
     network.  A cut-link by definition must be connected between two
     cut-vertices.  If there are multiple parallel links, then they
     are referred to as cut-links in this document if removing the set
     of parallel links would partition the network. </t>

     <t hangText="cut-vertex: ">A vertex whose removal partitions the
     network.</t>

     <t hangText="DAG: ">Directed Acyclic Graph - a graph where all
     links are directed and there are no cycles in it.</t>

     <t hangText="GADAG: ">Generalized ADAG - a graph that is
     the combination of the ADAGs of all blocks.</t>

     <t hangText="Maximally Redundant Trees (MRT): ">A pair of trees
     where the path from any node X to the root R along the first tree
     and the path from the same node X to the root along the second
     tree share the minimum number of nodes and the minimum number of
     links.  Each such shared node is a cut-vertex.  Any shared links
     are cut-links.  Any RT is an MRT but many MRTs are not RTs.</t>

     <t hangText="network graph: ">A graph that reflects the network
     topology where all links connect exactly two nodes and broadcast
     links have been transformed into the standard pseudo-node
     representation.</t>

     <t hangText="Redundant Trees (RT): ">A pair of trees where the
     path from any node X to the root R along the first tree is
     node-disjoint with the path from the same node X to the root
     along the second tree.  These can be computed in 2-connected
     graphs.</t>

</list></t>


</section>
    <section title="Maximally Redundant Trees (MRT)">

<t>In the last few years, there's been substantial research on how to
compute and use redundant trees. Redundant trees are directed spanning
trees that provide disjoint paths towards their common root.  These
redundant trees only exist and provide link protection if the network
is 2-edge-connected and node protection if the network is 2-connected.
Such connectivity may not be present in real networks, either due
to architecture or due to a previous failure.  The work on maximally
redundant trees has added two useful pieces that make them ready for
use in a real network.</t>

<t><list style="symbols">

<t>Computable regardless of network topology: The maximally redundant
trees are computed so that only the cut-edges or cut-vertices are
shared between the multiple trees.</t>

<t>Computationally practical: The algorithm is based on a common network
topology database. Algorithm variants can compute in O(e) or O(e + n
log n), as given in <xref target="I-D.enyedi-rtgwg-mrt-frr-algorithm"/>.</t>

</list></t>

<t>There is, of course, significantly more in the literature related to
redundant trees and even fast-reroute, but the formulation of the
Maximally Redundant Trees (MRT) algorithm makes it very well suited to
use in routers.</t>

<t>A known disadvantage of MRT, and redundant trees in general, is
that the trees do not necessarily provide shortest detour paths.  The
use of the shortest-path-first algorithm in tree-building and
including all links in the network as possibilities for one path or
another should improve this.  Modeling is underway to investigate and
compare the MRT alternates to the optimal <xref
target="I-D.enyedi-rtgwg-mrt-frr-algorithm"/>.  Providing shortest
detour paths would require failure-specific detour paths to the
destinations, but the state-reduction advantage of MRT lies in the
detour being established per destination (root) instead of per
destination AND per failure.</t>

<t>The specific algorithm to compute MRTs as well as the logic behind
that algorithm and alternative computational approaches are given in
detail in <xref target="I-D.enyedi-rtgwg-mrt-frr-algorithm"/>.  Those
interested are highly recommended to read that document.  This
document describes how the MRTs can be used and not how to compute
them.</t>

<t>The most important thing to understand about MRTs is that for each
pair of destination-rooted MRTs, there is a path from every node X to
the destination D on the Blue MRT that is as disjoint as possible from
the path on the Red MRT.  The two paths along the two MRTs to a given
destination-root of a 2-connected graph are node-disjoint, while in
any non-2-connected graph, only the cut-vertices and cut-edges can be
contained by both of the paths.</t>

<t>For example, in <xref target="fig_example_2_connected"/>, there is
a network graph that is 2-connected in (a) and associated MRTs in (b)
and (c).  One can consider the paths from B to R; on the Blue MRT, the
paths are B->F->D->E->R or B->F->C->E->R.  On
the Red MRT, the path is B->A->R.  These are clearly link and
node-disjoint.  These MRTs are redundant trees because the paths are
disjoint.</t>

<figure anchor="fig_example_2_connected" title="A 2-connected Network" align="center">
<artwork align="center"><![CDATA[
[E]---[D]---|           [E]<--[D]<--|                [E]-->[D]---|
 |     |    |            |     ^    |                       |    |
 |     |    |            V     |    |                       V    V
[R]   [F]  [C]          [R]   [F]  [C]               [R]   [F]  [C]
 |     |    |                  ^    ^                 ^     |    |
 |     |    |                  |    |                 |     V    |
[A]---[B]---|           [A]-->[B]---|                [A]---[B]<--|

      (a)                     (b)                         (c)
a 2-connected graph     Blue MRT towards R          Red MRT towards R
]]></artwork>
</figure>

<t>By contrast, in <xref target="Non-2-connected_Network_Example"/>,
the network in (a) is not 2-connected.  If F, G or the link F&lt;-&gt;G
failed, then the network would be partitioned.  It is clearly
impossible to have two link-disjoint or node-disjoint paths from G, I
or J to R.  The MRTs given in (b) and (c) offer paths that are as
disjoint as possible.  For instance, the paths from B to R are the
same as in <xref target="fig_example_2_connected"/> and the path from
G to R on the Blue MRT is G->F->D->E->R and on the Red MRT
is G->F->B->A->R.</t>

<figure anchor="Non-2-connected_Network_Example"
  title="A non-2-connected network" align="center">
<artwork align="center"><![CDATA[

                 [E]---[D]---|
                  |     |    |     |----[I]
                  |     |    |     |     |
                 [R]---[C]  [F]---[G]    |
                  |     |    |     |     |
                  |     |    |     |----[J]
                 [A]---[B]---|
    
                             (a)
                   a non-2-connected graph

  [E]<--[D]<--|                        [E]-->[D]---|                
   |     ^    |          [I]                  |    |          [I]    
   V     |    |           ^                   V    V           |     
  [R]<--[C]  [F]<--[G]    |            [R]---[C]  [F]<--[G]    |     
         ^    ^     |     |             ^     |    |     ^     V     
         |    |     |--->[J]            |     V    |     |----[J]     
  [A]-->[B]---|                        [A]<--[B]<--|                 

              (b)                                    (c)
       Blue MRT towards R                    Red MRT towards R

]]></artwork>
</figure>

</section>

<section title="Maximally Redundant Trees (MRT) and Fast-Reroute">

<t>In normal IGP routing, each router has its shortest-path-tree to
all destinations.  From the perspective of a particular destination,
D, this looks like a reverse SPT (rSPT).  To use maximally redundant
trees, in addition, each destination D has two MRTs associated with
it; by convention these will be called the blue and red MRTs.</t>

<t>MRTs are practical to maintain redundancy even after a single link
or node failure. If a pair of MRTs is computed rooted at each
destination, all the destinations remain reachable along one of the
MRTs in the case of a single link or node failure.</t>

<t>When there is a link or node failure affecting the rSPT, each node
will still have at least one path via one of the MRTs to reach the
destination D.  For example, in <xref
target="Non-2-connected_Network_Example"/>, C would normally forward
traffic to R across the C&lt;-&gt;R link.  If that C&lt;-&gt;R link
fails, then C could use either the Blue MRT path C->D->E->R
or the Red MRT path C->B->A->R.</t>

<t>As is always the case with fast-reroute technologies, forwarding
does not change until a local failure is detected.  Packets are
forwarded along the shortest path.  The appropriate alternate to use
is pre-computed.  <xref target="I-D.enyedi-rtgwg-mrt-frr-algorithm"/>
describes exactly how to determine whether the Blue MRT next-hops or
the Red MRT next-hops should be the MRT alternate next-hops for a
particular primary next-hop N to a particular destination D.</t>

<t>MRT alternates are always available to use, unless the network has
been partitioned.  It is a local decision whether to use an MRT
alternate, a Loop-Free Alternate or some other type of alternate.
When a network needs to use a micro-loop prevention mechanism <xref
target="RFC5715"/> such as Ordered FIB<xref
target="I-D.ietf-rtgwg-ordered-fib"/> or Farside Tunneling<xref
target="RFC5715"/>, then the whole IGP area needs to have alternates
available so that the micro-loop prevention mechanism, which requires
slower network convergence, can take the necessary time without
impacting traffic badly.</t>

<t>As described in <xref target="RFC5286"/>, when a worse failure than
is anticipated happens, using LFAs that are not downstream neighbors
can cause micro-looping.  An example is given of link-protecting
alternates causing a loop on node failure.  Even if a worse failure
than anticipated happened, the use of MRT alternates will not cause
looping.  Therefore, while node-protecting LFAs may be preferred, there
are advantages to using MRT alternates when such a node-protecting
LFA is not a downstream path.</t>
     
<section title="Multi-homed Prefixes">

<t>One advantage of LFAs that is necessary to preserve is the ability
to protect multi-homed prefixes against ABR failure.  For instance, if
a prefix from the backbone is available via both ABR A and ABR B, if A
fails, then the traffic should be redirected to B. This can also be
done for backups via MRT.</t>

<t>This generalizes to any multi-homed prefix.  A multi-homed prefix
could be:
<list style="symbols">
<t>An out-of-area prefix announced by more than one ABR,</t>
<t>An AS-External route announced by 2 or more ASBRs,</t>
<t>A prefix with iBGP multipath to different ASBRs,</t>
<t>etc.</t>
</list></t>

<t>For each prefix, the two lowest total cost ABRs are selected and a
proxy-node is created connected to those two ABRs.  If there exist
multiple multi-homed prefixes that share the same two best
connectivity, then a single proxy-node can be used to represent the
set.  An example of this is shown in <xref target="fig_abr_mhp"/>.</t>

<figure anchor="fig_abr_mhp" 
title="Prefixes Advertised by Multiple ABRs" align="center">
<artwork align="center"><![CDATA[
        2    2                     2     2
      A----B----C                A----B----C          
    2 |         | 2            2 |         | 2
      |         |                |         |           
    [ABR1]    [ABR2]           [ABR1]    [ABR2]
      |         |                |         |
     p,10      p,15           10 |---[P]---| 15

    (a) Initial topology         (b)with proxy-node

    A<---B<---C                 A--->B--->C          
    |         ^                 ^         | 
    V         |                 |         V           
  [ABR1]    [ABR2]            [ABR1]    [ABR2]
    |                                     |
    |-->[P]                         [P]<--| 

    (c) Blue MRT                (d) Red MRT

]]></artwork>
</figure>

<t>The proxy-nodes and associated links are added to the network
topology after all real links have been assigned to a direction and
before the actual MRTs are computed. Proxy-nodes cannot be transited
when computing the MRTs.  In addition to computing the pair of MRTs
associated with each router destination D in the area, a pair of MRTs
can be computed for each such proxy-node to fully protect against ABR
failure. </t>

<t>Each ABR or attaching router must remove the MRT marking[see <xref
target="sec_uni_forwarding"/>] and then forward the traffic outside of
the area (or island of MRT-fast-reroute-supporting routers).</t>

<t>When directing traffic along an MRT towards a multi-homed prefix,
if a topology-identifier label[see <xref
target="sec_ldp_uni_forward"/>] is not used, then the proxy-node must
be named and either additional LDP labels or IP addresses associated
with it.</t>

<!-- Andras: ASBR protection may require path to the alternate ASBR;
     i.e. MRT marking may get removed at ABR, but the path should
     still lead to the backup ASBR 

     Alia: Agreed - but that's what we're putting out-of-scope for now
     and it has its own section.
-->

</section>

<section anchor="sec_uni_forwarding" title="Unicast Forwarding with MRT Fast-Reroute">
<t>With LFA, there is no need to tunnel unicast traffic, whether IP or
LDP.  The traffic is simply sent to an alternate. The behavior with
MRT Fast-Reroute is different depending upon whether IP or LDP unicast
traffic is considered.</t>

<t>Logically, one could use the same IP address or LDP FEC and then
also use 2 bits to express the topology to use.  The topology options
are (00) IGP/SPT, (01) blue MRT, (10) red MRT.  Unfortunately, there
just aren't 2 spare bits available in the IPv4 or IPv6 header.  This
has different consequences for IP and LDP because LDP can just add a
topology label on top or take 2 spare bits from the label space.</t>

<t>Once the MRTs are computed, the two sets of MRTs are seen by the
forwarding plane as essentially two additional topologies.  The same
considerations apply for forwarding along the MRTs as for handling
multiple topologies.</t>  

<section anchor="sec_ldp_uni_forward" title="LDP Unicast Forwarding - Avoid Tunneling">
<t>For LDP, it is very desirable to avoid tunneling because, for at
least node protection, tunneling requires knowledge of remote LDP
label mappings and thus requires targeted LDP sessions and the
associated management complexity.  There are two different mechanisms
that can be used.</t>

<t><list style="numbers">
   <t> Option A - Encode Topology in Labels: In addition to sending a single
   label for a FEC, a router would provide two additional labels with
   their associated MRT colors.  This is simple, but reduces the label
   space for other uses.  It also increases the memory to store the
   labels and the communication required by LDP.</t>

   <t>Option B - Create Topology-Identification Labels: Use the label-stacking
   ability of MPLS and specify only two additional labels - one for
   each associated MRT color - by a new FEC type.  When sending a
   packet onto an MRT, first swap the LDP label and then push the
   topology-identification label for that MRT color.  When receiving a
   packet with a topology-identification label, pop it and use it to
   guide the next-hop selection in combination with the next label in
   the stack; then swap the remaining label, if appropriate, and push
   the topology-identification label for the next-hop.  This has
   minimal usage of additional labels, memory and LDP communication.
   It does increase the size of packets and the complexity of the
   required label operations and look-ups.  This can use the same
   mechanisms as are needed for context-aware label spaces.</t>
</list></t>

<t>Note that with LDP unicast forwarding, regardless of whether a
topology-identification label is used or the topology is encoded in the label,
no additional loopbacks per router are required as are required in the
IP unicast forwarding case. This is because LDP labels are used on a
hop-by-hop basis to identify MRT-blue and MRT-red forwarding
trees.</t>

<t>For greatest hardware compatibility, routers should support Option
A of encoding the topology in the labels.</t>

<!-- Andras:  So, do we want to say that the preferred choice is B?   -->

<section title="Protocol Extensions and Considerations: LDP">
<t>This captures an initial understanding of what may need to be specified.</t>

<t><list style="numbers">
  <t>Specify Topology in Label: When sending a Label Mapping, have the
  ability to send a Label TLV and multiple Topology-Label TLVs.  The
  Topology-Label TLV would specify MRT and the associated MRT
  color.</t>
  <t>Topology-Identification Labels: Define a new FEC type that
  describes the topology for MRT and the associated MRT color.</t>
</list></t>

</section>

</section>

<section title="IP Unicast Traffic">

<t>For IP, there is no currently practical alternative except
tunneling.  The tunnel egress could be the original destination in the
area, the next-next-hop, etc.  If the tunnel egress is the original
destination router, then the traffic remains on the redundant tree
with sub-optimal routing.  If the tunnel egress is the next-next-hop,
then protection of multi-homed prefixes and node-failure for ABRs is
not available.  Selection of the tunnel egress is a router-local
decision.</t>

<!-- Andras: I think that the selection of the tunnel egress can also
     be anything in between the NNH and the destination.  This could
     also be one tweaking knob with which we can optimise path length.  

     E.g. Use that tunnel egress which altogether produces the
     shortest detour path.
-->

<t>There are three options available for marking an IP packet with the
MRT along which it should be forwarded.</t>

<t><list style="numbers"> 

<t>Tunnel IP packets via an LDP LSP. This has the advantage that more
installed routers can do line-rate encapsulation and decapsulation.
Also, no additional IP addresses would need to be allocated or
signaled.

<list style="letters"> 

<t>Option A - LDP Destination-Topology Label: Use a label that
indicates both destination and MRT.  This method allows easy tunneling
to the next-next-hop as well as to the IGP-area destination.  For
multi-homed prefixes, this requires that additional labels be
advertised for each proxy-node.</t>

<t>Option B - LDP Topology Label: Use a Topology-Identifier label on
top of the IP packet.  This is very simple and doesn't require
additional labels for proxy-nodes.  If tunneling to a next-next-hop is
desired, then a two-deep label stack can be used with [ Topology-ID
label, Next-Next-Hop Label ].</t>
</list></t>

<t>Tunnel IP packets in IP.  Each router supporting this option would
announce two additional loopback addresses and their associated MRT
color. Those addresses are used as destination addresses for MRT-blue
and MRT-red IP tunnels respectively. They allow the transit nodes to
identify the traffic as being forwarded along either MRT-blue or
MRT-red tree topology to reach the tunnel destination. Announcements
of these two additional loopback addresses per router with their MRT
color require IGP extensions.</t>
</list></t>

<!-- Andras: shouldn't we add an IP Header Option as an alternative?
     Not practical today but might get practical in a couple of years
-->

<t>For proxy-nodes associated with one or more multi-homed prefixes,
the problem is harder because there is no router associated with the
proxy-node, so its loopbacks can't be known or used.  In this case,
each router attached to the proxy-node could announce two common IP
addresses with their associated MRT colors.  This would require
configuration as well as the previously mentioned IGP extensions.
Similarly, in the LDP case, two additional FEC bindings could be
announced.</t>

<section title="Protocol Extensions and Considerations: OSPF and ISIS">
<t>This captures an initial understanding of what may need to be specified.</t>

<t><list style="symbols">

  <t>Capabilities: Does a router support MRT?  Does the router do MRT
  tunneling with LDP or IP or GRE or...?</t>
  <t>Topology Association: A router needs to advertise a loopback and
  associate it with an MRT whether blue or red.  Additional
  flexibility for future uses would be good.</t>
  <t>Proxy-nodes for Multi-homed Prefixes: We need a way to advertise
  common addresses with MRT for multi-homed prefixes' proxy-nodes.
  Currently, those proxy-nodes aren't named or considered.</t>
</list></t>

<t>As with LFA, it is expected that OSPF Virtual Links will not be supported.</t>
</section>

</section>

<section title="Inter-Area and ABR Forwarding Behavior">

<t>In regular forwarding, packets destined outside the area arrive at
the ABR and the ABR forwards them into the other area because the
next-hops from the area with the best route (according to tie-breaking
rules) are used by the ABR.  The question is then what to do with
packets marked with an MRT that are received by the ABR.</t>

<t>The only option that doesn't require forwarding based upon incoming
interface is to forward an MRT marked packet in the area with the best
route along its associated MRT.  If the packet came from that area,
this correctly avoids the failure.  If the packet came from a
different area, at least this gets the packet to the destination even
though it is along an MRT rather than the shortest-path.</t>

<figure anchor="fig_abr_mrt" title="ABR Forwarding Behavior and MRTs"
align="center">
<artwork align="center"><![CDATA[
        
    +----[C]----     --[D]--[E]                --[D]--[E]
    |           \   /         \               /         \
p--[A] Area 10 [ABR1]  Area 0 [H]--p   +-[ABR1]  Area 0 [H]-+
    |           /   \         /        |      \         /   |
    +----[B]----     --[F]--[G]        |       --[F]--[G]   |
                                       |                    |
                                       | other              |
                                       +----------[p]-------+
                                         area 

      (a) Example topology        (b) Proxy node view in Area 0 nodes


                +----[C]<---       [D]->[E]
                V           \             \
             +-[A] Area 10 [ABR1]  Area 0 [H]-+
             |  ^           /             /   |
             |  +----[B]<---       [F]->[G]   V
             |                                |
             +------------->[p]<--------------+

               (c) rSPT towards destination p



          ->[D]->[E]                         -<[D]<-[E]
         /          \                       /         \
    [ABR1]  Area 0 [H]-+             +-[ABR1]         [H]
                   /   |             |      \         
            [F]->[G]   V             V       -<[F]<-[G]
                       |             |
                       |             |
             [p]<------+             +--------->[p]

  (d) Blue MRT in Area 0           (e) Red MRT in Area 0

]]></artwork>
</figure>


<t>To avoid using an out-of-area MRT, special action can be taken by
the penultimate router along the in-local-area MRT immediately before
the ABR is reached.  The penultimate router can determine that the ABR
will forward the packet out of area and, in that case, the penultimate
router can remove the MRT marking but still forward the packet along
the MRT next-hop to reach the ABR.  For instance, in <xref
target="fig_abr_mrt"/>, if node H fails, node E has to put traffic
towards prefix p onto the red MRT.  But since node D knows that ABR1
will use a best route from another area, it is safe for D to remove the MRT
marking and just send the packet to ABR1 still on the red MRT but
unmarked.  ABR1 will use the shortest path in Area 10.
</t>

<t>In all cases for ISIS and most cases for OSPF, the penultimate
router can determine what decision the adjacent ABR will make.  The
one case where it can't be determined is when two ASBRs are in
different non-backbone areas attached to the same ABR, then the ASBR's
Area ID may be needed for tie-breaking (prefer the route with the
largest OSPF area ID) and the Area ID isn't announced as part of the
ASBR link-state advertisement (LSA).  In this one case, suboptimal
forwarding along the MRT in the other area would happen.  If this is a
realistic deployment scenario, OSPF extensions could be considered.</t>

</section>

<section title="Issues with Area Abstraction">

<t>MRT fast-reroute provides complete coverage in an area that is
2-connected.  Where a failure would partition the network, of course,
no alternate can protect against that failure.  Similarly, there are
ways of connecting multi-homed prefixes that make it impractical to
protect them without excessive complexity.</t>

<figure anchor="fig_mhp_areas" title="AS external prefixes in different areas">
<artwork align="center"><![CDATA[
       50
     |----[ASBR Y]---[B]---[ABR 2]---[C]      Backbone Area 0:
     |                                |           ABR 1, ABR 2, C, D
     |                                |
     |                                |       Area 20:  A, ASBR X
     |                                |
     p ---[ASBR X]---[A]---[ABR 1]---[D]      Area 10: B, ASBR Y
        5                                  p is a Type 1 AS-external

]]></artwork>
</figure>

<t>Consider the network in <xref target="fig_mhp_areas"/> and assume
there is a richer connective topology that isn't shown, where the same
prefix is announced by ASBR X and ASBR Y which are in different
non-backbone areas.  If the link from A to ASBR X fails, then an MRT
alternate could forward the packet to ABR 1 and ABR 1 could forward it
to D, but then D would find the shortest route is back via ABR 1 to
Area 20.  The only real way to get it from A to ASBR Y is to
explicitly tunnel it to ASBR Y.</t>

<t>Tunnelling to the backup ASBR is for future consideration.  The
previously proposed PHP approach needs to have an exception if BGP
policies (e.g. BGP local preference) determine which ASBR to use.
Consider the case in <xref target="fig_mhp_areas_bgp_pref"/>.  If the
link between A and ASBR X (the preferred border router) fails, A can
put the packets to p onto an MRT alternate, even tunnel it towards
ASBR Y.  Node B, however, must not remove the MRT marking in this
case, as nodes in Area 0, including ASBR Y itself, would not know that
their preferred ASBR is down.</t>


<figure anchor="fig_mhp_areas_bgp_pref" title="Failure of path towards ASBR preferred by BGP">
<artwork align="center"><![CDATA[

              Area 20                    BB Area 0  
  p ---[ASBR X]-X-[A]---[B]---[ABR 1]---[D]---[ASBR Y]--- p

              BGP prefers ASBR X for prefix p

]]></artwork>
</figure>


<t>The fine details of how to solve multi-area external prefix cases,
or of identifying certain cases as too unlikely and too complex to
protect, are for further consideration.</t>

</section>

<section title="Partial Deployment and Islands of Compatible MRT FRR routers">
<t>A natural concern with new functionality is how to have it be
useful when it is not deployed across an entire IGP area.  In the case
of MRT FRR, where it provides alternates when appropriate LFAs aren't
available, there are also deployment scenarios where it may make sense
to only enable some routers in an area with MRT FRR.  A simple example
of such a scenario would be a ring of 6 or more routers that is
connected via two routers to the rest of the area.</t>

<t>First, a computing router S must determine its local island of
compatible MRT fast-reroute routers.  A router that has common
forwarding mechanisms and common algorithm and is connected either
to S or to another router already determined to be in S's local island
can be added to S's local island.</t>

<t>Destinations inside the local island can obviously use MRT
alternates.  Destinations outside the local island can be treated like
a multi-homed prefix with caveats to avoid looping.  For LDP labels
including both destination and topology, the routers at the borders of
the local island need to originate labels for the original FEC and the
associated MRT-specific labels.  Packets sent to an LDP label marked
as blue or red MRT to a destination outside the local island will have
the last router in the local island swap the label to one for the
destination and forward the packet along the outgoing interface on the
MRT towards a router outside the local island that was represented by
the proxy-node.</t>

<t>For IP in IP encapsulations, remote destinations may not be
advertising additional IP loopback addresses for the MRTs.  In that
case, a router attached to a proxy-node, which represents destinations
outside the local island, must advertise IP addresses associated with
that proxy-node.  Packets sent to an address associated with a
proxy-node will have their outer IP header removed by the router
attached to the proxy-node and be forwarded by the router along the
outgoing interface on the MRT towards a router outside the local
island that was represented by the proxy-node.</t>

<!-- Andras: loop avoidance issues not yet solved? What if the packet
     after getting out of the island is forwarded back?
-->


<!-- Andras: 

<t>In an MRT island, a MHP proxy node should only be connected to
those MRT island nodes from which a shortest path that does not loop
back to the island leads to the destination prefix. If MRT deployment
is sparse, it may easily happen that such nodes do not exist in the
current MRT island.  In this case, the task is to tunnel the packet to
an outside node, a "candidate", which can forward it on the shortest
path.  We need to find redundant paths to these nodes.  This can be
done by creating an extended island by virtually connecting the
candidate to node in the island and by finding a pair of MRTs in this
extended area rooted to the candidate.  These candidate nodes should
be virtually added to the MRT as proxy nodes, but they should be
connected to MRT island nodes from which the remote repair node is
reachable on the shortest path without looping back to the island.</t>


<t>Observe that we need to discuss what the MRT island border node
needs to do, when the packet is leaving the island but should be
forwarded to an out-of-island candidate.  Recall, that such a packet
was forwarded along an MRT, and we need to switch back to the shortest
path when leaving the MRT island.  Therefore, the MRT island border
node must change the destination address or MPLS label to the one
describing forwarding along shortest path. This swapping is simple,
for LDP.  If however we are using pure IP, we need to change the
destination IP address in the outer encapsulation header to the
default address of the candidate.</t>

<t>Moreover, it is possible, that the candidate is not an MRT capable
node, so it does not have two extra IP addresses automatically.  In
this case the operator may configure two additional IP addresses
for this legacy node, so that MRT capable routers consistently select
one as the default address, one as the address on the blue tree, and
one as the address on the red tree.  E.g., besides the default address
the operator configures two additional IP addresses that are higher than
the default. In this case, the smallest IP address is the default
address, the medium can be the red address while the highest can be
the blue address; the main point is that each router should handle
these addresses consistently.</t>



<t>In order to further improve failure coverage, we can connect MRT
capable islands explicitly with tunnels.  This means that border
routers of islands may put the packet into an IP-in-IP tunnel or push
an MPLS label, and send them to another island.  The concept is
depicted in <xref target="fig_multi-island"/>.  There we have X and Y,
two MRT capable nodes, which can reach each other along the shortest
paths, using only non-MRT capable nodes in both directions.  In this
case we can build up a tunnel between them and consider them as
immediate neighbors.  This, trivially, unifies the two MRT capable
islands.  Uniting MRT capable islands gives us the possibility to
treat them as a single super island, and in this way gives us
relatively high coverage.  Since now it is not important if the MRT
tunnel egress is in the same MRT capable island or not, if it is MRT
capable, we can find paths along the MRTs in the super island.</t>

<figure anchor="fig_multi-island" title="Unifying multiple MRT islands">
<artwork align="center"><![CDATA[
.....                                             ...........
    .....    Tunnel or LSP connecting two      ...          . ...
        .   MRT capable island border routers  .      MRT
MRT     .<====================================>.      island
island [X]---------[ ]----------[ ]-----------[Y]     #2
#1       .                                    .
        .                                     .
........                                       .....
                                                   ..............

]]></artwork>
</figure>
 
<t>Observe, however, that this kind of connection has its drawbacks. 
First, applying such tunnels adds another header to the packets, which
is an overhead; second, realizing such tunneling requires using
targeted LDP (or labeled iBGP) between island border routers to tell
other nodes which label the tunnel endpoint needs to put packets back
to the MRT LSP.  On the above figure, X needs the label used by Y for
forwarding packets not along the original LSP, but along one of the
MRTs.</t>

-->

</section>

<section title="Network Convergence and Preparing for the Next Failure">

<t>After a failure, MRT detours ensure that packets reach their
intended destination while the IGP has not reconverged onto the new
topology.  As link-state updates reach the routers, the IGP process
calculates the new shortest paths. Two things need attention:
micro-loop prevention and MRT re-calculation.</t>

<section title="Micro-forwarding loop prevention and MRTs">

<t>As is well known <xref target="RFC5715"/>, micro-loops can occur
during IGP convergence; such loops can be local to the failure or
remote from the failure.  Managing micro-loops is an orthogonal issue
to having alternates for local repair, such as MRT fast-reroute
provides.</t>

<t>There are two possible micro-loop prevention mechanisms discussed in
<xref target="RFC5715"/>.  The first is Ordered FIB <xref
target="I-D.ietf-rtgwg-ordered-fib"/>.  The second is Farside
Tunneling which requires tunnels or an alternate topology to reach
routers on the farside of the failure.</t>

<t>Since MRTs provide an alternate topology through which traffic can
be sent and which can be manipulated separately from the SPT, it is
possible that MRTs could be used to support Farside Tunneling.
Details of how to do so are outside of this document.</t>

</section>

<section title="MRT Recalculation">

<t>When a failure event happens, traffic is put by the PLRs onto the
MRT topologies.  After that, each router recomputes its shortest path
tree (SPT) and moves traffic over to that.  Only after all the PLRs
have switched to using their SPTs and traffic has drained from the MRT
topologies should each router install the recomputed MRTs into the
FIBs.</t>

<t>At each router, therefore, the sequence is as follows:
<list style="numbers">
<t>Receive failure notification</t>
<t>Recompute SPT</t>
<t>Install new SPT</t>
<t>Recompute MRTs</t>
<t>Wait configured period for all routers to be using their SPTs and
traffic to drain from the MRTs.</t>
<t>Install new MRTs.</t>
</list></t>

<t>While the recomputed MRTs are not installed in the FIB, protection
coverage is lowered.  Therefore, it is important to recalculate the
MRTs and install them as quickly as possible.</t>

<t>It is for further study whether MRT re-calculation is possible in
an incremental fashion, such that the sections of the MRT in use after
a failure are not changed.  </t>

</section>

</section>

</section>
</section>


<section anchor="Acknowledgements" title="Acknowledgements">
  <t>The authors would like to thank Hannes Gredler, Jeff Tantsura,
  Ted Qian, Kishore Tiruveedhula, Santosh Esale, Nitin Bahadur, Harish
  Sitaraman and Raveendra Torvi for their suggestions and review.</t>

</section>

    <!-- Possibly a 'Contributors' section ... -->

    <section anchor="IANA" title="IANA Considerations">
      <t>This document includes no request to IANA.</t>
    </section>

    <section anchor="Security" title="Security Considerations">
      <t>This architecture is not currently believed to introduce new security concerns.</t>
    </section>
  </middle>

  <!--  *****BACK MATTER ***** -->

  <back>


    <!-- References split into informative and normative -->

    <!-- There are 2 ways to insert reference entries from the citation libraries:
     1. define an ENTITY at the top, and use "ampersand character"RFC2629; here (as shown)
     2. simply use a PI "less than character"?rfc include="reference.RFC.2119.xml"?> here
        (for I-Ds: include="reference.I-D.narten-iana-considerations-rfc2434bis.xml")

     Both are cited textually in the same manner: by using xref elements.
     If you use the PI option, xml2rfc will, by default, try to find included files in the same
     directory as the including file. You can also define the XML_LIBRARY environment variable
     with a value containing a set of directories to search.  These can be either in the local
     filing system or remote ones accessed by http (http://domain/dir/... ).-->

    <references title="Normative References">
    &RFC5714;
    &RFC5286;
    &RFC5384;
    &I-D.enyedi-rtgwg-mrt-frr-algorithm;

    </references>

    <references title="Informative References">
    &I-D.ietf-rtgwg-ipfrr-notvia-addresses;
    &I-D.ietf-rtgwg-lfa-applicability;
    &RFC5715;
    &I-D.ietf-rtgwg-ordered-fib;

      <reference anchor="LightweightNotVia"
                 target="http://mycite.omikk.bme.hu/doc/71691.pdf">
       <front>
          <title>IP Fast ReRoute: Lightweight Not-Via without Additional Addresses</title>
          <author fullname="G&#225;bor S&#225;ndor Enyedi" initials="G.S.E." surname="Enyedi"/>
          <author fullname="Gabor Retvari" initials="G.R." surname="Retvari"/>
          <author fullname="Peter Szilagyi" initials="P.S." surname="Szilagyi"/>
          <author fullname="Andr&#225;s Cs&#225;sz&#225;r" initials="A.C." surname="Cs&#225;sz&#225;r"/>
          <date year="2009" />
       </front>
        <seriesInfo name="Proceedings of IEEE INFOCOM" value=""/>
        <format type='PDF' target="http://mycite.omikk.bme.hu/doc/71691.pdf"/>
      </reference>

      <reference anchor="LFARevisited"
                 target="http://opti.tmit.bme.hu/~tapolcai/papers/retvari2011lfa_infocom.pdf">
       <front>
          <title>IP Fast ReRoute: Loop Free Alternates Revisited</title>
          <author fullname="Gabor Retvari" initials="G.R." surname="Retvari"/>
          <author fullname="Janos Tapolcai" initials="J.T." surname="Tapolcai"/>
          <author fullname="G&#225;bor S&#225;ndor Enyedi" initials="G.S.E." surname="Enyedi"/>
          <author fullname="Andr&#225;s Cs&#225;sz&#225;r" initials="A.C." surname="Cs&#225;sz&#225;r"/>
          <date year="2011" />
        </front>
        <seriesInfo name="Proceedings of IEEE INFOCOM" value=""/>
        <format type='PDF' target="http://opti.tmit.bme.hu/~tapolcai/papers/retvari2011lfa_infocom.pdf"/>
      </reference>
    </references>

    <!-- Change Log

v00 2011-06-28  AKA   Initial version
v01 2011-07-13  RWK   Multicast Changes
v02 2012-01-18  AKA   First WG version - removed multicast
    -->

  </back>
</rfc>

<!-- PAFTECH AB 2003-20262026-04-23 23:25:35 -->