One document matched: draft-tsou-softwire-bfd-ds-lite-05.xml


<?xml version="1.0" encoding="US-ASCII"?>
<!-- This template is for creating an Internet Draft using xml2rfc,
     which is available here: http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!-- One method to get references from the online citation libraries.
     There has to be one entity for each item to be referenced. 
     An alternate method (rfc include) is described in the references. -->

  <!ENTITY RFC0792 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.0792.xml">
  <!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
  <!ENTITY RFC4443 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4443.xml">
  <!ENTITY RFC5880 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5880.xml">
  <!ENTITY RFC5881 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5881.xml">
  <!ENTITY RFC5882 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5882.xml">
  <!ENTITY RFC5883 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5883.xml">
  <!ENTITY RFC6333 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6333.xml">
  <!ENTITY RFC6334 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6334.xml">
  <!ENTITY RFC6887 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6887.xml">
]>

<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<!-- used by XSLT processors -->
<!-- For a complete list and description of processing instructions (PIs), 
     please see http://xml.resource.org/authoring/README.html. -->
<!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds might want to use.
     (Here they are set differently than their defaults in xml2rfc v1.32) -->
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space 
     (using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->
<rfc category="info" docName="draft-tsou-softwire-bfd-ds-lite-05" ipr="trust200902">
  <!-- category values: std, bcp, info, exp, and historic
     ipr values: trust200902, noModificationTrust200902, noDerivativesTrust200902,
        or pre5378Trust200902
     you can add the attributes updates="NNNN" and obsoletes="NNNN" 
     they will automatically be output with "(if approved)" -->

  <!-- ***** FRONT MATTER ***** -->

  <front>
    <!-- The abbreviated title is used in the page header - it is only necessary if the full title is longer than 39 characters -->

    <title>DS-Lite Failure Detection and Failover</title>

    <!-- add 'role="editor"' below for the editors if appropriate -->

    <!-- Another author who claims to be an editor -->

    <author fullname="Tina Tsou" initials="T." surname="Tsou">
      <organization>Huawei Technologies (USA)</organization>
      <address>
        <postal>
          <street>2330 Central Expressway</street>
          <city>Santa Clara</city>
          <code>CA  95050</code>
          <country>USA</country>
        </postal>
        <phone>+1 408 330 4424</phone>
        <email>tina.tsou.zouting@huawei.com</email>
      </address>
    </author>

    <author fullname="Brandon Li" initials="B." surname="Li">
      <organization>Huawei Technologies</organization>
      <address>
        <postal>
          <street>M6, No. 156, Beiqing Road, Haidian District</street>
          <city>Beijing</city>
          <code>100094</code>
          <country>China</country>
        </postal>
        <phone></phone>
        <email>brandon.lijian@huawei.com</email>
      </address>
    </author>

    <author fullname="Cathy Zhou" initials="C." surname="Zhou">
      <organization>Huawei Technologies </organization>
      <address>
        <postal>
          <street></street>
          <city></city>
          <code></code>
          <country>China</country>
        </postal>
        <phone></phone>
        <email>cathy.zhou@huawei.com</email>
      </address>
    </author>

    <author fullname="Juergen Schoenwaelder" initials="J." surname="Schoenwaelder">
      <organization>Jacobs University Bremen</organization>
      <address>
        <postal>
          <street>Campus Ring 1</street>
          <city>Bremen</city>
          <code>28759</code>
          <country>Germany</country>
        </postal>
        <phone></phone>
        <email>j.schoenwaelder@jacobs-university.de</email>
      </address>
    </author>

    <author fullname="Reinaldo Penno" initials="R." surname="Penno">
      <organization>Cisco Systems, Inc.</organization>
      <address>
        <postal>
          <street>170 West Tasman Drive</street>
          <city>San Jose, California</city>
          <code>95134</code>
          <country>USA</country>
        </postal>
        <phone></phone>
        <email>repenno@cisco.com</email>
      </address>
    </author>
    
    <author fullname="Mohamed Boucadair" initials="M." surname="Boucadair">
      <organization>France Telecom</organization>
      <address>
        <postal>
          <street>Rennes,35000</street>
          <city></city>
          <code></code>
          <country>France</country>
        </postal>
        <phone></phone>
        <email>mohamed.boucadair@orange.com</email>
      </address>
    </author>
    


    <date year="2013" />


    <!-- Meta-data Declarations -->

    <area>INT</area>

    <workgroup>Internet Engineering Task Force</workgroup>

    <!-- WG name at the upperleft corner of the doc,
         IETF is fine for individual submissions.  
     If this element is not present, the default is "Network Working Group",
         which is used by the RFC Editor as a nod to the history of the IETF. -->
    


    <abstract>
      <t>
	In DS-Lite, the tunnel is stateless, not associated with any
	state information, and the CGN function at the AFTR is stateful. 
	Currently, there is no failure detection and failover mechanism 
	for either the stateless tunnel or the stateful CGN function, which 
	makes it difficult to manage the service and to diagnose problems. This 
	draft analyzes the applicability of some of the possible solutions.
      </t>
    </abstract>
  </front>

  <middle>
    <section anchor="Problem" title="Introduction">
      <t>
	In <xref target="RFC6333">DS-Lite</xref>, the IPv4-in-IPv6
	DS-Lite tunnel is stateless, no status information about the
	tunnel is available, and no keep-alive mechanism is
	available. It is difficult to know whether the tunnel is up or
	down; and if there is a link problem, the Basic Bridging
	BroadBand (B4) element cannot automatically switch to another
	Address Family Transition Router (AFTR) so as to continue the
	network service without the involvement of
	operators. In addition, in <xref target="RFC6333">DS-Lite</xref>, 
	the CGN function at the AFTR is stateful and there is no mechanism 
	to detect whether the NAT44 CGN is functioning in the AFTR. 
	These will create problems for network operation and maintenance.
      </t>
      

      <t>
	Possible solutions for failure detection include the usage of
	Bidirectional Forwarding Detection (BFD), the Port Control
	Protocol (PCP), and ICMP Echo (Request) / Echo Reply
	(PING). The properties of these solutions are discussed in
	this document, and guidelines are provided on how to implement
	failure detection and automatic failover.
      </t>

      <t>
	The key words "MUST", "MUST NOT", "REQUIRED", "SHALL",
        "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY",
        and "OPTIONAL" in this document are to be interpreted as
        described in <xref target="RFC2119">RFC 2119</xref>.
      </t>
    </section>

    <section title="Terminology">
      <t>
        <list hangIndent="10" style="hanging">
          <t hangText="AFTR:">Address Family Transition Router.</t>
          <t hangText="B4:">Basic Bridging BroadBand.</t>
          <t hangText="BBF:">BroadBand Forum.</t>
          <t hangText="BFD:">Bidirectional Forwarding Detection.</t>
          <t hangText="CPE:">Customer Premise Equipment (i.e., the DS-Lite B4).</t>
          <t hangText="FQDN:">Fully Qualified Domain Name.</t>
          <t hangText="PCP:">Port Control Protocol.</t>
        </list>
      </t>
    </section>
    
      <section title="Failover Mechanisms">
      <t>The FQDN of the AFTR is sent to the B4 element via a DHCP
	option, as defined in <xref target="RFC6334"></xref>.
	Multiple IP addresses can be configured for the FQDN of an
	AFTR on the DNS server. If a B4 element detects a failure on
	the link to the AFTR, the B4 element MUST terminate the
	current DS-Lite tunnel, choose another AFTR address in the
	list, and create a tunnel to the new AFTR. If necessary, the
	B4 element SHOULD re-configure the connectivity test tool
	accordingly and restart the test procedures.
      </t>
         <section title="Anycast Approach">
      
      <t>
	Anycast may also be used for failover. But there is an
        ICMP-error-message problem with anycast, that is, when a
        packet is sent from the AFTR to a B4 element, if one of the
        routers along the path generates an ICMP error message, e.g.,
        Packet Too Big (PTB), then the error message may not be sent
        back to the source AFTR but to another AFTR.
      </t>
      
      <t>
      There is also a problem with anycast for a stateful CGN/AFTR. If 
      there is an asymmetric path through the CGNs, then the return-path 
      traffic will be dropped, as there is no corresponding state table 
      entry in the AFTR.
      </t>
         </section>
       
         <section title="VRRP Approach">
         <t>For active/passive high availability (HA) in NAT gateways, it is 
         quite common to have a single virtual address offered by the Virtual 
         Router Redundancy Protocol (VRRP) (or a proprietary 
         equivalent) that the upstream routers will use as their next hop. 
         In the event that the master CGN fails, the standby takes over 
         the virtual L3 address. If a VRRP-based virtual address is used 
         as the tunnel endpoint, then the clients do not need to be aware 
         of the failover.</t> 
         </section>
    </section>

    <section title="Solutions">
      <section title="Bidirectional Forwarding Detection (BFD)">
        <t>
	  <xref target="RFC5880">Bidirectional Forwarding
          Detection</xref> (BFD) is a mechanism intended to detect
          faults in a bidirectional path. It is usually used in
          conjunction with applications like OSPF, IS-IS, for fast
          fault recovery and fast
          re-route <xref target="RFC5882"></xref>. BFD is being made
          mandatory for keep-alive for subscriber sessions, including
          DS-Lite, by the BroadBand Forum
          (BBF) <xref target="WT-146"></xref>.
	</t>
        <t>
	  BFD can be used in DS-Lite, by creating a BFD session
          between the B4 element and the AFTR to provide tunnel status
          information.  If a fault is detected, the B4 element can try
          to create a DS-Lite tunnel with another AFTR and terminate
          the existing one, so as to continue network service. BFD could 
          also be used to detect the CGN state at the AFTR, but the 
          detection should be performed on a per-user basis.
	</t>
	<t>
	  <xref target="I-D.vinokour-bfd-dhcp"></xref> proposes using
          a DHCP option to distribute BFD parameters to B4
          elements. But in case of DS-Lite, some of the key BFD
          parameters are already available (e.g., peer IP address),
          and other parameters can be negotiated by BFD signaling or
          statically configured, so that no extra DHCP option(s) need
          to be defined.
	</t>
	

        <section title="DS-Lite Scenario">
          <t>
	    In <xref target="RFC6333">DS-Lite</xref>, the BFD packet
            SHOULD be sent through an IPv4-in-IPv6 tunnel, as shown in
            <xref target="DS-Lite-Scenario"></xref>. The IPv4
            addresses of the B4 element and the AFTR SHOULD be the
            endpoints of a BFD session.
	  </t>
          <figure title="DS-Lite Scenario"
                  anchor="DS-Lite-Scenario"
                  align="center">
            <artwork>
               +--------------+                  +--------------+
  +------+     |              |     +------+     |              |
  |      |-----+--------------+-----|      |     |              |
  | CPE  |       IPv6 Tunnel        | AFTR |-----| IPv4 Network |
  | (B4) |-----+--------------+-----|      |     |              |
  +------+     | IPv6 Network |     +------+     |              |
 192.0.0.2     +--------------+    192.0.0.1     +--------------+
            </artwork>
          </figure>
        </section>

        <section title="Parameters for BFD">
          <t>
	    In order to set up a BFD session, the following parameters
            are needed, as shown in Section 4.1
            of <xref target="RFC5880"></xref>:
	  </t>
          <t><list style="symbols">
              <t>Peer IP address</t>
              <t>My Discriminator</t>
              <t>Your Discriminator</t>
              <t>Desired Min TX Interval</t>
              <t>Required Min RX Interval</t>
              <t>Required Min Echo RX Interval</t>
            </list></t>
          <t>
	    In <xref target="RFC6333">DS-Lite</xref>, the B4's
            WAN-side IPv4 address is the well-known address 192.0.0.2,
            and the AFTR's well-known IPv4 address is 192.0.0.1, as
            defined in Section 5.7 of <xref target="RFC6333"></xref>.
            The B4 element needs to create an IPv6 tunnel to an AFTR
            so as to get network connectivity to the AFTR, and send
            IPv4 BFD packets through the tunnel to manage it.
	  </t>
	  <t>
	    The other parameters listed above can be negotiated by BFD
            signaling, and initial values can be configured on B4
            elements and AFTRs.
	  </t>
        </section>

        <section title="Elements of Procedure">
          <t>
	    When a B4 element gets online, it will be assigned an IPv6
            prefix or address, and also the FQDN of the AFTR, as
            defined in <xref target="RFC6334"></xref>.  The B4 element
            will create an IPv6 tunnel to the AFTR with which the B4
            element can initiate a BFD session to the AFTR. BFD
            packets will be sent through the DS-Lite tunnel. As
            defined in section 4 of <xref target="RFC5881"></xref>,
            BFD control packets MUST be sent in UDP packets with
            destination port 3784, and BFD echo packets MUST be sent
            in UDP packets with destination port 3785.
	  </t>
	  <t>
	    When sending out the first BFD packet, the B4 element can
            generate a unique local discriminator, and set the remote
            discriminator to zero. When the AFTR receives the first
            BFD packet from a B4 element, the AFTR will also generate
            a corresponding local discriminator, and put it in the
            response packet to the B4 element. This will finish the
            discriminator negotiation in the B4 to AFTR direction,
            without any manual configuration.
	  </t>
          <t>
	    When an AFTR receives the first packet from a B4 element,
            the AFTR will get the IPv6 address and discriminator of
            the B4 element, so that the AFTR can initiate the BFD
            session in the other direction and a similar discriminator
            negotiation can be carried out.
	  </t>
        </section>
        
        <section title="BFD for NAT failure detection">
          <t>The B4 element creates a PCP mapping. The BFD function at the 
          AFTR uses an external public interface (or another external 
          mapping) to send a BFD packet to the public PCP mapping created 
          by the B4 element. In this case, the BFD packet from the AFTR 
          will have the public IP address of that interface as its source 
          address and will go through the NAT, therefore exercising the NAT 
          function. The B4 element will reply to the AFTR's external 
          interface.</t> 
        
        </section>

        <section title="Implementation Considerations">
          <t>
	    BFD is usually used for quick fault detection, at a very
            small time scale, e.g. milliseconds. But in DS-Lite, it
            may not be necessary to detect faults in such a short
            time. On the other hand, an AFTR may need to support tens
            of thousands of B4 elements, which means an AFTR will need
            to support the same number of BFD sessions. In order to
            meet performance requirements on an AFTR, it may be
            necessary to extend the time period between BFD packet
            transmissions to a longer time, e.g., 10s or 30s.
          </t>
          <t>
	    Compared to other solutions, BFD has a simple and fixed
            packet format, which is easy to implement by logic devices
            (e.g., ASIC, FPGA). Complicated protocols are usually
            processed by software which is relatively slow. An AFTR
            may need to support 10000-20000 users, and if the
            protocol is handled by software, it will bring extra load
            to the AFTR.
	  </t>
        </section>
      </section>

      <section title="Port Control Protocol (PCP)">
        <t>
	  PCP <xref target="RFC6887"/> is a NAT
          traversal tool. It can also be used for network connectivity
          tests if PCP is supported in the network. A common use case
          of PCP is to create a pinhole so that external users can
          visit the servers located behind a NAT.  The lifetime of the
          pinhole mapping is usually long, e.g., hours, and the
          lifetime will be refreshed periodically by the client before
          it expires. For the purpose of network connectivity
          tests, a B4 element can create a mapping in the CGN via PCP,
          with a short lifetime, e.g., tens of seconds, and keep on
          refreshing the mapping before it expires. If any refresh
          requests fail, the B4 element knows that something is wrong
          with the link or the PCP server or the CGN.
	</t>
        <t>
	  In order to detect the network connectivity of the DS-Lite
          tunnel, the encapsulation mode MUST be used for PCP: PCP
          packets are sent through the DS-Lite tunnel.
	</t>
        <t>
	  PCP can detect the failure of more components of the DS-Lite
          system. Besides failures of the link and the routing, it
          also covers NAT functions.
	</t>
      </section>

      <section title="ICMP Echo (Request) / Echo Reply (PING)">
        <t>
	  PING is commonly implemented using the Echo (Request) and
	  Echo Reply messages of the Internet Control Message
	  Protocol (ICMP) <xref target="RFC0792"/>
	  <xref target="RFC4443"/>.  In case of DS-Lite, a B4 element
	  can send Echo (Request) packets to the AFTR periodically. If
	  the B4 element does not receive Echo Reply packets for a
	  certain number (e.g., 3) of Echo (Request) packets, then the
	  B4 element decides that a fault has been detected.
	</t>
        <t>
	  In order to test the connectivity of the DS-Lite tunnel, Echo
          (Request) packets MUST be sent using ICMPv4, rather than
          ICMPv6.
	</t>
	<t>
	  Since ICMP is an integral part of any IP implementation, the
	  usage of PING to detect tunnel failures does not require any
	  special implementation efforts on the B4 elements. However,
	  on AFTRs that process ICMP messages in software rather than
	  in hardware, the usage of PING might lead to scalability
	  issues.
	</t>
      </section>
      
      <section title="Comparison of Different Solutions">
      
      <figure title="Comparison of different solutions"
                  anchor="Comparison"
                  align="center">
            <artwork>
      
+--------+-------------+------+-------------------+-------------+
|        |             |Packet|Additional         |Configuration|
|        |Availability |format|functionality      |/provisioning|
|        |             |      |on top of keepalive|   overheads |
+--------+-------------+------+-------------------+-------------+
|  BFD   |Widely used/ |      |                   |             |
|        |network side,|Simple|Bidirectional      |             |
|        |less used/   |fixed |status             |             |
|        |terminal side|      |synchronization    |             |
+--------+-------------+------+-------------------+ Similar     |
| PCP    |Less than    |      |No bidirectional   |             |
|        |BFD/ICMP     |Vari- |     detection     |             |
+--------+-------------+able  +-------------------+             |
| ICMP   |Ubiquitous   |      |Network/CGN        |             |
|        |             |      |initiated detection|             |
+--------+-------------+------+-------------------+-------------+
      </artwork>
          </figure>
      
      <t> <xref target="Comparison"></xref> gives a direct comparison
       among different solutions. Compared to other solutions, BFD 
       has a simple and fixed packet format, which is easy to implement 
       by logic devices (e.g., ASIC, FPGA). Complicated protocols are 
       usually processed by software which is relatively slow. 
       ICMP is more widely used than PCP/BFD, while BFD is more widely used 
       in the router and CGN side than in the terminal side. However, 
       from the aspect of failure detection, BFD has explicit capability 
       of bidirectional status synchronization to guarantee the 
       consistency of the failure status of both sides. ICMP could 
       actively initiate status detection from the network side or CGN 
       side, while PCP could not. PCP has no capability of bidirectional 
       detection. As for the configuration/provisioning overheads, 
       since there is normally a TR-069 server at the network management 
       side, they are similar for each approach.</t>
       
       <t>From the above comparison, BFD is selected as the failure 
       detection approach in this document. </t>
          
      </section>

    </section>
    
    <section title="State Synchronization and Session Re-establishment">
      <t>There should be a state sync mechanism between active AFTR and 
      backup AFTR, to synchronize the state of each user between the two 
      AFTRs. This mechanism is to guarantee that the traffic returning 
      to the B4 is from the backup AFTR, if the service is shifted to that 
      AFTR. The BFD sessions with both the active AFTR and the backup AFTR 
      should be set up in the initial state. When a failure of the active 
      AFTR is detected, the service will be shifted to the backup AFTR. If 
      a failure of the backup AFTR is detected, the network 
      management server will be notified so that the failure can be fixed.</t>
      
      <t>In the hot-standby case, the master AFTR and the backup AFTR 
      will synchronize and back up the session, so there is no need 
      to re-establish the TCP session in the event of an AFTR failure. But 
      in the cold-standby case, if there is an active TCP session through 
      the CGN function of an AFTR, and this AFTR fails, then the TCP session 
      will need to be re-established by the client because only the capability 
      is reserved but the session is not backed up.</t> 
    </section>

    <section anchor="IANA" title="IANA Considerations">
      <t>
	This memo includes no request to IANA.
      </t>
    </section>

    <section anchor="Security" title="Security Considerations">
      <t>
	In the <xref target="RFC6333">DS-Lite</xref> application, the
	B4 element may not be directly connected to the AFTR; there
	may be other routers between them. In such a deployment, there
	are potential spoofing problems, as described in
	<xref target="RFC5883"/>.  Hence cryptographic authentication
	SHOULD be used with BFD as described in
	<xref target="RFC5880"/> if security is a concern.
      </t>
    </section>

    <section title="Acknowledgements">
      <t>
	The authors would like to thank Ian Farrer for his
	valuable comments.
      </t>
    </section>
  </middle>

  <!--  *****BACK MATTER ***** -->

  <back>
    <references title="Normative References">
      &RFC0792;
      &RFC2119;
      &RFC4443;
      &RFC5880;
      &RFC5881;
      &RFC5882;
      &RFC5883;
      &RFC6333;
      &RFC6334;
      &RFC6887;


      <reference anchor="WT-146">
        <front>
          <title>WT-146 Subscriber Sessions (work in progress)</title>
          <author initials="A." surname="Kavanagh" fullname="Alan Kavanagh">
            <organization>Ericsson</organization>
          </author> 
          <author initials="F." surname="Klamm" fullname="Frederic Klamm">
            <organization>Apple Inc.</organization>
          </author>
          <author initials="W." surname="Dec" fullname="Wojciech Dec">
            <organization>France Telecom</organization>
          </author>
          <author initials="R." surname="Penno" fullname="Reinaldo Penno">
            <organization>Cisco Systems</organization>
          </author>
          <date month="Apr" year="2012" />
        </front>
      </reference>

    </references>

    <references title="Informative References">

      <reference anchor="I-D.vinokour-bfd-dhcp">
        <front>
          <title>Configuring BFD with DHCP and Other Musings</title>
          <author initials="V." surname="Vinokour" fullname="Vitali Vinokour">
            <organization>Cisco Systems</organization>
          </author>
          <date month="May" year="2008" />
        </front>
      </reference>

      </references>

  </back>
</rfc>

PAFTECH AB 2003-20262026-04-24 02:40:52