One document matched: draft-ietf-tcpm-tcp-lcd-03.xml


<?xml version="1.0" encoding="US-ASCII"?>
<!-- This template is for creating an Internet Draft using xml2rfc,
     which is available here: http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!-- One method to get references from the online citation libraries.
     There has to be one entity for each item to be referenced.
     An alternate method (rfc include) is described in the references. -->
<!ENTITY rfc0791 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.0791.xml">
<!ENTITY rfc0792 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.0792.xml">
<!ENTITY rfc0793 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.0793.xml">
<!ENTITY rfc0826 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.0826.xml">
<!ENTITY rfc1122 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.1122.xml">
<!ENTITY rfc1323 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.1323.xml">
<!ENTITY rfc1812 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.1812.xml">
<!ENTITY rfc2003 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2003.xml">
<!ENTITY rfc2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY rfc2460 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2460.xml">
<!ENTITY rfc2629 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2629.xml">
<!ENTITY rfc2784 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2784.xml">
<!ENTITY rfc2988 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2988.xml">
<!ENTITY rfc3522 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3522.xml">
<!ENTITY rfc3168 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3168.xml">
<!ENTITY rfc3782 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3782.xml">
<!ENTITY rfc3819 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3819.xml">
<!ENTITY rfc4015 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4015.xml">
<!ENTITY rfc4301 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4301.xml">
<!ENTITY rfc4443 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4443.xml">
<!ENTITY rfc5461 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5461.xml">
<!ENTITY rfc5681 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5681.xml">
<!ENTITY rfc5682 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5682.xml">
<!ENTITY rfc5927 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5927.xml">
<!ENTITY retransmit-now SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.eggert-tcpm-tcp-retransmit-now.xml">
<!ENTITY tcp-rlci SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.schuetz-tcpm-tcp-rlci.xml">
<!ENTITY linkup SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.dawkins-trigtran-linkup.xml">
]>

<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<!-- used by XSLT processors -->
<!-- For a complete list and description of processing instructions (PIs),
     please see http://xml.resource.org/authoring/README.html. -->
<!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds
     might want to use. (Here they are set differently than their defaults in
     xml2rfc v1.32) -->
<?rfc strict="yes" ?>
<!-- give errors regarding ID-nits and DTD validation -->
<!-- control the table of contents (ToC) -->
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="3"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space
     (using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->

<rfc category="exp" docName="draft-ietf-tcpm-tcp-lcd-03" ipr="trust200902">
<!-- category values: std, bcp, info, exp, and historic
     ipr values: full3667, noModification3667, noDerivatives3667
     you can add the attributes updates="NNNN" and obsoletes="NNNN"
     they will automatically be output with "(if approved)" -->

    <!-- ***** FRONT MATTER ***** -->
    <front>

        <!-- The abbreviated title is used in the page header - it is only
             necessary if the full title is longer than 39 characters -->
        <title abbrev="Making TCP more Robust to LCDs">
        Making TCP more Robust to Long Connectivity Disruptions (TCP-LCD)</title>

        <!-- add 'role="editor"' below for the editors if appropriate -->
        <author initials="A."
                surname="Zimmermann"
                fullname="Alexander Zimmermann">
            <organization>RWTH Aachen University</organization>
            <address>
                <postal>
                    <street>Ahornstrasse 55</street>
                    <city>Aachen</city>
                    <region></region>
                    <code>52074</code>
                    <country>Germany</country>
                </postal>
                <phone>+49 241 80 21422</phone>
                <email>zimmermann@cs.rwth-aachen.de</email>
                <!-- uri and facsimile elements may also be added -->
            </address>
        </author>

        <author initials="A."
                surname="Hannemann"
                fullname="Arnd Hannemann">
            <organization>RWTH Aachen University</organization>
            <address>
                <postal>
                    <street>Ahornstrasse 55</street>
                    <city>Aachen</city>
                    <region></region>
                    <code>52074</code>
                    <country>Germany</country>
                </postal>
                <phone>+49 241 80 21423</phone>
                <email>hannemann@nets.rwth-aachen.de</email>
                <!-- uri and facsimile elements may also be added -->
            </address>
        </author>

        <date year="2010" />
        <!-- If the month and year are both specified and are the current ones,
             xml2rfc will fill in the current day for you. If only the current
             year is specified, xml2rfc will fill in the current day and month
             for you. If the year is not the current one, it is necessary to
             specify at least a month (xml2rfc assumes day="1" if not specified
             for the purpose of calculating the expiry date). With drafts it is
             normally sufficient to specify just the year. -->

        <!-- Meta-data Declarations -->

        <area>Transport</area>

        <workgroup>TCP Maintenance and Minor Extensions (TCPM) WG</workgroup>
        <!-- WG name at the upperleft corner of the doc, IETF is fine for
             individual submissions. If this element is not present, the default
             is "Network Working Group", which is used by the RFC Editor as a
             nod to the history of the IETF. -->

        <keyword>Transmission Control Protocol (TCP)</keyword>
        <keyword>Internet Control Message Protocol (ICMP)</keyword>
        <keyword>Long Connectivity Disruption (LCD)</keyword>
        <!-- Keywords will be incorporated into HTML output
             files in a meta tag but they have no effect on text or nroff
             output. If you submit your draft to the RFC Editor, the
             keywords will be used for the search engine. -->

        <abstract>
            <t>Disruptions in end-to-end path connectivity that last longer
            than one retransmission timeout cause suboptimal TCP performance.
            The reason for this performance degradation is that TCP interprets
            segment loss induced by long connectivity disruptions as a sign of
            congestion, resulting in repeated retransmission timer backoffs.
            This, in turn, leads to a delayed detection of the re-establishment
            of the connection since TCP waits for the next retransmission
            timeout before it attempts a retransmission.</t>

            <t>This document proposes an algorithm to make TCP more
            robust to long connectivity disruptions (TCP-LCD). It describes how
            standard ICMP messages can be exploited during timeout-based loss
            recovery to disambiguate true congestion loss from non-congestion
            loss caused by connectivity disruptions. Moreover, a reversion
            strategy of the retransmission timer is specified that enables a
            more prompt detection of whether or not the connectivity to a
            previously disconnected peer node has been restored. TCP-LCD is a
            TCP sender-only modification that effectively improves TCP
            performance in case of connectivity disruptions.</t>
        </abstract>

    </front>

    <!--  ***** MAIN MATTER ***** -->
    <middle>

        <!-- ***** Section: Terminology ***** -->
        <section anchor="terminology" title="Terminology">
            <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL",
            "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and
            "OPTIONAL" in this document are to be interpreted as described in
            <xref target="RFC2119" />.</t>

            <t>The reader should be familiar with the algorithm and terminology
            from <xref target='RFC2988' />, which defines the standard
            algorithm Transmission Control Protocol (TCP) senders are required
            to use to compute and manage their retransmission timer. In this
            document, the terms "retransmission timer" and "retransmission
            timeout" are used as defined in <xref target='RFC2988' />. The
            retransmission timer ensures data delivery in the absence of any
            feedback from the receiver.  The duration of this timer is referred
            to as retransmission timeout (RTO).</t>

            <t>As defined in <xref target='RFC0793' />, the term "acceptable
            acknowledgment (ACK)" refers to a TCP segment that acknowledges
            previously unacknowledged data. The TCP sender state variable
            "SND.UNA" and the current segment variable "SEG.SEQ" are used as
            defined in <xref target='RFC0793' />. SND.UNA holds the segment
            sequence number of the earliest segment that has not been acknowledged
            by the TCP receiver (the oldest outstanding segment). SEG.SEQ is
            the segment sequence number of a given segment.</t>

            <t>For the purposes of this specification, we define the term
            "timeout-based loss recovery" that refers to the state that a TCP
            sender enters upon the first timeout of the oldest outstanding
            segment (SND.UNA) and leaves upon the arrival of the *first*
            acceptable ACK. It is important to note that other documents use a
            different interpretation of the term "timeout-based loss recovery".
            For example, the NewReno modification to TCP's Fast Recovery
            algorithm <xref target='RFC3782' /> extends the period a TCP sender
            remains in timeout-based loss recovery compared to the one defined
            in this document. This is because <xref target='RFC3782' />
            attempts to avoid unnecessary multiple Fast Retransmits that can
            occur after an RTO.</t>
       </section>

        <!-- ***** Section: Introduction ***** -->
        <section anchor="intro" title="Introduction">
            <t>Connectivity disruptions can occur in many different situations.
            The frequency of connectivity disruptions depends on the properties
            of the end-to-end path between the communicating hosts. While
            connectivity disruptions can occur in traditional wired networks,
            e.g., caused by an unplugged network cable, the likelihood of
            their occurrence is significantly higher in wireless (multi-hop)
            networks. In particular, end-host mobility, network topology changes,
            and wireless interferences are crucial factors. In the case of the
            Transmission Control Protocol (TCP) <xref target='RFC0793' />, the
            performance of the connection can experience a significant
            reduction compared to a permanently connected path
            <xref target='SESB05' />. This is because TCP, which was originally
            designed to operate in fixed and wired networks, generally assumes
            that the end-to-end path connectivity is relatively stable over the
            connection's lifetime.</t>

            <t>Depending on their duration, connectivity disruptions can be
            classified into two groups
            <xref target='I-D.schuetz-tcpm-tcp-rlci' />: "short" and "long". A
            connectivity disruption is "short" if connectivity returns before
            the retransmission timer fires for the first time. In this case,
            TCP recovers lost data segments through Fast Retransmit and lost
            acknowledgments (ACK) through successfully delivered later ACKs.
            Connectivity disruptions are declared as "long" for a given TCP
            connection if the retransmission timer fires at least once before
            connectivity is resumed. Whether or not path characteristics, like
            the round trip time (RTT) or the available bandwidth, have changed
            when connectivity resumes after a disruption is another important
            aspect for TCP's retransmission scheme
            <xref target='I-D.schuetz-tcpm-tcp-rlci'/>.</t>

            <t>The algorithm specified in this document improves TCP's behavior
            in case of "long connectivity disruptions". In particular, it
            focuses on the period prior to the re-establishment of the
            connectivity to a previously disconnected peer node. The document
            does not describe any modifications to TCP's behavior and its
            congestion control mechanisms <xref target='RFC5681' /> after
            connectivity has been restored.</t>

            <t>When a long connectivity disruption occurs on a TCP connection,
            the TCP sender eventually does not receive any more
            acknowledgments. After the retransmission timer expires, the TCP
            sender enters the timeout-based loss recovery and declares the
            oldest outstanding segment (SND.UNA) as lost. Since TCP tightly
            couples reliability and congestion control, the retransmission of
            SND.UNA is triggered together with the reduction of the
            transmission rate. This is based on the assumption that segment
            loss is an indication of congestion <xref target='RFC5681' />. As
            long as the connectivity disruption persists, TCP will repeat this
            procedure until the oldest outstanding segment has successfully
            been acknowledged, or until the connection has timed out. TCP
            implementations that follow the recommended retransmission timeout
            (RTO) management of <xref target='RFC2988'> RFC 2988</xref>
            double the RTO after each retransmission attempt. However, the
            RTO growth may be bounded by an upper limit, the maximum RTO,
            which is at least 60s, but may be longer: Linux, for example, uses
            120s. If connectivity is restored between two retransmission
            attempts, TCP still has to wait until the retransmission timer
            expires before resuming transmission, since it simply does not have
            any means to know if the connectivity has been re-established.
            Therefore, depending on when connectivity becomes available again,
            this can waste up to a maximum RTO of possible transmission
            time.</t>

            <t>This retransmission behavior is not efficient, especially in
            scenarios with long connectivity disruptions. In the ideal case,
            TCP would attempt a retransmission as soon as connectivity to its
            peer has been re-established. In this document, we specify a TCP
            sender-only modification to provide robustness to long connectivity
            disruptions (TCP-LCD). The memo describes how the standard Internet
            Control Message Protocol (ICMP) can be exploited during
            timeout-based loss recovery to identify non-congestion loss caused
            by long connectivity disruptions. TCP-LCD's reversion strategy of
            the retransmission timer enables higher-frequency retransmissions
            and thereby prompt detection of when connectivity to a previously
            disconnected peer node has been restored. If no congestion is
            present, TCP-LCD approaches the ideal behavior.</t>

            <t>Experimental results of a Linux implementation of TCP-LCD have
            been presented in <xref target="ZimHan09" />. The implementation
            has been incorporated into mainline Linux, and is already used
            within the Internet. Thus far, no negative experiences have been
            reported that could be attributed to the algorithm. However, we
            consider TCP-LCD as experimental until more real-life results have
            been obtained.  Nevertheless, we encourage implementation of
            TCP-LCD under other operating systems to provide for broader
            testing and experimentation opportunities.</t>
    </section>

        <!-- ***** Section: Connectivity Disruption Indication ***** -->
        <section anchor="cdi" title="Connectivity Disruption Indication">
            <t>If the queue of an intermediate router that is experiencing a link
            outage can buffer all incoming packets, a connectivity disruption
            will only cause a variation in delay, which is handled well by TCP
            implementations using either Eifel <xref target='RFC3522' />,
            <xref target='RFC4015' /> or Forward RTO-Recovery (F-RTO)
            <xref target='RFC5682' />. However, if the link outage lasts for
            too long, the router experiencing the link outage is forced to drop
            packets, and finally to discard the corresponding route. Means to
            detect such link outages include reacting to failed address
            resolution protocol (ARP) <xref target='RFC0826' /> queries,
            unsuccessful link sensing, and the like. However, this is solely
            the responsibility of the respective router.

                <list style="empty">
                    <t>Note: The focus of this memo is on introducing a method
                    by which ICMP messages may be exploited to improve TCP's
                    performance; how different physical and link layer
                    mechanisms below the network layer may trigger ICMP
                    destination unreachable messages is out of scope of this
                    memo.</t>
                </list>
            </t>

            <t>Provided that no other route to the specific destination exists,
            an Internet Protocol version 4 (IPv4) <xref target='RFC0791' />
            router will notify the corresponding sending host about the dropped
            packets via ICMP destination unreachable messages of code 0 (net
            unreachable) or code 1 (host unreachable) <xref target='RFC1812' />.
            Therefore, the sending host can use the ICMP destination
            unreachable messages of these codes as an indication for a
            connectivity disruption, since the reception of these messages
            provides evidence that packets were dropped due to a link
            outage.</t>

            <t>For Internet Protocol version 6 (IPv6) <xref target='RFC2460' />,
            the counterpart of the ICMP destination unreachable message of code
            0 (net unreachable) and of code 1 (host unreachable) is the ICMPv6
            destination unreachable message of code 0 (no route to destination)
            <xref target='RFC4443' />. As with IPv4, a router should generate
            an ICMPv6 destination unreachable message of code 0 in response to
            a packet that cannot be delivered to its destination address
            because it lacks a matching entry in its routing table.</t>

            <t>Note that there are also other ICMP and ICMPv6 destination
            unreachable messages with different codes. Some of them are
            candidates for connectivity disruption indications, too, but need
            further investigation. Examples are ICMP destination unreachable
            messages with code 5 (source route failed), code 11 (net
            unreachable for TOS), or code 12 (host unreachable for TOS) <xref
            target='RFC1812' />.
            On the other hand, codes that flag hard errors
            are of no use for this scheme, since TCP should abort the
            connection when those are received <xref target='RFC1122' />.</t>

            <t>For the sake of simplicity, we will use, unless explicitly
            qualified with ICMPv4 or ICMPv6, the term "ICMP unreachable
            message" as a synonym for ICMP destination unreachable messages of
            code 0 or code 1 and ICMPv6 destination unreachable messages of code 0. This
            implies that all keywords from <xref target='RFC2119' /> that deal
            with the handling of received ICMP messages apply in the same way
            to ICMPv6 messages.</t>

            <t>The accurate interpretation of ICMP unreachable messages as a
            connectivity disruption indication is complicated by the following
            two peculiarities of ICMP messages. First, they do not
            necessarily operate on the same timescale as the packets, i.e., TCP
            segments that elicited them. When a router drops a packet due to a
            missing route, it will not necessarily send an ICMP unreachable
            message immediately, but will rather queue it for later delivery.
            Second, ICMP messages are subject to rate limiting, e.g., when a
            router drops a whole window of data due to a link outage, it is
            unlikely to send as many ICMP unreachable messages as dropped TCP
            segments. Depending on the load of the router, it may not even send
            any ICMP unreachable messages at all. Both peculiarities originate
            from <xref target='RFC1812' /> for ICMPv4 and <xref target='RFC4443' />
            for ICMPv6.</t>

            <t>Fortunately, according to <xref target='RFC0792' />, ICMPv4
            unreachable messages have to contain in their body the entire
            IPv4 header <xref target='RFC0791' /> of the
            datagram eliciting the ICMPv4 unreachable message, plus the first 64
            bits of the payload of that datagram. This allows the sending host
            to match the ICMPv4 error message to the transport connection that
            elicited it. <xref target='RFC1812'>RFC 1812</xref> augments these
            requirements and states that ICMPv4 messages should contain as much
            of the original datagram as possible without the length of the ICMPv4
            datagram exceeding 576 bytes. Therefore, in case of TCP, at least
            the source port number, the destination port number, and the 32-bit
            TCP sequence number are included. This allows the originating TCP
            to demultiplex the received ICMPv4 message and to identify the
            affected connection. Moreover, it can identify which segment of the
            respective connection triggered the ICMPv4 unreachable message,
            unless there are several segments in-flight with the same sequence
            number (see <xref target='discuss_retrans_ambiguity' />).</t>

            <t>For IPv6 <xref target='RFC2460' />, the payload of an ICMPv6
            error message has to include as many bytes as possible from the
            IPv6 datagram that elicited the ICMPv6 error message, without
            making the error message exceed the minimum IPv6 MTU (1280 bytes)
            <xref target='RFC4443' />. Thus, enough information is available
            to identify both the affected connection and the corresponding
            segment that triggered the ICMPv6 error message.</t>

            <t>A connectivity disruption indication in the form of an ICMP
            unreachable message associated with a presumably lost TCP segment
            provides strong evidence that the segment was not dropped due to
            congestion, but was successfully delivered as far as the reporting
            router. It therefore did not witness any congestion at least on
            that part of the path that was traversed by both the TCP segment
            eliciting the ICMP unreachable message and the ICMP
            unreachable message itself.</t>
        </section>

        <!-- ***** Section: Connectivity Disruption Reaction ***** -->
        <section anchor="cdr" title="Connectivity Disruption Reaction">
            <t><xref target='alg_idea' /> introduces the basic idea of TCP-LCD. The
            complete algorithm is specified in <xref target='alg' />.</t>

            <!-- ***** Subsection: Basic Idea ***** -->
            <section anchor="alg_idea" title="Basic Idea">
                <t>The goal of the algorithm is to promptly detect when
                connectivity to a previously disconnected peer node has been
                restored after a long connectivity disruption, while retaining
                appropriate behavior in case of congestion. TCP-LCD exploits
                standard ICMP unreachable messages during timeout-based loss
                recovery. This increases TCP's retransmission frequency by
                undoing one retransmission timer backoff whenever an ICMP
                unreachable message is received that contains a segment with
                a sequence number of a presumably lost retransmission.</t>

                <t>This approach has the advantage of appropriately reducing the
                probing rate in case of congestion. If either the
                retransmission itself or the corresponding ICMP message is
                dropped, the previously performed retransmission timer backoff
                is not undone, which effectively halves the probing rate.</t>
            </section>

            <!-- ***** Subsection: Algorithm Details ***** -->
            <section anchor="alg" title="Algorithm Details">
                <t>A TCP sender that uses <xref target='RFC2988'>
                RFC 2988</xref> to compute TCP's retransmission timer MAY
                employ the following scheme to avoid over-conservative
                retransmission timer backoffs in case of long connectivity
                disruptions. If a TCP sender does implement the following
                steps, the algorithm MUST be initiated upon the first timeout
                of the oldest outstanding segment (SND.UNA) and MUST be stopped
                upon the arrival of the first acceptable ACK. The algorithm
                MUST NOT be re-initiated upon subsequent timeouts for the same
                segment. The scheme SHOULD NOT be used in SYN-SENT or
                SYN-RECEIVED states <xref target='RFC0793' /> (see
                <xref target="discuss_syn" />).</t>

                <t>A TCP sender that does not employ
                <xref target='RFC2988'>RFC 2988</xref> to compute TCP's
                retransmission timer MUST NOT use TCP-LCD. We envision that
                the scheme could be easily adapted to algorithms other than
                RFC 2988. However, we leave this as future work.</t>

                <t>In rule (2.5), <xref target='RFC2988'>RFC 2988</xref>
                provides the option to place a maximum value on the RTO. When a
                TCP implements this rule to provide an upper bound for the RTO,
                the same upper bound MUST also be used in the following algorithm. In
                particular, if the RTO is bounded by an upper limit (maximum
                RTO), the "MAX_RTO" variable used in this scheme MUST be
                initialized with this upper limit. Otherwise, if the RTO is
                unbounded, the "MAX_RTO" variable MUST be set to
                infinity.</t>

                <t>The scheme specified in this document uses the "BACKOFF_CNT"
                variable, whose initial value is zero. The variable is used to
                count the number of performed retransmission timer backoffs
                during one timeout-based loss recovery. Moreover, the
                "RTO_BASE" variable is used to recover the previous RTO if the
                retransmission timer backoff was unnecessary. The variable is
                initialized with the RTO upon initiation of timeout-based loss
                recovery.</t>

                <t>
                    <list style='format (%d)' counter="cnt">
                        <t>Before TCP updates the variable "RTO" when it
                        initiates timeout-based loss recovery, set the variables
                        "BACKOFF_CNT" and "RTO_BASE" as follows:
                            <list style='empty'>
                                <t>BACKOFF_CNT := 0;</t>
                                <?rfc subcompact='yes' ?>
                                <t>RTO_BASE := RTO.</t>
                                <?rfc subcompact='no' ?>
                            </list>
                        Proceed to step (R).</t>
                    </list>

                    <list style='hanging' hangIndent='5'>
                        <t hangText="(R)">This is a placeholder for standard TCP's
                        behavior in case the retransmission timer has expired.
                        In particular, if
                        <xref target='RFC2988'>RFC 2988</xref> is used,
                        steps (5.4) - (5.6) of that algorithm go here. Proceed
                        to step (2).</t>
                    </list>

                    <list style='format (%d)' counter="cnt">
                        <t>To account for the expiration of the retransmission
                        timer in the previous step (R), increment the
                        "BACKOFF_CNT" variable by one:
                            <list style='empty'>
                                <t>BACKOFF_CNT := BACKOFF_CNT + 1.</t>
                            </list>
                        </t>

                        <t>Wait either
                            <list style='empty'>
                                <t>for the expiration of the retransmission
                                timer. When the retransmission timer expires,
                                proceed to step (R);</t>

                                <t>or for the arrival of an acceptable ACK. When
                                an acceptable ACK arrives, proceed to step (A);
                                </t>

                                <t>or for the arrival of an ICMP unreachable
                                message. When the ICMP unreachable message
                                "ICMP_DU" arrives, proceed to step (4).</t>
                           </list>
                        </t>
                    </list>

                    <list style='format (%d)' counter="cnt">
                        <t>If "BACKOFF_CNT > 0", i.e., if at least one
                        retransmission timer backoff can be undone, then
                            <list style='empty'>
                                <t>proceed to step (5);</t>
                            </list>
                        else
                            <list style='empty'>
                                <t>proceed to step (3).</t>
                            </list>
                        </t>

                        <t>Extract the TCP segment header included in the ICMP
                        unreachable message "ICMP_DU":
                            <list style='empty'>
                                <t>SEG := Extract(ICMP_DU).</t>
                            </list>
                        </t>

                        <t>If "SEG.SEQ == SND.UNA", i.e., if the TCP segment
                        "SEG" eliciting the ICMP unreachable message "ICMP_DU"
                        contains the sequence number of a retransmission, then
                            <list style='empty'>
                                <t>proceed to step (7);</t>
                            </list>
                        else
                            <list style='empty'>
                                <t>proceed to step (3).</t>
                            </list>
                        </t>

                        <t>Undo the last retransmission timer backoff:
                            <list style='empty'>
                                <t>BACKOFF_CNT := BACKOFF_CNT - 1;</t>
                                <?rfc subcompact='yes' ?>
                                <t>RTO := min(RTO_BASE * 2^(BACKOFF_CNT), MAX_RTO).</t>
                                <?rfc subcompact='no' ?>
                            </list>
                        </t>

                        <t>If the retransmission timer expires due to the undoing
                        in the previous step (7), then
                            <list style='empty'>
                                <t>proceed to step (R);</t>
                            </list>
                        else
                            <list style='empty'>
                                <t>proceed to step (3).</t>
                            </list>
                        </t>
                    </list>

                    <list style='hanging' hangIndent='5'>
                        <t hangText="(A)">This is a placeholder for standard
                        TCP's behavior in case an acceptable ACK has arrived.
                        No further processing.</t>
                    </list>
                </t>

                <t>When a TCP in steady-state detects a segment loss using the
                retransmission timer, it enters the timeout-based loss recovery
                and initiates the algorithm (step 1). It adjusts the slow start
                threshold (ssthresh), sets the congestion window (CWND) to one
                segment, backs off the retransmission timer, and retransmits
                the first unacknowledged segment (step R)
                <xref target='RFC5681' />, <xref target='RFC2988' />. To account
                for the expiration of the retransmission timer, the TCP sender
                increments the "BACKOFF_CNT" variable by one (step 2).</t>

                <t>In case the retransmission timer expires again (step 3a), a
                TCP will repeat the retransmission of the first unacknowledged
                segment and back off the retransmission timer once more (step
                R) <xref target='RFC2988' />, as well as increment the
                "BACKOFF_CNT" variable by one (step 2). Note that a TCP may
                implement <xref target='RFC2988'>RFC 2988's</xref> option
                to place a maximum value on the RTO that may result in not
                performing the retransmission timer backoff. However, step (2)
                MUST always and unconditionally be applied, no matter whether
                or not the retransmission timer is actually backed off. In
                other words, each time the retransmission timer expires, the
                "BACKOFF_CNT" variable MUST be incremented by one.</t>

                <t>If the first received packet after the retransmission(s) is
                an acceptable ACK (step 3b), a TCP will proceed as normal,
                i.e., slow start the connection and terminate the algorithm
                (step A). Later ICMP unreachable messages from the just
                terminated timeout-based loss recovery are ignored, since the
                ACK clock is already restarting due to the successful
                retransmission.</t>

                <t>On the other hand, if the first received packet after the
                retransmission(s) is an ICMP unreachable message (step 3c), and
                if step (4) permits it, TCP SHOULD undo one backoff for each
                ICMP unreachable message reporting an error on a
                retransmission. To decide if an ICMP unreachable message was
                elicited by a retransmission, the sequence number it contains
                is inspected (step 5, step 6). The undo is performed by
                re-calculating the RTO with the decremented "BACKOFF_CNT"
                variable (step 7). This calculation explicitly matches the
                (bounded) exponential backoff specified in rule (5.5) of
                <xref target='RFC2988' />.</t>

                <t>Upon receipt of an ICMP unreachable message that legitimately
                undoes one backoff, there is the possibility that the shortened
                retransmission timer has already expired (step 8). Then, TCP
                SHOULD retransmit immediately. In case the shortened
                retransmission timer has not yet expired, TCP MUST wait
                accordingly.</t>
            </section>
        </section>

        <!-- ***** Section: Discussion of TCP-LCD ***** -->
        <section anchor="discussion" title="Discussion of TCP-LCD">
            <t>TCP-LCD takes caution to only react to connectivity disruption
            indications in the form of ICMP unreachable messages during
            timeout-based loss recovery. Therefore, TCP's behavior is not
            altered when either no ICMP unreachable messages are received, or
            the retransmission timer of the TCP sender did not expire since the
            last received acceptable ACK. Thus, by definition, the algorithm
            triggers only in the case of long connectivity disruptions.</t>

            <t>Only such ICMP unreachable messages that contain a TCP segment
            with the sequence number of a retransmission, i.e., contain
            SND.UNA, are evaluated by TCP-LCD. All other ICMP unreachable
            messages are ignored. The arrival of those ICMP unreachable
            messages provides strong evidence that the retransmissions were not
            dropped due to congestion, but were successfully delivered to the
            reporting router. In other words, there is no evidence for any
            congestion at least on that very part of the path that was
            traversed by both the TCP segment eliciting the ICMP unreachable
            message as well as the ICMP unreachable message itself.</t>

            <t>However, there are some situations where TCP-LCD makes a false
            decision and incorrectly undoes a retransmission timer backoff. This
            can happen, even when the received ICMP unreachable message contains
            the sequence number of a retransmission (SND.UNA), because the TCP
            segment that elicited the ICMP unreachable message may either not
            be a retransmission (<xref target='discuss_retrans_ambiguity' />),
            or does not belong to the current timeout-based loss recovery
            (<xref target='discuss_wrap_sequence_numbers' />). Finally, packet
            duplication (<xref target='discuss_packet_dup' />) can also
            spuriously trigger the algorithm.</t>

            <t><xref target='discuss_probing_frequency' /> discusses possible
            probing frequencies, while <xref target='discuss_steady-state' />
            describes the motivation for not reacting to ICMP unreachable
            messages while TCP is in steady-state.</t>

            <!-- ***** Subsection: Retransmission Ambiguity ***** -->
            <section anchor="discuss_retrans_ambiguity" title="Retransmission Ambiguity">
                <t>Historically, the retransmission ambiguity problem
                <xref target='Zh86' />, <xref target='KP87' /> is the TCP sender's
                inability to distinguish whether the first acceptable ACK after
                a retransmission refers to the original transmission or to the
                retransmission. This problem occurs after both a Fast
                Retransmit and a timeout-based retransmit. However, modern TCP
                implementations can eliminate the retransmission ambiguity with
                either the help of Eifel <xref target='RFC3522' />,
                <xref target='RFC4015' /> or Forward RTO-Recovery (F-RTO)
                <xref target='RFC5682' />.</t>

                <t>The reversion strategy of the given algorithm suffers from a
                form of retransmission ambiguity, too. In contrast to the above
                case, TCP suffers from ambiguity regarding ICMP unreachable
                messages received during timeout-based loss recovery. With the
                TCP sequence number included in the ICMP unreachable message, a
                TCP sender is not able to determine if the ICMP unreachable
                message refers to the original transmission or to any of the
                timeout-based retransmissions. That is, there is an ambiguity
                with regard to which TCP segment an ICMP unreachable message
                reports on.</t>

                <t>However, this ambiguity is not considered to be a problem
                for the algorithm. The assumption that a received ICMP unreachable message
                provides evidence that a non-congestion loss caused by the
                connectivity disruption was wrongly considered a congestion
                loss still holds, regardless of which TCP segment, transmission
                or retransmission, the message refers to.</t>
            </section>

            <!-- ***** Subsection: Wrapped Sequence Numbers ***** -->
            <section anchor="discuss_wrap_sequence_numbers" title="Wrapped Sequence Numbers">
                <t>Besides the ambiguity whether a received ICMP unreachable
                message refers to the original transmission or to any of the
                retransmissions, there is another source of ambiguity related
                to the TCP sequence numbers contained in ICMP unreachable
                messages. For high bandwidth paths, the sequence space may wrap
                quickly. This might cause delayed ICMP unreachable
                messages to coincidentally fit as valid input in the proposed
                scheme. As a result, the scheme may incorrectly undo
                retransmission timer backoffs. Chances for this to happen are
                minuscule, since a particular ICMP unreachable message would need to
                contain the exact sequence number of the current
                oldest outstanding segment (SND.UNA), while at the same time
                TCP is in timeout-based loss recovery. However, two "worst
                case" scenarios for the algorithm are possible:</t>

                <t>For instance, consider a steady state TCP connection, which
                will be disrupted at an intermediate router due to a link
                outage. Upon the expiration of the RTO, the TCP sender enters
                the timeout-based loss recovery and starts to retransmit the
                earliest segment that has not been acknowledged (SND.UNA). For
                some reason, the router delays all corresponding ICMP unreachable
                messages so that the TCP sender backs the retransmission timer
                off normally without any undoing. At the end of the
                connectivity disruption, the TCP sender eventually detects the
                re-establishment, leaves the scheme and finally the
                timeout-based loss recovery, too. A sequence number wrap-around
                later, the connectivity between the two peers is disrupted
                again, but this time due to congestion and exactly at the time
                at which the current SND.UNA matches the SND.UNA from the
                previous cycle. If the router emits the delayed ICMP unreachable
                messages now, the TCP sender would incorrectly undo
                retransmission timer backoffs. As the TCP sequence number
                contains 32 bits, the probability of this scenario is at most
                1/2^32. Given sufficiently many retransmissions in the first
                timeout-based loss recovery, the corresponding ICMP unreachable
                messages could reduce the RTO in the second recovery at most to
                "RTO_BASE". However, once the ICMP unreachable messages are
                depleted, the standard exponential backoff will be performed.
                Thus, the congestion response will only be delayed by some
                false retransmissions.</t>

                <t>Similar to the above, consider the case where a steady state
                TCP connection with n segments in flight will be disrupted at
                some point due to a link outage at an intermediate router.
                For each segment in flight, the router may generate an ICMP
                unreachable message. However, for some reason, it delays
                them.  Once the link outage is over and the connection has been
                re-established, the TCP sender leaves the scheme and
                slow-starts the connection. Following a sequence number
                wrap-around, a retransmission timeout occurs, just at the
                moment the TCP sender's current window of data reaches the
                previous range of the sequence number space again.  In case
                the router emits the delayed ICMP unreachable messages now,
                spurious undoing of the retransmission timer backoff is
                possible once, if the TCP sequence number contained in ICMP
                unreachable messages matches the current SND.UNA, and the
                timeout was a result of congestion. In the case of another
                connectivity disruption, the additional undoing of the
                retransmission timer backoff has no impact. The probability of
                this scenario is at most n/2^32.</t>
            </section>

            <!-- ***** Subsection: Packet Duplication ***** -->
            <section anchor="discuss_packet_dup" title="Packet Duplication">
                <t>In case an intermediate router duplicates packets, a TCP
                sender may receive more ICMP unreachable messages during
                timeout-based loss recovery than sent timeout-based
                retransmissions. However, since TCP-LCD keeps track of the
                number of performed retransmission timer backoffs in the
                "BACKOFF_CNT" variable, it will not undo more retransmission
                timer backoffs than were actually performed.  Nevertheless, if
                packet duplication and congestion coincide on the path between
                the two communicating hosts, duplicated ICMP unreachable messages could
                hide the congestion loss of some retransmissions or ICMP unreachable
                messages, and the algorithm may incorrectly undo retransmission
                timer backoffs. Considering the overall impact of a router that
                duplicates packets, the additional load induced by some
                spurious timeout-based retransmits can probably be
                neglected.</t>
            </section>

            <!-- ***** Subsection: Probing frequency ***** -->
            <section anchor="discuss_probing_frequency" title="Probing Frequency">
                <t>One might argue that if an ICMP unreachable message arrives
                for a timeout-based retransmission, the RTO shall be reset or
                recalculated, similar to what is done when an ACK arrives
                during timeout-based loss recovery (see Karn's algorithm
                <xref target='KP87' />, <xref target="RFC2988" />), and a new
                retransmission should be sent immediately. Generally, this
                would result in a much higher probing frequency based on the
                round trip time to the router where connectivity has been
                disrupted. However, we believe the current scheme provides a
                good trade-off between conservative behavior and fast detection
                of connectivity re-establishment. TCP-LCD focuses on
                long connectivity disruptions, i.e., on disruptions that last
                for several RTOs. Thus, a much higher probing frequency (less
                than once per RTO) would not significantly increase the
                available transmission time compared to the duration of the
                connectivity disruption.</t>
            </section>

            <!-- ***** Subsection: Reaction during Connection Establishment -->
            <section anchor="discuss_syn" title="Reaction during Connection Establishment">
                <t>It is possible that a TCP sender enters timeout-based loss
                recovery while the connection is in SYN-SENT or SYN-RECEIVED
                states <xref target='RFC0793' />. The algorithm described in
                this document could also be used for faster connection
                establishment in networks with connectivity disruptions.
                However, because existing TCP implementations
                <xref target='RFC5461' /> already interpret ICMP unreachable
                messages during connection establishment and abort the
                corresponding connection, we refrain from suggesting this.</t>
           </section>

            <!-- ***** Subsection: Reaction in Steady-State ***** -->
            <section anchor="discuss_steady-state" title="Reaction in Steady-State">
                <t>Another exploitation of ICMP unreachable messages in the
                context of TCP congestion control might seem appropriate,
                while TCP is in steady-state. As the RTT up to the router that
                generated the ICMP unreachable message is likely to be
                substantially shorter than the overall RTT to the destination,
                the ICMP unreachable message may very well reach the
                originating TCP while it is transmitting the current window of
                data. In case the remaining window is large, it might seem
                appropriate to refrain from transmitting the remaining window
                as there is timely evidence that it will only trigger further
                ICMP unreachable messages at the very router. Although this
                promises improvement from a wastage perspective, it may be
                counterproductive from a security perspective. An attacker
                could forge such ICMP messages, thereby forcing the originating
                TCP to stop sending data, very similar to the blind
                throughput-reduction attack mentioned in
                <xref target="RFC5927" />.</t>

                <t>An additional consideration is the following: in the presence
                of multi-path routing, even the receipt of a legitimate ICMP
                unreachable message cannot be exploited accurately, because
                there is the possibility that only one of the multiple paths to the
                destination is suffering from a connectivity disruption, which
                causes ICMP unreachable messages to be sent. Then, however,
                there is the possibility that the path along which the
                connectivity disruption occurred contributed considerably to
                the overall bandwidth, such that a congestion response is very
                well reasonable. However, this is not necessarily the case.
                Therefore, a TCP has no means except for its inherent
                congestion control to decide on this matter. All in all, it
                seems that for a connection in steady-state, i.e., not in
                timeout-based loss recovery, reacting to ICMP unreachable
                messages in regard to congestion control is not appropriate.
                For the case of timeout-based retransmissions, however, there
                is a reasonable congestion response, which is skipping further
                retransmission timer backoffs because there is no congestion
                indication - as described above.</t>
            </section>
        </section>

        <!-- ***** Section: Dissolving Ambiguity Issues using the TCP Timestamps Option ***** -->
        <section anchor="algo_save" title="Dissolving Ambiguity Issues using the TCP Timestamps Option">
            <t>If the TCP Timestamps option <xref target='RFC1323' />
            is enabled for a connection, a TCP sender SHOULD use the following
            algorithm to dissolve the ambiguity issues mentioned in Sections
            <xref target='discuss_retrans_ambiguity' format='counter' />,
            <xref target='discuss_wrap_sequence_numbers' format='counter' />,
            and <xref target='discuss_packet_dup' format='counter' />. In
            particular, both the retransmission ambiguity and the packet
            duplication problems are prevented by the following TCP-LCD
            variant. On the other hand, the false positives caused by wrapped
            sequence numbers cannot be completely avoided, but the likelihood
            is further reduced by a factor of 1/2^32 since the Timestamp Value
            field (TSval) of the TCP Timestamps Option contains 32 bits.</t>

            <t>Hence, implementers may choose to implement TCP-LCD with the
            following modifications.</t>

            <t>Step (1) is replaced by step (1'):
                <list style="format (%d')" counter="cnt2">
                    <t>Before TCP updates the variable "RTO" when it initiates
                    timeout-based loss recovery, set the variables "BACKOFF_CNT"
                    and "RTO_BASE" and the data structure "RETRANS_TS" as follows:
                        <list style='empty'>
                            <t>BACKOFF_CNT := 0;</t>
                            <?rfc subcompact='yes' ?>
                            <t>RTO_BASE := RTO;</t>
                            <t>RETRANS_TS := [].</t>
                            <?rfc subcompact='no' ?>
                        </list>
                    Proceed to step (R).</t>
                </list>
            </t>

            <t>Step (2) is extended by step (2b):
                <list style="format (%db)" counter="cnt2">
                    <t>Store the value of the Timestamp Value field (TSval) of
                    the TCP Timestamps option included in the retransmission
                    "RET" sent in step (R) into the "RETRANS_TS" data structure:
                        <list style='empty'>
                            <t>RETRANS_TS.add(RET.TSval)</t>
                        </list>
                    </t>
                </list>
            </t>

            <t>Step (6) is replaced by step (6'):
                <list style='hanging' hangIndent='6'>
                    <t hangText="(6')">If "SEG.SEQ == SND.UNA &amp;&amp;
                    RETRANS_TS.exists(SEG.TSval)", i.e., if the TCP segment
                    "SEG" eliciting the ICMP unreachable message "ICMP_DU"
                    contains the sequence number of a retransmission, and the
                    value in its Timestamp Value field (TSval) is valid, then
                        <list style='empty'>
                            <t>proceed to step (7');</t>
                        </list>
                    else
                        <list style='empty'>
                            <t>proceed to step (3).</t>
                        </list>
                    </t>
                </list>
            </t>

            <t>Step (7) is replaced by step (7'):
                <list style='hanging' hangIndent='6'>
                    <t hangText="(7')">Undo the last retransmission timer backoff:
                        <list style='empty'>
                            <t>RETRANS_TS.remove(SEG.TSval);</t>
                            <?rfc subcompact='yes' ?>
                            <t>BACKOFF_CNT := BACKOFF_CNT - 1;</t>
                            <t>RTO := min(RTO_BASE * 2^(BACKOFF_CNT), MAX_RTO).</t>
                            <?rfc subcompact='no' ?>
                        </list>
                    </t>
                </list>
            </t>

            <t>The downside of this variant is twofold. First, the
            modifications come at a cost: the TCP sender is required to store
            the timestamps of all retransmissions sent during one timeout-based
            loss recovery. Second, this variant can only undo a retransmission
            timer backoff if the intermediate router experiencing the link
            outage implements <xref target='RFC1812' /> and chooses to include,
            in addition to the first 64 bits of the payload of the
            triggering datagram, as many bits as are needed to include the
            TCP Timestamps option in the ICMP unreachable message.</t>
        </section>

        <!-- ***** Section: Interoperability Issues ***** -->
        <section anchor="interoperability" title="Interoperability Issues">
            <t>This section discusses interoperability issues related to
            introducing TCP-LCD.</t>

            <!-- ***** Subsection: TCP Connection Failures ***** -->
            <section anchor="interaction_rfc1122" title="Detection of TCP Connection Failures">
                <t>TCP-LCD may have side-effects on TCP implementations that
                attempt to detect TCP connection failures by counting
                timeout-based retransmissions. <xref target='RFC1122' />
                states in Section 4.2.3.5 that a TCP host must handle excessive
                retransmissions of data segments with two thresholds R1 and R2
                that measure the number of retransmissions that have occurred for
                the same segment. Both thresholds might either be measured in
                time units or as a count of retransmissions.</t>

                <t>Due to TCP-LCD's reversion strategy of the retransmission
                timer, the assumption that a certain number of retransmissions
                corresponds to a specific time interval no longer holds, as
                additional retransmissions may be performed during
                timeout-based loss recovery to detect the end of the
                connectivity disruption. Therefore, a TCP employing TCP-LCD
                either MUST measure the thresholds R1 and R2 in time units
                or, in case R1 and R2 are counters of retransmissions, MUST
                convert them into time intervals, which correspond to the time
                an unmodified TCP would need to reach the specified number of
                retransmissions.</t>
           </section>

            <!-- ***** Subsection: Explicit Congestion Notification ***** -->
            <section anchor="interaction_ecn" title="Explicit Congestion Notification (ECN)">
                <t>With Explicit Congestion Notification (ECN)
                <xref target='RFC3168' />, ECN-capable routers are no longer
                limited to dropping packets to indicate congestion. Instead,
                they can set the Congestion Experienced (CE) codepoint in the
                IP header to indicate congestion. With TCP-LCD, it may happen
                that during a connectivity disruption, a received ICMP
                unreachable message has been elicited by a timeout-based
                retransmission that was marked with the CE codepoint before
                reaching the router experiencing the link outage. In such a
                case, a TCP sender MUST, corresponding to
                <xref target='RFC3168' /> (Section 6.1.2), additionally reset
                the retransmission timer in case the algorithm undoes a
                retransmission timer backoff.</t>
            </section>


            <!-- ***** Subsection: TCP-LCD and IP Tunnels  ***** -->
            <section anchor="interaction_tunnels" title="TCP-LCD and IP Tunnels">
                <t>It is worth noting that IP tunnels, including IPsec
                <xref target='RFC4301' />, IP in IP <xref target='RFC2003' />,
                Generic Routing Encapsulation (GRE) <xref target='RFC2784' />,
                and others are compatible with TCP-LCD, as long as the received
                ICMP unreachable messages can be demultiplexed and extracted
                appropriately by the TCP sender during timeout-based loss
                recovery.</t>

                <t>If, for example, end-to-end tunnels like IPsec in transport
                mode <xref target='RFC4301' /> are employed, a TCP sender may
                receive ICMP unreachable messages where additional steps, e.g.,
                decrypting in step (5) of the algorithm, are needed to extract
                the TCP header from these ICMP messages. Provided that the
                received ICMP unreachable message contains enough information,
                i.e., SEG.SEQ is extractable, this information can still be
                used as a valid input for the proposed algorithm.</t>

                <t>Likewise, if IP encapsulation like <xref target='RFC2003' />
                is used in some part of the path between the communicating
                hosts, the tunnel ingress node may receive the ICMP unreachable
                messages from an intermediate router experiencing the link
                outage. Nevertheless, the tunnel ingress node may replay the
                ICMP unreachable messages in order to inform the TCP sender. If
                enough information is preserved to extract SEG.SEQ, the
                replayed ICMP unreachable messages can still be used in
                TCP-LCD.</t>
            </section>
        </section>

        <!-- ***** Section: Experimental Results ***** -->
<!--
        <section anchor="evaluation" title="Experimental Results">

        </section>
-->

        <!-- ***** Section: Related Work ***** -->
        <section anchor="related_work" title="Related Work">
            <t>Several methods that address TCP's problems in the presence of
            connectivity disruptions have been proposed in the literature. Some of
            them try to improve TCP's performance by modifying lower layers.
            For example, <xref target='SM03'/> introduces a "smart link layer",
            which buffers one segment for each active connection and replays
            these segments upon connectivity re-establishment. This approach
            has a serious drawback: previously stateless intermediate routers
            have to be modified in order to inspect TCP headers, to track the
            end-to-end connection, and to provide additional buffer space. This
            leads to an additional need of memory and processing power.</t>

            <t>On the other hand, stateless link layer schemes, as proposed in
            <xref target='RFC3819'/>, which unconditionally buffer some small
            number of packets may have another problem: if a packet is buffered
            longer than the maximum segment lifetime (MSL) of 2 min
            <xref target='RFC0793' />, i.e., the disconnection lasts longer than
            MSL, TCP's assumption that such segments will never be received
            will no longer be true, violating TCP's semantics
            <xref target='I-D.eggert-tcpm-tcp-retransmit-now' />.</t>

            <t>Other approaches, like TCP-F <xref target='CRVP01' /> or the
            Explicit Link Failure Notification (ELFN) <xref target='HV02' />
            inform a TCP sender about a disrupted path by special messages
            generated and sent from intermediate routers. In the case of a link
            failure, the TCP sender stops sending segments and freezes its
            retransmission timers. TCP-F stays in this state and remains silent
            until either a "route establishment notification" is received or an
            internal timer expires. In contrast, ELFN periodically probes the
            network to detect connectivity re-establishment. Both proposals
            rely on changes to intermediate routers, whereas the scheme
            proposed in this document is a sender-only modification. Moreover,
            ELFN does not consider congestion and may impose serious additional
            load on the network, depending on the probe interval.</t>

            <t>The authors of ATCP <xref target='LS01' /> propose enhancements
            to identify different types of packet loss by introducing a layer
            between TCP and IP. They utilize ICMP destination unreachable
            messages to set TCP's receiver advertised window to zero, thus
            forcing the TCP sender to perform zero window probing with an
            exponential backoff. ICMP destination unreachable messages that
            arrive during this probing period are ignored. This approach is
            nearly orthogonal to this document, which exploits ICMP messages to
            undo a retransmission timer backoff when TCP is already probing. In
            principle, both mechanisms could be combined. However, due to
            security considerations, it does not seem appropriate to adopt
            ATCP's reaction, as discussed in
            <xref target='discuss_steady-state' />.</t>

            <t>Schuetz et al. <xref target='I-D.schuetz-tcpm-tcp-rlci' />
            describe a set of TCP extensions that improve TCP's behavior when
            transmitting over paths whose characteristics can change rapidly.
            Their proposed extensions modify the local behavior of TCP and
            introduce a new TCP option to signal locally received
            connectivity-change indications (CCIs) to remote peers. Upon
            receipt of a CCI, they re-probe the path characteristics either by
            performing a speculative retransmission or by sending a single
            segment of new data, depending on whether the connection is
            currently stalled in exponential backoff or transmitting in
            steady-state, respectively. The authors focus on specifying TCP
            response mechanisms; nevertheless, underlying layers would have to
            be modified to explicitly send CCIs to make these immediate
            responses possible.</t>
        </section>

        <!-- ***** Section: IANA Considerations ***** -->
        <section anchor="iana" title="IANA Considerations">
            <t>This memo includes no request to IANA.</t>
        </section>

        <!-- ***** Section: Security Considerations ***** -->
        <section anchor="security" title="Security Considerations">
            <t>Generally, an attacker has only two attack alternatives:
            to generate ICMP unreachable messages to try to
            make a TCP modified with TCP-LCD flood the network,
            or to suppress legitimate ICMP unreachable messages to try
            to slow down the transmission rate of a TCP sender.</t>

            <t>In order to generate ICMP unreachable messages that fit as an
            input for TCP-LCD, an attacker would need to guess the correct
            four-tuple (i.e., Source IP Address, Source TCP port, Destination
            IP Address, and Destination TCP port) and the exact segment
            sequence number of the current timeout-based retransmission. Yet,
            the correct sequence number is generally hard to guess; a blind
            guess succeeds only with a probability of 1/2^32.  Even if an
            attacker has information about that sequence number (i.e., the
            attacker can eavesdrop on the retransmissions), the impact of the
            attack on the network load may be
            considered low, since the retransmission frequency is limited by
            the RTO that was computed before TCP had entered the timeout-based
            loss recovery. Hence, the highest probing frequency is expected to
            be even lower than once per minimum RTO, i.e., 1s as specified by
            <xref target='RFC2988'/>. It is important to note that an
            attacker who can correctly guess the four-tuple and the segment
            sequence number can easily launch more serious attacks (e.g.,
            hijack the connection), whether or not TCP-LCD is used.</t>

            <t>There may be means by which an attacker can cause the
            suppression of legitimate ICMP unreachable messages (e.g., by
            flooding the router experiencing the link outage to trigger ICMP
            rate-limiting). However, even if the attacker could suppress every
            legitimate ICMP unreachable message, the security impact of
            such an attack is negligible, since the TCP sender using TCP-LCD
            will behave like a regular TCP would. Note
            that this kind of attack is indistinguishable from the case in
            which a router experiencing a link outage does not send ICMP
            unreachable messages at all (e.g., because of local policy).</t>

            <t>In summary, the algorithm proposed in this document is
            considered to be secure.</t>
        </section>

        <!-- ***** Section: Acknowledgments ***** -->
        <section anchor="acks" title="Acknowledgments">
            <t>We would like to thank Lars Eggert, Adrian Farrel, Mark Handley,
            Kai Jakobs, Ilpo Jarvinen, Enrico Marocco, Catherine Meadows,
            Juergen Quittek, Pasi Sarolahti, Tim Shepard, Joe Touch and Carsten
            Wolff for feedback on earlier versions of this document. We also
            thank Michael Faber, Daniel Schaffrath, and Damian Lukowski for
            implementing and testing the algorithm in Linux. Special thanks go
            to Ilpo Jarvinen for giving valuable feedback regarding the Linux
            implementation.</t>

            <t>This work has been supported by the German National Science
            Foundation (DFG) within the research excellence cluster Ultra
            High-Speed Mobile Information and Communication (UMIC), RWTH Aachen
            University.</t>
        </section>

    </middle>

    <!--  ***** BACK MATTER ***** -->
    <back>

        <!-- There are 2 ways to insert reference entries from the citation
             libraries:
             1. define an ENTITY at the top, and use "ampersand character"RFC2629; here (as shown)
             2. simply use a PI "less than character"?rfc include="reference.RFC.2119.xml"?> here
             (for I-Ds: include="reference.I-D.narten-iana-considerations-rfc2434bis.xml")

             Both are cited textually in the same manner: by using xref elements.
             If you use the PI option, xml2rfc will, by default, try to find
             included files in the same directory as the including file. You can
             also define the XML_LIBRARY environment variable with a value
             containing a set of directories to search. These can be either in
             the local filing system or remote ones accessed by
             http (http://domain/dir/... ).-->

        <!-- References split into informative and normative -->
        <references title="Normative References">
            &rfc0792;

            &rfc0793;

            &rfc1812;

            &rfc1323;

            &rfc2988;

            &rfc4443;

            &rfc5681;
        </references>

        <references title="Informative References">
            &rfc0791;

            &rfc0826;

            &rfc1122;

            &rfc2003;

            &rfc2119;

            &rfc2460;

            &rfc2784;

            &rfc3522;

            &rfc3168;

            &rfc3782;

            &rfc3819;

            &rfc4015;

            &rfc4301;

            &rfc5461;

            &rfc5682;

            &rfc5927;

            &retransmit-now;

            &tcp-rlci;

            <reference anchor="SESB05" target="">
                <front>
                    <title>Protocol enhancements for intermittently connected
                        hosts
                    </title>
                    <author surname="Schuetz" initials="S."
                        fullname="Simon Schuetz"> <organization />
                    </author>
                    <author surname="Eggert" initials="L."
                        fullname="Lars Eggert"> <organization />
                    </author>
                    <author surname="Schmid" initials="S."
                        fullname="Stefan Schmid"> <organization />
                    </author>
                    <author surname="Brunner" initials="M."
                        fullname="Marcus Brunner"> <organization />
                    </author>
                    <date year="2005" month="December" />
                </front>
                <seriesInfo name="SIGCOMM Computer Communication Review"
                    value="vol. 35, no. 3, pp. 5-18" />
            </reference>

            <reference anchor="SM03" target="">
                <front>
                    <title>Link layer-based TCP optimisation for disconnecting
                        networks
                    </title>
                    <author surname="Scott" initials="J."
                        fullname="James Scott"> <organization />
                    </author>
                    <author surname="Mapp" initials="G."
                        fullname="Glenford Mapp"> <organization />
                    </author>
                    <date year="2003" month="October" />
                </front>
                <seriesInfo name="SIGCOMM Computer Communication Review"
                    value="vol. 33, no. 5, pp. 31-42" />
            </reference>

            <reference anchor="CRVP01" target="">
                <front>
                    <title>A feedback-based scheme for improving TCP performance
                        in ad hoc wireless networks
                    </title>
                    <author surname="Chandran" initials="K."
                        fullname="Kartik Chandran"> <organization />
                    </author>
                    <author surname="Raghunathan" initials="S."
                        fullname="Sudarshan Raghunathan"> <organization />
                    </author>
                    <author surname="Venkatesan" initials="S."
                        fullname="Subbarayan Venkatesan"> <organization />
                    </author>
                    <author surname="Prakash" initials="R."
                        fullname="Ravi Prakash"> <organization />
                    </author>
                    <date year="2001" month="February"/>
                </front>
                <seriesInfo name="IEEE Personal Communications"
                    value="vol. 8, no. 1, pp. 34-39" />
            </reference>

            <reference anchor="HV02" target="">
                <front>
                    <title>Analysis of TCP performance over mobile ad hoc
                        networks
                    </title>
                    <author surname="Holland" initials="G."
                        fullname="Gavin Holland"> <organization />
                    </author>
                    <author surname="Vaidya" initials="N."
                        fullname="Nitin Vaidya"> <organization />
                    </author>
                    <date year="2002" month="March" />
                </front>
                <seriesInfo name="Wireless Networks"
                    value="vol. 8, no. 2-3, pp. 275-288" />
            </reference>

            <reference anchor="LS01" target="">
                <front>
                    <title>ATCP: TCP for mobile ad hoc networks</title>
                    <author surname="Liu" initials="J."
                        fullname="Jian Liu"> <organization />
                    </author>
                    <author surname="Singh" initials="S."
                        fullname="Suresh Singh"> <organization />
                    </author>
                    <date year="2001" month="July"/>
                </front>
                <seriesInfo name="IEEE Journal on Selected Areas in
                    Communications" value="vol. 19, no. 7, pp. 1300-1315" />
            </reference>

            <reference anchor="Zh86" target="">
                <front>
                    <title>Why TCP Timers Don't Work Well</title>
                    <author surname="Zhang" initials="L."
                        fullname="Lixia Zhang"> <organization />
                    </author>
                    <date year="1986" month="August"/>
                </front>
                <seriesInfo name="Proceedings of the Conference on Applications,
                    Technologies, Architectures, and Protocols for Computer
                    Communication (SIGCOMM'86)" value="pp. 397-405" />
            </reference>

            <reference anchor="ZimHan09" target="http://www.ietf.org/proceedings/75/slides/tcpm-0.pdf">
                <front>
                    <title>Make TCP more Robust to Long Connectivity Disruptions</title>
                    <author surname="Zimmermann" initials="A."
                        fullname="Alexander Zimmermann">
                        <organization>RWTH Aachen University</organization>
                    </author>
                    <date year="2009" month="July"/>
                </front>
                <seriesInfo name="Proceedings of the 75th IETF Meeting" value="slides" />
                <format type='PDF' octets='373973'
                    target='http://www.ietf.org/proceedings/75/slides/tcpm-0.pdf' />
            </reference>

            <reference anchor="KP87" target="">
                <front>
                    <title>Improving Round-Trip Time Estimates in Reliable
                        Transport Protocols
                    </title>
                    <author surname="Karn" initials="P."
                        fullname="Phil Karn"> <organization />
                    </author>
                    <author surname="Partridge" initials="C."
                        fullname="Craig Partridge"> <organization />
                    </author>
                    <date year="1987" month="August"/>
                </front>
                <seriesInfo name="Proceedings of the Conference on Applications,
                    Technologies, Architectures, and Protocols for Computer
                    Communication (SIGCOMM'87)" value="pp. 2-7" />
            </reference>

<!--
            <reference anchor="ZSH08" target="">
                <front>
                    <title>Improving TCP's Robustness to Long Connectivity
                        Disruptions</title>
                    <author surname="Zimmermann" initials="A."
                        fullname="Alexander Zimmermann"> <organization />
                    </author>
                    <author surname="Schaffrath" initials="D."
                        fullname="Daniel Schaffrath"> <organization />
                    </author>
                    <author surname="Hannemann" initials="A."
                        fullname="Arnd Hannemann"> <organization />
                    </author>
                    <date year="2008" month="November"/>
                </front>
                <seriesInfo name="Proceedings of the 20th IEEE Global
                    Communications Conference (GLOBECOM'08)"
                    value="pp. 5064-5069" />
            </reference>
-->
        </references>

        <!-- ***** Section: Changes from previous versions of the draft ***** -->
        <section anchor="changes" title="Changes from previous versions of the draft">
            <t>This appendix should be removed by the RFC Editor before
            publishing this document as an RFC.</t>

            <section anchor="changes_06" title="Changes from draft-ietf-tcpm-tcp-lcd-02">
                <t>
                    <list style="symbols">
                        <t>Incorporated feedback submitted by Enrico Marocco (Gen-ART Review)</t>
                        <t>Incorporated feedback submitted by Juergen Quittek (OpsDir Review)</t>
                        <t>Incorporated feedback submitted by Catherine Meadows (SecDir Review)</t>
                        <t>Incorporated feedback submitted by Adrian Farrel (IESG Review)</t>
                    </list>
                </t>
            </section>

            <section anchor="changes_05" title="Changes from draft-ietf-tcpm-tcp-lcd-01">
                <t>
                    <list style="symbols">
                        <t>Incorporated feedback submitted by Lars Eggert (AD Review)</t>
                    </list>
                </t>
            </section>

           <section anchor="changes_04" title="Changes from draft-ietf-tcpm-tcp-lcd-00">
                <t>
                    <list style="symbols">
                        <t>Editorial changes.</t>

                        <t>Clarified TCP-LCD's behaviour during connection
                        establishment (Thanks to Mark Handley).</t>
                    </list>
                </t>
            </section>

            <section anchor="changes_03" title="Changes from draft-zimmermann-tcp-lcd-02">
                <t>
                    <list style="symbols">
                        <t>Incorporated feedback submitted by Ilpo Jarvinen.
                            <eref target="http://www.ietf.org/mail-archive/web/tcpm/current/msg04841.html" />
                        </t>

                        <t>Incorporated feedback submitted by Pasi Sarolahti.
                            <eref target="http://www.ietf.org/mail-archive/web/tcpm/current/msg04870.html" />
                        </t>

                        <t>Incorporated feedback submitted by Joe Touch.
                            <eref target="http://www.ietf.org/mail-archive/web/tcpm/current/msg04895.html" />
                            <eref target="http://www.ietf.org/mail-archive/web/tcpm/current/msg04900.html" />
                        </t>

                        <t>Extended and reorganized the discussion
                        (<xref target='discussion' />):
                            <list style="symbols">
                                <t>Every discussion item got its own title, so
                                that we have a better overview.</t>

                                <t>Extended Retransmission Ambiguity section. Added
                                also some references to the historical
                                retransmission ambiguity problem.</t>

                                <t>Heavily extended discussion about wrapped
                                sequence numbers (see Joe's comments).</t>

                                <t>Described the influence of packet duplication
                                on the algorithm (Thanks to Ilpo).</t>

                                <t>The section "Protecting Against Misbehaving
                                Routers" is not a subsection anymore. Moreover,
                                the section was renamed to "Dissolving Ambiguity
                                Issues" and has now real content.</t>
                            </list>
                        </t>

                        <t>An interoperability issues section
                        (<xref target='interoperability' />) was added. In
                        particular, comments on ECN, ICMPv6, and the two
                        thresholds R1 and R2 of <xref target="RFC1122" />
                        (Section 4.2.3.5) were added.</t>

                        <t>Miscellaneous editorial changes. In particular, the
                        algorithm has a name now: TCP-LCD.</t>
                    </list>
                </t>
            </section>

            <section anchor="changes_02" title="Changes from draft-zimmermann-tcp-lcd-01">
                <t>
                    <list style="symbols">
                        <t>The algorithm in <xref target='alg' /> was
                        slightly changed. Instead of reverting the last
                        retransmission timer backoff by halving the RTO, the
                        RTO is recalculated with help of the "BACKOFF_CNT"
                        variable. This fixes an issue that occurred when the
                        retransmission timer was backed off but bounded by a
                        maximum value. The algorithm in the previous version of
                        the draft would have "reverted" to half of that
                        maximum value instead of using the value from before the
                        RTO was doubled (and then bounded).</t>

                        <t>Miscellaneous editorial changes.</t>
                    </list>
                </t>
            </section>

            <section anchor="changes_01" title="Changes from draft-zimmermann-tcp-lcd-00">
                <t>
                    <list style="symbols">
                        <t>Miscellaneous editorial changes in Sections
                        <xref target='terminology' format="counter" />,
                        <xref target='intro' format="counter" /> and
                        <xref target='cdi' format="counter" />.</t>

                        <t>The document was restructured in Sections
                        <xref target='terminology' format="counter" />,
                        <xref target='intro' format="counter" /> and
                        <xref target='cdi' format="counter" /> for easier
                        reading. The motivation for the algorithm was changed
                        according to TCP's problem of disambiguating congestion
                        from non-congestion loss.</t>

                        <t>Added <xref target='alg_idea' />.</t>

                        <t>The algorithm in <xref target='alg' /> was
                        restructured and simplified:
                            <list style="symbols">
                                <t>The special case of the first received ICMP
                                destination unreachable message after an RTO was
                                removed.</t>

                                <t>The "BACKOFF_CNT" variable was introduced so
                                it is no longer possible to perform more reverts
                                than backoffs.</t>
                            </list>
                        </t>

                        <t>The discussion in <xref target='discussion' /> was
                        improved and expanded according to the algorithm
                        changes.</t>
                    </list>
                </t>
            </section>

        </section>

    </back>
</rfc>

<!-- PAFTECH AB 2003-2026, 2026-04-21 10:25:16 -->