One document matched: draft-scheffenegger-tcpm-timestamp-negotiation-00.xml


<?xml version="1.0" encoding="US-ASCII"?>
<!-- This template is for creating an Internet Draft using xml2rfc,
    which is available here: http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "http://xml.resource.org/authoring/rfc2629.dtd" [
<!-- One method to get references from the online citation libraries.
    There has to be one entity for each item to be referenced.
    An alternate method (rfc include) is described in the references. -->

<!ENTITY RFC0793 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.0793.xml">
<!ENTITY RFC0896 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.0896.xml">
<!ENTITY RFC1323 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.1323.xml">
<!ENTITY RFC2018 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2018.xml">
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC3517 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3517.xml">
<!ENTITY RFC3782 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3782.xml">
<!ENTITY RFC5681 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5681.xml">
<!ENTITY RFC5827 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5827.xml">



<!ENTITY I-D.narten-iana-considerations-rfc2434bis SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.narten-iana-considerations-rfc2434bis.xml">
]>
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<!-- used by XSLT processors -->
<!-- For a complete list and description of processing instructions (PIs),
    please see http://xml.resource.org/authoring/README.html. -->
<!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds might want to use.
    (Here they are set differently than their defaults in xml2rfc v1.32) -->
<?rfc strict="yes" ?>
<!-- give errors regarding ID-nits and DTD validation -->
<!-- control the table of contents (ToC) -->
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space
    (using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->

<?rfc comments="yes" ?>

<rfc category="std" docName="draft-scheffenegger-tcpm-timestamp-negotiation-00" ipr='trust200902'>
 <!-- category values: std, bcp, info, exp, and historic
    ipr values: full3667, noModification3667, noDerivatives3667
    you can add the attributes updates="NNNN" and obsoletes="NNNN"
    they will automatically be output with "(if approved)" -->

 <!-- ***** FRONT MATTER ***** -->

 <front>
    <!-- The abbreviated title is used in the page header - it is only necessary if the
        full title is longer than 39 characters -->

    <title abbrev="Timestamp Negotiation">Additional negotiation in the TCP Timestamp Option field
                    during the TCP handshake
    </title>

    <!-- add 'role="editor"' below for the editors if appropriate -->

    <!-- Another author who claims to be an editor -->

    <author fullname="Mirja Kuehlewind" initials="M."
    	surname="Kuehlewind">
    	<organization>University of Stuttgart</organization>
    	<address>
    		<postal>
    			<street>Pfaffenwaldring 47</street>
    			<code>70569</code>
    			<city>Stuttgart</city>
    			<country>Germany</country>
    		</postal>
    		<email>mirja.kuehlewind@ikr.uni-stuttgart.de</email>
    	</address>
    </author>
    
    <author fullname="Richard Scheffenegger" initials="R." role="editor"
           surname="Scheffenegger">
     <organization>NetApp, Inc.</organization>
     <address>
       <postal>
         <street>Am Euro Platz 2</street>
         <code>1120</code>
         <city>Vienna</city>
         <region></region>
         <country>Austria</country>
       </postal>
       <phone>+43 1 3676811 3146</phone>
       <email>rs@netapp.com</email>
     </address>
    </author>
    
    


    <date year="2011" />

    <!-- If the month and year are both specified and are the current ones, xml2rfc will fill
        in the current day for you. If only the current year is specified, xml2rfc will fill
    in the current day and month for you. If the year is not the current one, it is
    necessary to specify at least a month (xml2rfc assumes day="1" if not specified for the
    purpose of calculating the expiry date).  With drafts it is normally sufficient to
    specify just the year. -->

    <!-- Meta-data Declarations -->

    <area>Transport</area>

    <workgroup>TCP Maintenance and Minor Extensions (tcpm)</workgroup>

    <!-- WG name at the upperleft corner of the doc,
        IETF is fine for individual submissions.  
    If this element is not present, the default is "Network Working Group",
        which is used by the RFC Editor as a nod to the history of the IETF. -->

    <keyword>Internet-Draft</keyword>
    <keyword>I-D</keyword>

    <!-- Keywords will be incorporated into HTML output
        files in a meta tag but they have no effect on text or nroff
        output. If you submit your draft to the RFC Editor, the
        keywords will be used for the search engine. -->

    <abstract>
     <t>RFC 1323 defines the TSecr field of a SYN 
     packet to be not valid and thus this field will always be zero. This 
     document specifies the use of this field to signal and negotiate 
     additional information about the content of the TSopt field as well 
     as the behavior of the receiver.  If the receiver understands this 
     extension, it will use the TSecr field of the SYN/ACK to reply. 
     Otherwise the receiver will ignore the TSecr field and set a 
     timestamp in the TSecr field as specified in RFC 1323.</t>
   </abstract>
 </front>

 <middle>
    <section anchor="intro" title="Introduction">
     <t>The TCP Timestamps Option (TSopt) provides timestamp echoing for
     Round-trip Time (RTT) measurements.  TSopt is widely deployed and
     activated by default in many systems.  RFC 1323 <xref target="RFC1323"/> specifies
     TSopt the following way:</t>

<figure title="RFC1323 TSopt" align="center">
<artwork align="center"><![CDATA[
   Kind: 8

   Length: 10 bytes

   +-------+-------+---------------------+---------------------+
   |Kind=8 |  10   |   TS Value (TSval)  |TS Echo Reply (TSecr)|
   +-------+-------+---------------------+---------------------+
       1       1              4                     4
]]></artwork></figure>
   <t>
   <list style="empty">
     <t>"The Timestamps option carries two four-byte timestamp fields.  The
   Timestamp Value field (TSval) contains the current value of the
   timestamp clock of the TCP sending the option.</t>

   <t>The Timestamp Echo Reply field (TSecr) is only valid if the ACK bit
   is set in the TCP header; if it is valid, it echos a timestamp
   value that was sent by the remote TCP in the TSval field of a
   Timestamps option.  When TSecr is not valid, its value must be zero.
   The TSecr value will generally be from the most recent Timestamp
   option that was received; however, there are exceptions that are
   explained below.</t>

   <t>A TCP may send the Timestamps option (TSopt) in an initial SYN
   segment (i.e., segment containing a SYN bit and no ACK bit), and may
   send a TSopt in other segments only if it received a TSopt in the
   initial SYN segment for the connection."</t>
   </list>
   </t>

     <t>The comparison of the timestamp in the TSecr field to the current
   time gives an estimation of the RTT.  RFC 1323 <xref target="RFC1323"/> specifies
   various cases when more than one timestamp is available to echo.  The
   proposed solution might not always be the best choice, e.g. when the
   TCP Selective Acknowledgment Option (SACK) is used.  Moreover, more
   and more use cases arise where one-way delay (OWD) measurements are
   needed.  These mechanisms usually misuse the TSopt to estimate the
   variation in OWD.  To enable such mechanisms the TSecr field in the
   TCP SYN packet could be used for additional negotiation.</t>

     	
    <section title="Requirements Language">
     <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
     "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
     document are to be interpreted as described in <xref
     target="RFC2119"/>.</t>
     <t><vspace blankLines='100' /></t>
    </section>
    
    </section>
   
    <section title="Overview">
     <t></t>
    </section>
    
    <section title="Definitions">
     <t>The reader is expected to be familiar with the definitions given in 
     <xref target="RFC1323"/>.</t>
    </section>
    
    <section title="Signaling">
     <t>During the initial TCP three-way handshake, timestamp options are negotiated
     using the TSecr field. A compliant TCP receiver will XOR the flags with the received
     TSval, when responding with the SYN+ACK. Timestamp Options MAY only be present when
     the SYN bit is set.</t>
    
     <section title="Capability Flags">
     <t>In order to signal the supported capabilities, the TSecr is overloaded with the
     following flags and fields during the three-way handshake. If optional capabilities
     such as tcp clock range are presented, minimal state will be required in the 
     host to decode the returned Flags xor'ed with the TSval.</t>

<figure title="timestamp option flags" align="center">
<artwork align="center"><![CDATA[
   Kind: 8

   Length: 10 bytes

   +-------+-------+---------------------+---------------------+
   |Kind=8 |  10   |   TS Value (TSval)  |TS Echo Reply (TSecr)|
   +-------+-------+---------------------+---------------------+
       1       1              4          |           4         |
                                        /                      |
   .-----------------------------------´                       |
  /                                                             \
 |                                                               |
 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 |E|R|R|B|M|     | EXP12hi |  FRAC12hi   | EXP12lo |  FRAC12lo   |
 |X|E|N|I|I| MSK +-----------------------+-----------------------+                                                
 |O|S|G|A|R|     |      RES      |S|  EXP16  |      FRAC16       |
 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
]]></artwork></figure>

    <t><list style="hanging" hangIndent="4">
    <t hangText="EXO - Extended Options"><vspace />
    Indicates that the sender supports extended
    timestamp options as defined by this document, and MUST be set ("1") by compliant
    implementations.</t>
    <t hangText="RES - Reserved"><vspace />
    Reserved for future use. MUST NOT be set ("0"). If
    a timestamp option is received with this bit set, the receiver MUST ignore the
    extended options field and react as if the Flags were not set (compatibility mode).</t>
    <t hangText="RNG - Range negotiation"><vspace />
    Indicates that the sender is capable of adjusting
    the timestamp clock rate within the bounds of the two 12 bit fields (see the EXP12 and FRAC12 descriptions below<!-- xref target="12bit"/-->).
    Only the active sender of a TCP session is allowed to offer a range, while the receiver
    MAY choose a rate within these bounds.</t>
    <t hangText="BIA - Exponent Bias"><vspace />
    When set, the 16 and 12 bit floating point exponents
    are presented with a bias of 21 instead of 15. This allows negotiation of extremely
    fine-grained timestamp clock resolutions, for example in hardware implementations and
    high speed (>10 Gigabit/s) environments. See the EXP16 and FRAC16 descriptions below<!-- xref target="float"/--> for more details.</t>
    <t hangText="MIR - Always Mirror Timestamp"><vspace />
    To disambiguate segments and aid timing
    calculations even during loss episodes, the timestamp will always be mirrored regardless
    of the state of the receiver. A sender SHOULD use this option only in conjunction with
    Selective Acknowledgements (SACK <xref target="RFC2018"/>).</t>
    <t hangText="MSK - Mask Timestamps"><vspace />
    If the timestamp is used for congestion control
    purposes, an incentive exists for malicious receivers to reflect tampered timestamps.
    A sender MAY choose to protect timestamps from such modifications by including a fingerprint
    (secure hash of some kind) in some of the least significant bits. However, doing so
    would prevent a receiver from using the timestamp for other purposes. The MASK field
    indicates how many least significant nibbles should be excluded by the receiver, when
    processing a timestamp. Note that this does not impact the reflected timestamp in
    any way - TSecr will always be equal to an appropriate TSval. Another use case would be
    when the sender does not support a timestamp clock which can guarantee unique timestamps
    for retransmitted segments. For unambiguously identifying regular from retransmitted
    segments, the timestamp must be unique for otherwise identical segments. Reserving
    the lowest nibble for this purpose allows senders with slow running timestamp clocks
    to make use of this feature.</t>
    <t hangText="S - binary16 Sign"><vspace />
    This is the sign bit of the IEEE 754-2008 binary16 floating
    point representation of the timestamp clock. Timestamp clocks MUST be positive, thus this
    bit MUST be zero.</t>
    <t hangText="EXP16 - binary16 Exponent"><vspace />
    The exponent component of a binary16 floating point
    number indicating the timestamp clock. When BIA is not set, the exponent bias is 15 (identical
    to the binary16 definition in IEEE 754-2008). If BIA is set, the exponent bias is 21, allowing
    faster timestamp clock rates. Subnormal numbers (lower precision), where the exponent is zero,
    extend the range to 2^-24 and 2^-30 respectively. Infinity and NaN (all exponent bits set) MUST
    NOT be used, and a timestamp option with NaN/Infinity SHOULD be ignored.</t> 
    <t hangText="FRAC16 - binary16 Fraction"><vspace />
    The fraction component of a binary16 floating point
    number indicating the timestamp clock. The clock rate is measured in seconds between ticks. 
    The least significant bit corresponds therefore to a time interval of 59.6 ns with the default 
    bias of 15, and 0.931 ns with bias set to 21. The longest time interval would be 65504 sec 
    with default bias, and 511.75 sec with bias set to 21.</t>
    <t hangText="EXP12hi">and</t>
    <t hangText="EXP12lo - binary12 Exponent"><vspace />
    The exponent component of a truncated, 12 bit floating point number
    indicating the possible timestamp clock ranges. Only the host initiating a TCP session MAY
    offer a timestamp clock range, while the receiver SHOULD select a timestamp clock within these
    bounds. If the receiver cannot adjust its timestamp clock to match the range, it MAY use
    a timestamp clock rate outside these bounds. If the receiver indicates a timestamp clock
    rate within the indicated bounds, the sender MUST set its timestamp clock rate to the 
    negotiated rate. If the receiver uses a timestamp clock rate outside the indicated bounds,
    it MUST NOT use timestamps where knowledge of the timestamp clock rate is required (i.e., 
    congestion control). The exponent bias is 15 when BIA is not set, and 21 otherwise.</t>
    <t hangText="FRAC12hi">and</t>
    <t hangText="FRAC12lo - binary12 Fraction"><vspace />
    The fraction component of a 12 bit floating point number. Subnormal
    numbers are allowed, while Infinity/NaN MUST NOT be used. Timestamp options with Infinity/NaN 
    values SHOULD be ignored. The smallest representable value is 238 ns with default bias, and 
    3.73 ns with bias set to 21, while the largest values would be virtually identical to the 
    16 bit floating point values (65024 and 508 sec). </t>
    </list>
    </t>
    </section>
    
    </section>
     
    <section title="Discussion">
     <t>One-way delay (variation) based congestion controls would benefit from knowing 
     the clock resolution on both sides.</t> 
     <t>RTT variance during loss episodes is not deeply researched. Current heuristics 
     (RFC1122, RFC1323, Karn's algorithm, RFC2988) explicitly exclude (and prevent) the use of 
     RTT samples when loss occurs. However, solving the retransmission ambiguity problem -
     and the related reliable ACK delivery problem - may allow the refinement of these 
     algorithms further, as well as enabling new research to distinguish between corruption 
     loss (without RTT / one-way delay impact) and congestion loss (with RTT / one-way delay 
     impact). Research into this field appears to be rather neglected, especially when it 
     comes to large scale, public internet investigations. Due to the very nature of this, 
     passive investigations without signals contained within the headers are only of limited 
     use in empirical research.</t>
     <t>Retransmission ambiguity detection during loss recovery would allow an additional 
     level of loss recovery control without reverting to timer-based methods. As with the 
     deployment of SACK, separating "what" to send from "when" to send it could be driven 
     one step further. In particular, less conservative loss recovery schemes which do not 
     trade principles of packet conservation against timeliness, require a reliable way of 
     prompt and best possible feedback from the receiver about any delivered segment and 
     their ordering. SACK alone goes quite a long way, but using Timestamp information in 
     addition could remove any ambiguity. However, the current specs in RFC1323 make that 
     use impossible, thus a modified signaling (receiver behavior) is a necessity.</t>
    
     
    </section>
   
    <section anchor="Acknowledgements" title="Acknowledgements">
     
     <t>The authors would like to thank Dragana Damjanovic for some initial 
     	thoughts around Timestamps and their extended potential use.</t>
     
    </section>

    <!-- Possibly a 'Contributors' section ... -->

    <section anchor="IANA" title="IANA Considerations">
     <t>This memo includes no request to IANA.</t>
    </section>

    <section anchor="Security" title="Security Considerations">
     <t>The algorithm presented in this paper shares security considerations
     with <xref target="RFC1323"/>.</t>
     <t>Some implementations address the vulnerabilities of <xref target="RFC1323"/> 
     by dedicating a few low-order bits of the timestamp fields for use with 
     a (secure) hash, that protects against malicious tweaking of TSecr values. A 
     Flag-field has been provided to transparently notify the receiver about that use
     of low-order bits, so that they can be excluded in one-way delay calculations.</t>
    </section>

 </middle>

 <!--  *****BACK MATTER ***** -->

 <back>
    <!-- References split into informative and normative -->

    <!-- There are 2 ways to insert reference entries from the citation libraries:
    1. define an ENTITY at the top, and use "ampersand character"RFC2629; here (as shown)
    2. simply use a PI "less than character"?rfc include="reference.RFC.2119.xml"?> here
       (for I-Ds: include="reference.I-D.narten-iana-considerations-rfc2434bis.xml")

    Both are cited textually in the same manner: by using xref elements.
    If you use the PI option, xml2rfc will, by default, try to find included files in the same
    directory as the including file. You can also define the XML_LIBRARY environment variable
    with a value containing a set of directories to search.  These can be either in the local
    filing system or remote ones accessed by http (http://domain/dir/... ).-->

    <references title="Normative References">
     <!--?rfc include="http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml"?-->
     
     
     &RFC1323;
     
     &RFC2018;

     &RFC2119;
          
<!--    
     <reference anchor="min_ref">
     
       <front>
         <title>Minimal Reference</title>

         <author initials="authInitials" surname="authSurName">
           <organization></organization>
         </author>

         <date year="2006" />
       </front>
     </reference>
-->
    </references>

    <references title="Informative References">
     <!-- Here we use entities that we defined at the beginning. -->
     
<?rfc include="reference.I-D.ietf-tcpm-tcp-security.xml"?>

     
 <!-- 
     <reference anchor="sack-recovery-entry"
                target="http://tools.ietf.org/html/draft-ietf-tcpm-sack-recovery-entry-01">
       <front>
         <title>Using TCP Selective Acknowledgement (SACK) Information 
         to Determine Duplicate Acknowledgements for Loss Recovery Initiation</title>
         
         <author initials="I." surname="Jarvinen">
           <organization>University of Helsinki</organization>
         </author>
         <author initials="M." surname="Kojo">
           <organization>University of Helsinki</organization>
         </author>
         <date month = "Mar" year = "2010"/>
       </front>
     </reference>
-->
	<reference anchor="Chirp"
		target="http://bobbriscoe.net/projects/netsvc_i-f/chirp_pfldnet10.pdf">
		<front>
			<title>Chirping for Congestion Control - 
				Implementation Feasibility</title>
				
			<author initials="M." surname="Kuehlewind">
				<organization>University of Stuttgart</organization>
			</author>
			<author initials="B." surname="Briscoe">
				<organization>British Telecom</organization>
			</author>
			<date month="Nov" year="2010"/>
		</front>
	</reference>
     
          
    </references>

	
 </back>
</rfc>


PAFTECH AB 2003-20262026-04-23 08:27:35