One document matched: draft-ietf-p2psip-diagnostics-01.xml


<?xml version="1.0" encoding="US-ASCII"?>
<!-- edited with XMLSPY v5 rel. 3 U (http://www.xmlspy.com)

     by Daniel M Kohn (private) -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC3261 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3261.xml">
<!ENTITY RFC0792 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.0792.xml">
<!ENTITY RFC4330 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4330.xml">
<!ENTITY RFC4981 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4981.xml">
<!ENTITY RFC5226 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5226.xml">
<!ENTITY I-D.ietf-p2psip-sip SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-p2psip-sip.xml">
<!ENTITY I-D.ietf-p2psip-base SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-p2psip-base.xml">
<!ENTITY I-D.song-p2psip-security-eval SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.song-p2psip-security-eval.xml">
<!ENTITY I-D.bryan-p2psip-app-scenarios SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.bryan-p2psip-app-scenarios.xml">
<!ENTITY I-D.bryan-p2psip-requirements SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.bryan-p2psip-requirements.xml">
<!ENTITY I-D.zheng-p2psip-diagnose SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.zheng-p2psip-diagnose.xml">
<!ENTITY I-D.matuszewski-p2psip-security-requirements SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.matuszewski-p2psip-security-requirements.xml">
<!ENTITY I-D.baset-p2psip-p2pp SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.baset-p2psip-p2pp.xml">
<!ENTITY I-D.ietf-mmusic-ice SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-mmusic-ice.xml">
<!ENTITY I-D.ietf-behave-rfc3489bis SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-behave-rfc3489bis.xml">
<!ENTITY I-D.ietf-p2psip-concepts SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-p2psip-concepts.xml">
]>
<rfc category="std" docName="draft-ietf-p2psip-diagnostics-01"
     ipr="trust200902" submissionType="IETF" updates="" xml:lang="">
  <?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>

  <?rfc toc="yes" ?>

  <?rfc symrefs="yes" ?>

  <?rfc sortrefs="no"?>

  <?rfc iprnotified="no" ?>

  <?rfc strict="no" ?>

  <?rfc compact="yes"?>

  <?rfc subcompact="no"?>

  <front>
    <title abbrev="P2PSIP Overlay Diagnostics">P2PSIP Overlay
    Diagnostics</title>

    <author fullname="Song Haibin" initials="H." surname="Song">
      <organization>Huawei</organization>

      <address>
        <postal>
          <street>Baixia Road No. 91</street>

          <city>Nanjing</city>

          <region>Jiangsu Province</region>

          <code>210001</code>

          <country>P.R.China</country>
        </postal>

        <phone>+86-25-84565867</phone>

        <facsimile>+86-25-84565888</facsimile>

        <email>melodysong@huawei.com</email>
      </address>
    </author>

    <author fullname="Jiang Xingfeng" initials="X." surname="Jiang">
      <organization>Huawei</organization>

      <address>
        <postal>
          <street>Baixia Road No. 91</street>

          <city>Nanjing</city>

          <region>Jiangsu Province</region>

          <code>210001</code>

          <country>P.R.China</country>
        </postal>

        <phone>+86-25-84565868</phone>

        <facsimile>+86-25-84565888</facsimile>

        <email>jiang.x.f@huawei.com</email>
      </address>
    </author>

    <author fullname="Roni Even" initials="R." surname="Even">
      <organization>Huawei</organization>

      <address>
        <postal>
          <street>14 David Hamelech</street>

          <city>Tel Aviv 64953</city>

          <country>Israel</country>
        </postal>

        <email>even.roni@huawei.com</email>
      </address>
    </author>

    <author fullname="David A. Bryan" initials="D." surname="Bryan">
      <organization>Cogent Force, LLC</organization>

      <address>
        <postal>
          <street>Williamsburg, Virginia</street>

          <country>United States of America</country>
        </postal>

        <email>dbryan@ethernot.org</email>
      </address>
    </author>

    <date day="30" month="June" year="2009" />

    <area>Real-time Applications and Infrastructure</area>

    <workgroup>P2PSIP Working Group</workgroup>

    <keyword>Diagnostics</keyword>

    <keyword>P2PSIP</keyword>

    <abstract>
      <t>This document describes mechanisms for P2PSIP diagnostics. It
      describes the usage scenarios and defines several simple methods for
      performing diagnostics in P2PSIP overlay networks. It also describes the
      diagnostic information which is useful for the connection and node
      status monitoring. The methods and message formats are specified as
      extensions to P2PSIP base protocol RELOAD.</t>
    </abstract>
  </front>

  <middle>
    <section title="Introduction">
      <t>In the last few years, overlay networks have rapidly evolved and
      emerged as a promising platform to deploy new applications and services
      in the Internet. One of the reasons overlay networks are seen as an
      excellent platform for large scale distributed systems is their
      resilience in the presence of failures. This resilience has three
      aspects: data replication, routing recovery, and static resilience.
      Routing recovery algorithms are used to repopulate the routing table
      with live nodes when failures are detected. Static resilience measures
      the extent to which an overlay can route around failures even before the
      recovery algorithm repairs the routing table. Both routing recovery and
      static resilience rely on accurate and timely detection of
      failures.</t>

      <t>As described in <xref
      target="I-D.matuszewski-p2psip-security-requirements">"Security
      requirements in P2PSIP"</xref>, there are a number of situations in
      which some peers in a P2PSIP overlay may malfunction or behave badly.
      For example, these peers may be disabled peers, congested peers or peers
      misrouting messages, and the impact of those peers on the overlay
      network may be a degradation of quality of service provided collectively
      by the peers in the overlay network or an interruption of those
      services. It is desirable to identify malfunctioning or badly behaving
      peers through diagnostic tools, and exclude or reject them from the
      P2PSIP system. Besides those faults, node failures may be caused by
      underlying failures, for example, the recovery from an incorrect overlay
      topology may be slow when the IP layer routing failover speed after link
      failures is very slow. Moreover, if a backbone link fails and the
      failover is slow, the network may be partitioned, leading to partitions
      of overlay topologies and inconsistent routing results between different
      partitioned components.</t>

      <t>Some keep-alive algorithms based on periodic probe and acknowledge
      mechanisms enable accurate and timely detection of failures of one
      peer's neighbors <xref target="Overlay-Failure-Detection"></xref>, but
      these algorithms by themselves can only detect disabled neighbors
      using the periodic method, which may not be enough for service
      providers operating the overlay network.</t>

      <t>A single, general P2PSIP overlay diagnostic framework supporting
      periodic and on-demand methods for detecting node failures and network
      failures is desirable. This document describes a general P2PSIP overlay
      diagnostic extension to the P2PSIP base protocol and it is a good
      complement to keep-alive algorithms in the P2P or P2PSIP overlay
      itself.</t>
    </section>

    <section title="Terminology" toc="default">
      <t>The concepts used in this document are compatible with <xref
      target="I-D.ietf-p2psip-concepts">"Concepts and Terminology for Peer to
      Peer SIP"</xref> and the <xref target="I-D.ietf-p2psip-base">P2PSIP base
      protocol RELOAD</xref>.</t>

      <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
      "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
      document are to be interpreted as described in <xref
      target="RFC2119"></xref>.</t>
    </section>

    <section title="Diagnostic Scenarios">
      <t>P2P systems are self-organizing and ideally require no network
      management in the traditional sense to set up and to configure
      individual P2P nodes. However, P2P service providers may contemplate
      usage scenarios where some monitoring and diagnostics are required. We
      present a simple connectivity test and some useful diagnostic
      information that may be used in such diagnostics.</t>

      <t>The common usage scenarios for P2P diagnostics can be broadly
      categorized in three classes:</t>

      <t>a. Automatic diagnostics built into the P2P overlay routing protocol.
      Nodes perform periodic checks of known neighbors and remove those nodes
      from the routing tables that fail to respond to connectivity checks
      <xref target="Handling_Churn_in_a_DHT"></xref>. However, the
      unresponsive nodes may only be temporarily disabled due to some local
      cryptographic processing overload, disk processing overload or link
      overload. It is therefore useful to repeat the connectivity checks to
      see if such nodes have recovered and can be again placed in the routing
      tables. This process is known as 'failed node recovery' and can be
      optimized as described in the paper <xref
      target="Handling_Churn_in_a_DHT">"Handling Churn in a DHT"</xref>.</t>

      <t>b. P2P system diagnostics to check the overall health of the P2P
      overlay network, the consumption of network bandwidth, for the presence
      of problem links and also to check for abusive or malicious nodes. This
      is not a trivial problem and has been studied in detail for content and
      streaming P2P overlays <xref target="Diagnostic_Framework"></xref> as
      well as in earlier P2PSIP documents <xref
      target="Diagnostics_and_NAT_traversal_in_P2PP"></xref>.</t>

      <t>c. Diagnostics for a particular node to follow up an individual user
      complaint. In this case a technical support person may use a desktop
      sharing application with the permission of the user to determine
      remotely the health and possible problems with the malfunctioning node.
      Part of the remote diagnostics may consist of simple connectivity tests
      with other nodes in the P2PSIP overlay and retrieval of statistics of
      nodes from the overlay. The simple connectivity tests are not dependent on
      the type of P2PSIP overlay. Note that other tests may be required as
      well, such as checking the health and performance of the user's computer
      or mobile device and also checking the bandwidth of the link connecting
      the user to the Internet.</t>
    </section>

    <section title="Overview of operations" toc="default">
      <t>The diagnostic mechanisms described in this document are mainly
      intended to detect and localize failures or monitor performance in
      P2PSIP overlay networks. It provides mechanisms to detect and localize
      malfunctioning or badly behaving peers including disabled peers,
      congested peers and misrouting peers. It provides a mechanism to detect
      direct connectivity or connectivity to a specified peer, a mechanism to
      detect the availability of specified resource records and a mechanism to
      discover P2PSIP overlay topology and the underlay topology failures.</t>

      <t>The P2PSIP diagnostics extensions define Inspect and Path_Track
      methods for connection quality check and retrieval of diagnostic
      information, and the Error response to these methods. Essentially, they
      reuse the P2PSIP base protocol specification and extend it to introduce
      the new diagnostics methods. The extensions strictly follow the P2PSIP
      base protocol specification for message routing, transport, NAT
      traversal, etc. The diagnostic methods are, however, P2PSIP protocol
      independent.</t>

      <t>This document mainly describes how to detect and localize failures
      including disabled peers, congested peers, misrouting behaviors and
      underlying network faults in P2PSIP overlay networks through a simple
      and efficient mechanism. This mechanism is modeled after the
      ping/traceroute paradigm: ping (RFC792 <xref target="RFC0792">ICMP echo
      request </xref>) is used for connectivity checks, and traceroute is used
      for hop-by-hop fault localization as well as path tracing. This document
      specifies a "ping" mode (by defining the Inspect method) and a
      "traceroute" mode (by defining the Path_Track method) for diagnosing
      P2PSIP overlay networks.</t>

      <t>We define a simple Path_Track method for retrieving diagnostics
      information iteratively. First, the initiating node sends a request to
      its neighbor A, which is its next hop toward the destination ID, and A
      returns information about the next hop node B, along with optional
      diagnostic information of A, to the initiator node. Then the initiator
      node asks the next hop node B (directly or through symmetric routing)
      for information about the further next hop node C and for diagnostic
      information of B. This step is repeated until the request reaches the
      responsible node D for the destination ID and the diagnostic
      information of node D is retrieved, or until the process is terminated
      by a failure that prevents it from continuing.</t>

      <t>One way these tools can be used is to detect the connectivity to
      the specified peer or the availability of the specified resource-record
      through a P2PSIP Inspect operation once the overlay network receives
      some alarms about overlay service degradation or interruption. If the
      Inspect fails, one can then send a P2PSIP Path_Track to determine where
      the fault lies.</t>

      <t>The authors earlier considered an approach where a response was
      generated by each intermediate peer as the message traversed the
      overlay, but this approach was discarded as a result of working group
      discussion. One reason this approach was discarded was that it could
      provide a DoS mechanism, whereby an attacker could send an arbitrary
      message claiming to be from a spoofed "sender" that the real
      sender wished to attack. As a result of sending this one message, many
      messages would be generated and sent back to the spoofed
      "sender" -- one from each intermediate peer on the message
      path. While authentication mechanisms could reduce some of the risk of
      this attack, the approach still resulted in a fundamental break from
      the request-response nature of the RELOAD protocol, as multiple
      responses would be generated for a single request. This drawback
      outweighed the fact that a single request with responses from all the
      peers in the route would be more efficient.</t>

      <t>The diagnostic information MUST only be provided to authorized peers.
      Some diagnostic information can be authorized to all the participants in
      the P2PSIP overlay, and some other diagnostic information can only be
      provided to the peers on the authorization list for each piece of
      diagnostic information, according to the local or overlay policy. The
      authorization mainly depends on the kind of diagnostic information and
      on administrative considerations.</t>

      <section title="Inspect: &quot;Ping&quot; behavior">
        <t>To provide "ping"-like behavior, an Inspect request
        message is forwarded by the intermediate peers along the path and then
        terminated by the responsible peer, and after optional local
        diagnostics, the responsible peer returns an Inspect response message.
        If an error is found when routing, an Error response is sent to the
        initiator node by the intermediate peer.</t>
      </section>

      <section title="Path_Track: &quot;Traceroute&quot; behavior">
        <t>A simple Path_Track method is used for retrieving diagnostics
        information iteratively. First, the initiating node sends a request to
        its neighbor A, which is its next hop toward the destination ID, and A
        returns information about the next hop node B, along with optional
        diagnostic information of A, to the initiator node. Then the initiator
        node asks the next hop node B (directly or through symmetric routing)
        for information about the next hop node C and for diagnostic
        information of B. Unless a failure prevents the message from being
        forwarded, this step is repeated until the request reaches the
        responsible node D for the destination ID and the diagnostic
        information for node D is retrieved.</t>

        <t>One application of these tools is to detect and diagnose the
        connectivity to the specified peer or the availability of the
        specified resource-record through P2PSIP Inspect operation after the
        overlay network receives some alarms about overlay service degradation
        or interruption. If the Inspect fails, one can then send a P2PSIP
        Path_Track to determine where the fault lies.</t>
      </section>

      <section title="Authorization">
        <t>The diagnostic information must only be provided to authorized
        peers. Some diagnostic information can be authorized to all the
        participants in the P2PSIP overlay, and some other diagnostic
        information can only be provided to the authorization peer list for
        each piece of diagnostic information according to the local or overlay
        policy. The authorization mainly depends on the kinds of the
        diagnostic information and the administrative considerations.</t>
      </section>
    </section>

    <section title="RELOAD diagnostic extensions">
      <t>This document extends the P2PSIP base protocol to carry diagnostics
      information. Considering the special usage of diagnostics, this document
      defines simple new methods: Inspect and Path_Track. Additionally, the
      related Error codes for these methods, and some useful diagnostics
      information are defined. Processing of the messages is discussed.</t>

      <t>As described in the P2PSIP base protocol, each message has three
      parts. This specification is consistent with the format. <figure
          align="left">
          <artwork>
         +-------------------------+ 
         |    Forwarding Header    | 
         +-------------------------+ 
         |    Message Contents     | 
         +-------------------------+ 
         |       Signature         | 
         +-------------------------+ 
</artwork>
        </figure></t>

      <section title="Message Code Extension">
        <t>The mechanism defined in this document follows the P2PSIP base
        protocol specification: the new request and response messages use the
        message format specified in the P2PSIP base protocol. Different types of
        messages convey different message contents following the forwarding
        header according to the protocol design. Please refer to <xref
        target="I-D.ietf-p2psip-base">P2PSIP base protocol</xref> for the
        detailed format of forwarding header.</t>

        <t>This document introduces two types of messages and their responses:
        <figure align="left">
            <artwork>
   Name                  Message Code 
   Inspect    request         101 
   Inspect    response        102 
   Path_Track request         103 
   Path_Track response        104 
  </artwork>
          </figure></t>

        <t>The final message code will be assigned by IANA as specified in
        section 13.6 of <xref target="I-D.ietf-p2psip-base"></xref>.</t>
      </section>

      <section title="Message Type Extensions">
        <t>All P2PSIP base protocol requests and responses use the common
        forwarding header followed by the message contents.</t>

        <t>This document defines Inspect and Path_Track methods to detect and
        localize failures in P2PSIP overlay network. The Error Codes to these
        requests are defined in <xref target="sec_err_codes"></xref> of this
        spec.</t>

        <section title="Inspect">
          <t>In P2PSIP base protocol, Ping is used to test connectivity along
          a path. However, connectivity quality cannot be measured well
          without some useful information, such as the timestamp and hop
          counter. Here we define a new method Inspect for connectivity
          quality check purposes. <figure align="center">
              <artwork>
  Peer-1              Peer-2               Peer-3             Peer-4
    |                    |                    |                    |
    |(1). InspectReq     |                    |                    |
    |------------------->|(2).InspectReq      |                    |
    |                    |------------------->|(3). InspectReq     |
    |                    |                    |------------------->|
    |                    |                    |                    |
    |                    |                    |&lt;-------------------|
    |                    |&lt;-------------------|(4). InspectAns     |
    |&lt;-------------------|(5). InspectAns     |                    |
    |(6). InspectAns     |                    |                    |
    |                    |                    |                    |

                          Inspect example
</artwork>
            </figure>See below for the Inspect formats. <figure align="left">
              <artwork>
   Inspect Request: 
            struct { 
              uint64 expiration; 
              uint8  underlayTTL; 
              uint64 timestampInitiated; 
            } InspectReq; 
    
   Inspect Response: 
            struct { 
              uint64 expiration; 
              uint8  hopCounter; 
              uint64 timestampReceived; 
            }InspectAns; 
</artwork>
            </figure></t>

          <t>expiration : The time-of-day (in seconds and microseconds,
          according to the receiver's clock) in NTP timestamp format <xref
          target="RFC4330"></xref> when the Inspect request expires. This
          field can be used to mitigate the replay attack to the destination
          peer and overlay network.</t>

          <t>underlayTTL : It indicates the underlay TTL which the
          intermediate peer must adopt when forwarding the diagnostic
          requests; it is specified by the initiator. If the value is 0, then
          the intermediate peer must ignore this field, and use the underlay
          TTL from its local configuration.</t>

          <t>The requirement here for underlayTTL is that one may want to
          limit the underlay TTL of each hop from the initiator to the
          destination. If the underlay TTL expires somewhere, this may provide
          a (possibly false) indication that the link is not of good
          quality.</t>

          <t>Note: underlayTTL means IP layer time-to-live. RELOAD does not
          require the intermediate peers to look into the message body, but
          honoring underlayTTL requires them to do so. On the other hand, what
          about using Path_Track to gather underlay hops?</t>

          <t>timestampInitiated : The time-of-day (in seconds and
          microseconds, according to the sender's clock) in NTP timestamp
          format <xref target="RFC4330"></xref> when the P2PSIP Overlay
          diagnostic request is sent.</t>

          <t>timestampReceived : The time-of-day (in seconds and microseconds,
          according to the receiver's clock) in NTP timestamp format <xref
          target="RFC4330"></xref> when the P2PSIP Overlay diagnostic request
          was received.</t>

          <t>hopCounter : This field only appears in diagnostic responses. It
          must be exactly copied from the TTL field of the forwarding header
          in the received request. This information is sent back to the
          request initiator, allowing it to compute the hops that the message
          traversed in the overlay.</t>
        </section>

        <section anchor="Path_Track" title="Path_Track">
          <t>This document defines a simple Path_Track method to retrieve the
          diagnostic information from the intermediate peers along the routing
          path. At each step of the Path_Track request, the responsible peer
          responds to the initiator node with its status information like
          congestion state, its processing power, its available bandwidth, the
          number of entries in its neighbor table, its uptime, its identity
          and network address information, and the next hop peer
          information.</t>

          <figure align="center">
            <artwork>
   Peer-1              Peer-2               Peer-3             Peer-4 
     |                    |                    |                    | 
     |(1).PathTrackReq    |                    |                    | 
     |------------------->|                    |                    | 
     |(2).PathTrackAns    |                    |                    | 
     |&lt;-------------------|                    |                    | 
     |                    |(3).PathTrackReq    |                    | 
     |--------------------|------------------->|                    | 
     |                    |(4).PathTrackAns    |                    | 
     |&lt;-------------------|--------------------|                    | 
     |                    |                    |(5).PathTrackReq    | 
     |--------------------|--------------------|------------------->| 
     |                    |                    |(6).PathTrackAns    | 
     |&lt;-------------------|--------------------|--------------------| 
     |                    |                    |                    | 
    
                            Path_Track example 
</artwork>
          </figure>

          <t>A Path_Track request specifies which diagnostic information is
          requested by setting different bits in the flag contained in the
          Path_Track request. If the flag is clear (no bits are set), then the
          Path_Track request is only used for requesting the next hop
          information. In this case the iterative mode of Path_Track is
          degraded to a Route_Query method which is only used for checking the
          liveness of the peers along the routing path. The Path_Track request
          can be routed directly or through the overlay based on the routing
          mode chosen by the initiator node.</t>

          <t>A response to a successful PathTrackReq is a PathTrackAns
          message. There is a general diagnostic information portion of the
          payload, the contents of which are based on the flags in the
          request. Please refer to <xref target="diag_information"></xref> for
          the definitions of the diagnostic information.</t>

          <figure align="left">
            <artwork>
   Path_Track request: 
             struct { 
               Destination            destination;
               uint64                 expiration;
               uint64                 timestampInitiated;  
               uint8                  length;

               select (length){
               case 0:
                 uint64                 dMFlags;
              
               case > 0:
                 uint64                 dMFlags;
                 uint64                 dEFlags&lt;0..length-1&gt;; 
               }
             } PathTrackReq; 
</artwork>
          </figure>

          <t><list>
              <t>destination : The destination which the requester is
              interested in. This may be any valid destination object,
              including a Node-ID, compressed ids, or Resource-ID.</t>

              <t>expiration : The time-of-day (in seconds and microseconds,
              according to the receiver's clock) in NTP timestamp format <xref
              target="RFC4330"></xref> when the Path_Track request expires.
              This field can be used to mitigate the replay attack to the
              destination peer and overlay network.</t>

              <t>timestampInitiated : The time-of-day (in seconds and
              microseconds, according to the sender's clock) in NTP timestamp
              format <xref target="RFC4330"></xref> when the P2PSIP Overlay
              diagnostic request is sent.</t>

              <t>length : the number of extended diagnostics flags (in the
              unit of 64 bits). If the value is greater than or equal to 1,
              then one or more extended diagnostics flags (dEFlags) are
              specified. The value of length must not be negative.</t>

              <t>dMFlags : A mandatory flag which is an unsigned 64-bit
              integer indicating which kind of diagnostic information the
              initiator is interested in. The initiator sets different bits to
              retrieve different kinds of diagnostic information. If dMFlags
              is clear, then no mandatory diagnostic information is conveyed
              in the Path_Track response. If dMFlags is set to all '1's, then
              all diagnostic information kinds are requested. (Note: This memo
              specifies the initial set of flags; the flags can be extended by
              standard action. We will add a section about extending the
              flags, both standard and application specific, in a future
              version.) The dMFlags indicate general diagnostic information.
              The mapping between the bits in the dMFlags and the diagnostic
              information kind presented is as below.<list>
                  <t>STATUS_INFO(0x0001) : if set, the status information of
                  the responding peer is requested;</t>

                  <t>ROUTING_TABLE_SIZE(0x0002) : if set, the number of
                  entries in the responding peer's neighbor table is
                  requested;</t>

                  <t>PROCESS_POWER(0x0004) : if set, the processing power
                  information of the responding peer is requested;</t>

                  <t>BANDWIDTH(0x0008) : if set, the bandwidth information of
                  the responding peer is requested;</t>

                  <t>SOFTWARE_VERSION(0x0010): if set, the software version of
                  the peer program is requested;</t>

                  <t>MACHINE_UPTIME(0x0020): if set, the uptime of the machine
                  is requested;</t>

                  <t>APP_UPTIME(0x0040): if set, the uptime of the p2p
                  application is requested;</t>

                  <t>MEMORY_FOOTPRINT(0x0080): if set, the memory footprint of
                  the peer program is requested;</t>

                  <t>DATASIZE_STORED(0x0100): if set, the number of bytes of
                  data being stored by this node is requested;</t>

                  <t>MESSAGES_SENT_RCVD(0x0200): if set, an array element
                  containing the number of messages sent and received is
                  requested;</t>

                  <t>EWMA_BYTES_SENT(0x0400): if set, an integer representing
                  the exponential weighted average of bytes sent per second by
                  this peer is requested;</t>

                  <t>EWMA_BYTES_RCVD(0x0800): if set, an integer representing
                  the exponential weighted average of bytes received per
                  second by this peer is requested;</t>
                </list></t>
            </list></t>

          <t>dEFlags : the extended diagnostics flags which can be used by
          applications to retrieve their own customized diagnostics
          information. This allows private extensions.</t>

          <figure align="left">
            <artwork>
   Path_Track response: 
             struct { 
                   Destination     next_hop;
                   uint64          expiration; 
                   uint64          timestampReceived; 
                   uint8           length; 
                   Diagnostic_Info diag_info_list&lt;0..length-1&gt;; 
             } PathTrackAns; 
</artwork>
          </figure>

          <t><list>
              <t>next_hop : The information of the next hop node from the
              responding intermediate peer to the destination node. If the
              responding peer is the responsible peer for the destination ID,
              then the next_hop node ID equals the responding node ID, and
              after that the initiator must stop the iterative process.</t>

              <t>expiration : The time-of-day (in seconds and microseconds,
              according to the receiver's clock) in NTP timestamp format <xref
              target="RFC4330"></xref> when the Path_Track response expires.
              This field can be used to mitigate the replay attack to the
              destination peer and overlay network.</t>

              <t>timestampReceived : The time-of-day (in seconds and
              microseconds, according to the receiver's clock) in NTP
              timestamp format <xref target="RFC4330"></xref> when the P2PSIP
              Overlay diagnostic request was received.</t>

              <t>length : the number of Diagnostic_Info values contained in
              the response.</t>

              <t>diag_info_list : The diagnostic information from the
              responding peer.</t>
            </list></t>

          <t>The TLV structure for Diagnostic_Info is as follows: <figure
              align="left">
              <artwork>
            struct { 
              KindId     kind; 
              uint8     length; 
              Opaque    diagnostic_information&lt;0..2^8-1&gt;; 
            } Diagnostic_Info; 
</artwork>
            </figure></t>

          <t>kind : A numeric code indicating the type of information being
          reported.</t>

          <t>length : the length in bytes of the opaque data containing the
          information being reported.</t>

          <t>diagnostic_information : Data of length specified above
          containing the value for the diagnostic information being
          reported.</t>

          <t>Various kinds of diagnostic information can be retrieved. Please
          refer to section <xref target="diag_information"></xref> for details
          of the types and Kind-ID for the diagnostic information that may be
          reported.</t>
        </section>
      </section>

      <section title="Message Payload Extensions">
        <t>As an extension to the P2PSIP base protocol, a P2PSIP diagnostics
        protocol message content contains one message code followed by its
        payload. Please refer to <xref target="I-D.ietf-p2psip-base">P2PSIP
        base protocol </xref> for the detailed format of Message Contents.</t>

        <t>In addition to the newly introduced methods, this document extends
        the Error codes defined in P2PSIP base protocol specification.</t>

        <section anchor="sec_err_codes" title="Error Codes">
          <t>This document extends the Error response method defined in the
          P2PSIP base protocol specification to describe the result of
          diagnostics.</t>

          <t><figure align="left">
              <artwork>
   Name                  Message Code 
   Error                    0xFFFF 
</artwork>
            </figure></t>

          <t>This document defines new Error codes to carry different failure
          reports to the initiator node when failure is detected during
          diagnostics. This document introduces new Error Codes as below:
          <figure align="left">
              <artwork>
   Code Value          Error Code Name 
   101                Underlay Destination Unreachable  
   102                Underlay Time exceeded 
   103                Message Expired
   104                Upstream Misrouting
   105                Loop detected
   106                TTL hops exceeded 
</artwork>
            </figure></t>

          <t>The final error codes will be assigned by IANA as specified in
          section 13.7 of the <xref target="I-D.ietf-p2psip-base">p2psip base
          protocol </xref>.</t>

          <t>This document introduces several types of error information in
          the error_info field for Error Code 101 as an example: <figure
              align="left">
              <artwork>
   error_info: 
    
     net unreachable 
     host unreachable 
     protocol unreachable 
     port unreachable 
     fragmentation needed 
     source route failed 
</artwork>
            </figure></t>

          <t>Editor note: We may need more discussion here to see if we need
          to define an additional sub-code field for the error information.
          Sub-code is easier for the machine to process while various text is
          more human readable.</t>
        </section>

        <section anchor="diag_information" title="Diagnostics information">
          <t>This document introduces some diagnostics information conveyed in
          the message payload which can be retrieved to get the statistics and
          also allows for retrieval of other kinds that a node stores. In
          essence, the usage allows querying a node's state such as storage
          and network to obtain the relevant information. It can also be used
          to discover information such as the software version, uptime,
          routing table, stored resource-objects, performance statistics of a
          peer and link quality of an overlay route. The diagnostic information
          data kinds are defined below.<list>
              <t>PROCESS_POWER (32 bits): A single value element containing an
              unsigned 32-bit integer specifying the processing power of the
              node in unit of MIPS.</t>

              <t>BANDWIDTH (32 bits): A single value element containing an
              unsigned 32-bit integer specifying the bandwidth of the node in
              unit of Kbps.<list>
                  <t>Editor's note: For the diagnostic information of
                  processing power, bandwidth, etc., we should look at what
                  has been useful for PlanetLab and in commercial deployments
                  in this context, and further discussion is needed on what
                  mature diagnostics information for p2p overlays can be
                  brought here.</t>
                </list></t>

              <t>ROUTING_TABLE_SIZE (32 bits): A single value element
              containing an unsigned 32-bit integer representing the number of
              peers in the peer's routing table. The administrator of the
              overlay may be interested in statistics of this value for
              considerations such as routing efficiency.</t>

              <t>STATUS_INFO (8 bits): A single value element containing an
              unsigned byte representing whether or not the node is in
              congestion status.</t>

              <t>SOFTWARE_VERSION: A single value element containing a
              US-ASCII string that identifies the manufacturer, model, and
              version of the software.</t>

              <t>MACHINE_UPTIME (64 bits): A single value element containing
              an unsigned 64-bit integer specifying the time the node has
              been up in seconds.</t>

              <t>APP_UPTIME (64 bits): A single value element containing an
              unsigned 64-bit integer specifying the time the p2p application
              has been up in seconds.</t>

              <t>MEMORY_FOOTPRINT (32 bits): A single value element containing
              an unsigned 32-bit integer representing the memory footprint of
              the peer program in kilobytes. <list>
                  <t>Note: A kilobyte in this document represents 1024
                  bytes.</t>
                </list></t>

              <t>DATASIZE_STORED (64 bits): An unsigned 64-bit integer
              representing the number of bytes of data being stored by this
              node.</t>

              <t>INSTANCES_STORED: An array element containing the number of
              instances of each kind stored. The array is indexed by Kind-ID.
              Each entry is an unsigned 64-bit integer.</t>

              <t>MESSAGES_SENT_RCVD: An array element containing the number of
              messages sent and received. The array is indexed by method code.
              Each entry in the array is a pair of unsigned 64-bit integers
              (packed end to end) representing sent and received.</t>

              <t>EWMA_BYTES_SENT (32 bits): A single value element containing
              an unsigned 32-bit integer representing an exponential weighted
              average of bytes sent per second by this peer. The value is
              updated as sent = alpha x sent_present + (1 - alpha) x
              sent_last, where sent_present represents the bytes sent per
              second since the last calculation and sent_last represents the
              result of the last calculation of bytes sent per second. A
              suitable value for alpha is 0.8. This value is calculated every
              five seconds.</t>

              <t>EWMA_BYTES_RCVD (32 bits): A single value element containing
              an unsigned 32-bit integer representing an exponential weighted
              average of bytes received per second by this peer. Same
              calculation as above.</t>
            </list></t>
        </section>
      </section>

      <section title="Message Processing">
        <section title="Message Creation and Transmission">
          <t>When constructing either an Inspect message or a Path_Track
          message, the sender MUST set both the destination and the expiration
          value (in NTP timestamp format) for the message.</t>

          <t>When constructing an Inspect message, the sender MAY specify a
          value for underlayTTL. If a value is not specified, the sender MUST
          set underlayTTL to 0. The sender MUST generate an NTP format
          timestamp for the current time of day and place it in the
          timestampInitiated field.</t>

          <t>When constructing a Path_Track message, the sender MUST set the
          length value to a value equal to or greater than 0. If the value of
          length is set to 0, the sender MUST include a dMFlags value, and
          MUST NOT include any dEFlags values. If the sender wishes to send
          dEFlags in addition to dMFlags, the sender MUST set the value of
          length to be equal to the number of dEFlags present. Note that the
          sender MUST NOT set length to negative. The sender MAY set bits in
          dMFlags as discussed above to request specific information, and MAY
          set bits in dEFlags fields to request user-specific bits. The sender
          also MAY set dMFlags to all zero, indicating that no diagnostic
          information is requested.</t>

          <t>A Path_Track request MUST specify which diagnostic information is
          requested by setting different bits in the flag contained in the
          Path_Track request payload. If the flag is clear, then the
          Path_Track request is only used for asking the next hop information,
          in this case the iterative mode of Path_Track is degraded to a
          Route_Query method which is only used for checking the liveness of
          the peers along the routing path. The Path_Track request can be
          routed directly or through the overlay based on the routing mode
          chosen by the initiator node.</t>
        </section>

        <section anchor="Message_Processing_Intermediate_Peers"
                 title="Message Processing: Intermediate Peers">
          <t>When a request arrives at a peer, if the peer's responsible ID
          space does not cover the destination ID of the request, then the
          peer MUST continue processing this request according to the overlay
          specified routing mode from the base draft.</t>

          <t>In a P2PSIP overlay, an error response can be generated by an
          intermediate peer or the responsible peer, in reply to a diagnostic
          message or other messages. When a request is received at a peer, the
          peer may find some connectivity failures or malfunctioning peers
          through the pre-defined rules of the overlay network, e.g. by
          analyzing the via list or underlay error messages. The peer should
          report the error responses to the initiator node. The malfunctioning
          node information should also be reported to the initiator node in
          the error message payload. All error responses contain the Error
          code followed by the subcode and descriptions, if they exist.</t>

          <t>Each intermediate peer receiving an Inspect or Path_Track
          request/response SHOULD check the expiration value (NTP format) to
          determine if the message expired. If the message expired, the
          intermediate peer SHOULD generate a message with Error Code 103
          "Message Expired" and return it to the initiator node, and discard
          the message.</t>

          <t>The peer should return an Error response with the Error Code 101
          "Underlay Destination Unreachable" when it receives an ICMP message
          with "Destination Unreachable" information after forwarding the
          received request to the destination peer.</t>

          <t>The peer should return an Error response with the Error Code 102
          "Underlay Time Exceeded" when it receives an ICMP message with "Time
          Exceeded" information after forwarding the received request.</t>

          <t>The peer should return an Error response with Error Code 104
          "Upstream Misrouting" when it finds its upstream peer disobeys the
          routing rules defined in the overlay. The immediate upstream peer
          information should also be conveyed to the initiator node.</t>

          <t>The peer should return an Error response with Error Code 105
          "Loop detected" when it finds a loop through the analysis of via
          list.</t>

          <t>The peer should return an Error response with Error Code 106 "TTL
          hops exceeded" when it finds that the TTL field value is no more
          than 0 when forwarding.</t>

          <t>With Path_Track, if a former Path_Track message does not arrive
          at the destination, then the following Path_Track request must copy
          the next_hop field in the former response into the forwarding header
          and keep the destination_ID unchanged.</t>

          <t>Inspect is also used to detect possible failures in the specified
          path of the P2PSIP overlay network. If disabled peers, misrouting
          behavior or underlying network faults are detected during the
          routing process, Error responses with Error codes and
          descriptions must be sent to the initiator node immediately.</t>
        </section>

        <section anchor="Message_response" title="Message Response Creation">
          <t>When a diagnostic request message arrives at a peer, and it is
          responsible for the destination ID specified in the forwarding
          header, it MUST follow the specifications defined in 5.1.3 of the
          base draft to form the response header.</t>

          <t>When the responsible peer receives an Inspect or Path_Track
          request/response it MUST check the expiration value (NTP format) to
          determine if the message expired. If the message expired, the peer
          MUST generate a message with the Error Code 103 "Message Expired"
          and return it to the initiator node, and discard the message.</t>

          <t>If the request is an Inspect, the destination peer MUST copy the
          TTL value to the HopCounter field. The receiver MUST generate an NTP
          format timestamp for the current time of day and place it in the
          timestampReceived field.</t>

          <t>The initiator node, as well as the responding peer, MAY compute
          the overlay One-Way-Delay time through the value in
          timestampReceived and the timestampInitiated field. However, for a
          single hop measurement, the traditional measurement methods MUST be
          used instead of the overlay layer diagnostics methods.</t>

          <t>Editor note: We need more discussion and careful consideration on
          how to use the timestamp here because time synchronization is a
          barrier in open Internet environment, while in the operator's
          network, it may be less of a problem.</t>

          <t>The initiator node receiving the Inspect response MUST check the
          hopCounter field and compute the overlay hops to the destination
          peer for the statistics of connectivity quality from the perspective
          of overlay hops.</t>

          <t>If the request is a Path_Track, the destination peer MUST check
          if the initiator node has the authority to get certain kinds of
          diagnostic information and, if appropriate, append the diagnostic
          information requested in the dEFlags and dMFlags to the response
          message. The peer should return an Error response with the Error
          Code 1 "Error_Unauthorized" when the initiator node does not have
          the authority to get the corresponding diagnostic information.</t>

          <t>In the event of an error, an error response containing the error
          code followed by the subcode and description (if they exist) MUST be
          created and sent to the sender.</t>
        </section>
      </section>
    </section>

    <section title="Examples">
      <t>Below, we sketch how these metrics can be used.</t>

      <section title="Example 1">
        <t>A peer may set EWMA_BYTES_SENT and EWMA_BYTES_RCVD flags in the
        PathTrackReq to its direct neighbors. A peer can use EWMA_BYTES_SENT
        and EWMA_BYTES_RCVD of another peer to infer whether it is acting as a
        media relay. It may then choose not to forward any requests for media
        relay to this peer. Similarly, among the various candidates for
        filling up routing table, a peer may prefer a peer with a large UPTIME
        value, small RTT, and small LAST_CONTACT value.</t>
      </section>

      <section title="Example 2">
        <t>A peer may set the StatusInfo Flag in the PathTrackReq to a remote
        destination peer. The overlay has its own threshold definition for
        congestion. The peer can get knowledge of all the status information
        of the intermediate peers along the path. Then it can choose other
        paths to that node for the later requests.</t>
      </section>

      <section title="Example 3">
        <t>A peer may use Inspect to evaluate the average overlay hops to
        other peers by sending InspectReq to a set of random resource or node
        IDs in the overlay. A peer may adjust its timeout value according to
        the change of average overlay hops.</t>
      </section>
    </section>

    <section title="Security Considerations">
      <t>The authorization for diagnostics information must be designed with
      care to prevent it from becoming a means of retrieving information for
      bot attacks. Care should also be taken because attackers can use
      diagnostics to analyze overlay information in order to attack certain
      key peers, if any exist. As this draft is a RELOAD extension, it follows
      the RELOAD message header and routing specifications; the common
      security considerations described in the base draft <xref
      target="I-D.ietf-p2psip-base"></xref> are also applicable to this
      draft.</t>
    </section>

    <section title="IANA Considerations ">
      <section title="Message Code ">
        <t>This document introduces two new methods, each with a request and
        a response message code, to the "RELOAD Message Code" Registry as
        below: <figure align="left" alt="">
            <artwork>
                +-------------------+----------------+----------+ 
                | Message Code Name |     Code Value |      RFC | 
                +-------------------+----------------+----------+ 
                | inspect_req       |            101 | RFC-AAAA | 
                | inspect_ans       |            102 | RFC-AAAA | 
                | path_track_req    |            103 | RFC-AAAA | 
                | path_track_ans    |            104 | RFC-AAAA | 
                +-------------------+----------------+----------+ 
</artwork>
          </figure></t>
      </section>

      <section title="Error Code">
        <t>This document introduces some new Error Codes to the "RELOAD
        Error Code" Registry as below: <figure align="left">
            <artwork>
   Code Value          Error Code Name 
   101                Underlay Destination Unreachable  
   102                Underlay Time exceeded 
   103                Message Expired 
   104                Upstream Misrouting 
   105                Loop detected 
   106                TTL hops exceeded 
</artwork>
          </figure></t>
      </section>

      <section title="Data Kind-ID">
        <t>This document introduces additional data kind-IDs to the "RELOAD
        Data Kind-ID" Registry as below: <figure>
            <artwork>
      Kind                  Kind-ID  
     StatusInfo                 101  
     ProcessPower               102  
     Bandwidth                  103  
</artwork>
          </figure></t>
      </section>

      <section title="Diagnostics Flag">
        <t>IANA SHALL create a "RELOAD Diagnostics Flag" Registry. Entries in
        this registry are 1-bit flags contained in the 64-bit integer
        dMFlags denoting diagnostic information to be retrieved as described
        in <xref target="Path_Track"></xref>. New entries SHALL be defined via
        <xref target="RFC5226"></xref> Standards Action. The initial contents
        of this registry are:<figure>
            <artwork>
 +-------------------------+------------------------------+--------+
 |  diagnostic information |diagnostic flag in dMFlags    | RFC    |
 |-------------------------+------------------------------+--------|
 |Reserved                 | 0x 0000 0000 0000 0000       |RFC-BBBB|
 |STATUS_INFO              | 0x 0000 0000 0000 0001       |RFC-BBBB|
 |ROUTING_TABLE_SIZE       | 0x 0000 0000 0000 0002       |RFC-BBBB|
 |PROCESS_POWER            | 0x 0000 0000 0000 0004       |RFC-BBBB|
 |BANDWIDTH                | 0x 0000 0000 0000 0008       |RFC-BBBB|
 |SOFTWARE_VERSION         | 0x 0000 0000 0000 0010       |RFC-BBBB|
 |MACHINE_UPTIME           | 0x 0000 0000 0000 0020       |RFC-BBBB|
 |APP_UPTIME               | 0x 0000 0000 0000 0040       |RFC-BBBB|
 |MEMORY_FOOTPRINT         | 0x 0000 0000 0000 0080       |RFC-BBBB|
 |DATASIZE_STORED          | 0x 0000 0000 0000 0100       |RFC-BBBB|
 |MESSAGES_SENT_RCVD       | 0x 0000 0000 0000 0200       |RFC-BBBB|
 |EWMA_BYTES_SENT          | 0x 0000 0000 0000 0400       |RFC-BBBB|
 |EWMA_BYTES_RCVD          | 0x 0000 0000 0000 0800       |RFC-BBBB|
 |Reserved                 | 0x FFFF FFFF FFFF FFFF       |RFC-BBBB|
 +-------------------------+------------------------------+--------+
</artwork>
          </figure></t>
      </section>
    </section>

    <section title="Acknowledgments">
      <t>We would like to thank Zheng Hewen for the contribution of the
      initial version of this draft. We would also like to thank Bruce
      Lowekamp, Salman Baset, Henning Schulzrinne and Jiang Haifeng for the
      email discussion and their valued comments, and special thanks to Henry
      Sinnreich for contributing to the usage scenarios text. We would like to
      thank the authors of the p2psip base draft for transferring text about
      diagnostics to this document.</t>

      <t>The authors would also like to thank the many people of the IETF
      P2PSIP WG that have contributed to discussions and provided input
      invaluable in assembling this document.</t>
    </section>

    <section title="Appendix: Changes">
      <section title="Changes since draft-ietf-p2psip-diagnostics-00">
        <t><list>
            <t>1. Change Title from "Diagnose P2PSIP Overlay Network" into
            "P2PSIP Overlay Diagnostics";</t>

            <t>2. Change the table of contents. Add a section about message
            processing and another section about examples;</t>

            <t>3. Merge diagnostics text from the p2psip base draft 01;</t>

            <t>4. Remove ECHO method for security considerations.</t>
          </list></t>
      </section>
    </section>
  </middle>

  <back>
    <references title="Normative References">
      &RFC0792;

      &RFC2119;

      &RFC3261;

      &RFC5226;

      &I-D.ietf-p2psip-sip;

      &I-D.ietf-p2psip-base;

      &I-D.zheng-p2psip-diagnose;

      <reference anchor="Overlay-Failure-Detection">
        <front>
          <title>On failure detection algorithms in overlay networks</title>

          <author initials="S" surname="Zhuang">
            <organization></organization>
          </author>

          <date day="13-17" month="Mar" year="2005" />
        </front>

        <seriesInfo name="" value="Proc. IEEE Infocomm" />
      </reference>

      <reference anchor="Handling_Churn_in_a_DHT">
        <front>
          <title>Handling Churn in a DHT</title>

          <author initials="S" surname="Rhea">
            <organization></organization>
          </author>

          <date day="" month="June" year="2004" />
        </front>

        <seriesInfo name="USENIX" value="Annual Conference" />
      </reference>

      <reference anchor="Diagnostic_Framework">
        <front>
          <title>A Diagnostic Framework for Peer-to-Peer Streaming</title>

          <author initials="X" surname="Jin">
            <organization>Hong Kong University and Microsoft</organization>
          </author>

          <date year="2005" />
        </front>
      </reference>

      <reference anchor="Diagnostics_and_NAT_traversal_in_P2PP" target="">
        <front>
          <title>Diagnostics and NAT Traversal in P2PP - Design and
          Implementation</title>

          <author initials="G" surname="Gupta">
            <organization></organization>
          </author>

          <date month="June" year="2008" />
        </front>

        <seriesInfo name="Columbia University Report" value="" />
      </reference>
    </references>

    <references title="Informative References ">
      &RFC4330;

      &RFC4981;

      &I-D.ietf-behave-rfc3489bis;

      &I-D.matuszewski-p2psip-security-requirements;

      &I-D.song-p2psip-security-eval;

      &I-D.baset-p2psip-p2pp;

      &I-D.ietf-mmusic-ice;

      &I-D.bryan-p2psip-app-scenarios;

      &I-D.bryan-p2psip-requirements;

      &I-D.ietf-p2psip-concepts;
    </references>
  </back>
</rfc>

PAFTECH AB 2003-20262026-04-23 20:34:51