http://stupid.domain.name/ietf/

One document matched: draft-ietf-sip-dtls-srtp-framework-05.xml
<?xml version="1.0" encoding="US-ASCII"?>
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!ENTITY rfc2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY rfc3261 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3261.xml">
<!ENTITY rfc3262 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3262.xml">
<!ENTITY rfc3264 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3264.xml">
<!ENTITY rfc3280 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3280.xml">
<!ENTITY rfc3323 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3323.xml">
<!ENTITY rfc3325 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3325.xml">
<!ENTITY rfc3546 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3546.xml">
<!ENTITY rfc3550 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3550.xml">
<!ENTITY rfc3711 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3711.xml">
<!ENTITY rfc3830 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3830.xml">
<!ENTITY rfc4145 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4145.xml">
<!ENTITY rfc4347 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4347.xml">
<!ENTITY rfc4474 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4474.xml">
<!ENTITY rfc4566 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4566.xml">
<!ENTITY rfc4567 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4567.xml">
<!ENTITY rfc4568 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4568.xml">
<!ENTITY rfc4571 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4571.xml">
<!ENTITY rfc4572 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4572.xml">
<!ENTITY rfc4916 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4916.xml">

<!ENTITY I-D.ietf-mmusic-ice SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-mmusic-ice.xml">
<!ENTITY I-D.mcgrew-srtp-ekt SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.mcgrew-srtp-ekt.xml">
<!ENTITY I-D.ietf-avt-dtls-srtp SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-avt-dtls-srtp.xml">
<!ENTITY I-D.zimmermann-avt-zrtp SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.zimmermann-avt-zrtp.xml">
<!ENTITY I-D.ietf-sip-media-security-requirements SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-sip-media-security-requirements.xml">
<!ENTITY I-D.ietf-mmusic-sdp-capability-negotiation SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-mmusic-sdp-capability-negotiation.xml">
<!ENTITY I-D.ietf-avt-rtp-and-rtcp-mux SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-avt-rtp-and-rtcp-mux.xml">
<!ENTITY I-D.wing-sipping-srtp-key SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.wing-sipping-srtp-key.xml">
<!ENTITY I-D.ietf-mmusic-media-path-middleboxes SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-mmusic-media-path-middleboxes.xml">
<!ENTITY I-D.ietf-behave-rfc3489bis SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-behave-rfc3489bis.xml">
<!ENTITY I-D.wing-avt-dtls-srtp-key-transport SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.wing-avt-dtls-srtp-key-transport.xml">
<!ENTITY I-D.wing-sip-identity-media SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.wing-sip-identity-media.xml">
<!ENTITY I-D.fischer-sip-e2e-sec-media SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.fischer-sip-e2e-sec-media.xml">
<!ENTITY I-D.ietf-sip-ua-privacy SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-sip-ua-privacy.xml">

]>


<!-- $Id: dtls-sip.xml,v 1.14 2006/02/26 23:38:20 jfischl Exp $ -->
<?rfc colonspace='yes' ?>
<?rfc compact="yes" ?>
<?rfc inline="yes"?>
<?rfc iprnotified="no" ?>
<?rfc sortrefs="no" ?>
<?rfc strict="yes" ?>
<?rfc symrefs="yes" ?>
<?rfc toc="yes" ?>
<rfc category="std" docName="draft-ietf-sip-dtls-srtp-framework-05.txt" ipr="full3978">
  <front>
    <title abbrev="DTLS-SRTP Framework">Framework for Establishing an SRTP Security Context using
      DTLS</title>

    <author fullname="Jason Fischl" initials="J." surname="Fischl">
      <organization>CounterPath Corporation</organization>
      <address>
        <postal>
          <street>Suite 300, One Bentall Centre, 505 Burrard Street</street>
          <city>Vancouver</city>
          <region>BC</region>
          <code>V7X 1M3</code>
          <country>Canada</country>
        </postal>
        <phone>+1 604 320-3340</phone>
        <email>jason@counterpath.com</email>
      </address>
    </author>
    <author fullname="Hannes Tschofenig" initials="H." surname="Tschofenig">
      <organization>Nokia Siemens Networks</organization>
      <address>
        <postal>
          <street>Otto-Hahn-Ring 6</street>
          <city>Munich</city>
          <region>Bavaria</region>
          <code>81739</code>
          <country>Germany</country>
        </postal>
        <email>Hannes.Tschofenig@nsn.com</email>        
        <uri>http://www.tschofenig.com</uri>
      </address>
    </author>
    <author fullname="Eric Rescorla" initials="E." surname="Rescorla">
      <organization>RTFM, Inc.</organization>
      <address>
        <postal>
          <street>2064 Edgewood Drive</street>
          <city>Palo Alto</city>
          <region>CA</region>
          <code>94303</code>
          <country>USA</country>
        </postal>
        <email>ekr@rtfm.com</email>
      </address>
    </author>

    <date day="29" month="October" year="2008"/>
    <area>Real-time Applications and Infrastructure Area</area>
    <workgroup>SIP</workgroup>

    <abstract>
      <t>This document specifies how to use the Session Initiation Protocol (SIP) to establish a
        Secure Real-time Transport Protocol (SRTP) security context using the Datagram Transport
        Layer Security (DTLS) protocol. It describes a mechanism of transporting a fingerprint
        attribute in the Session Description Protocol (SDP) that identifies the key that will be
        presented during the DTLS handshake. The key exchange travels along the media
        path as opposed to the signaling path.
        The SIP Identity mechanism can be used to protect the integrity of the fingerprint attribute 
	from modification by intermediate proxies.
      </t>
    </abstract>
  </front>

  <middle>
    <section title="Introduction">
      <t>The Session Initiation Protocol (SIP) <xref target="RFC3261"/> and the
        Session Description Protocol (SDP) <xref target="RFC4566"/> are used to
        set up multimedia sessions or calls. SDP is also used to set up TCP
        <xref target="RFC4145" /> and additionally TCP/TLS connections for usage
        with media sessions <xref target="RFC4572" />. The Real-time Transport
        Protocol (RTP) <xref target="RFC3550"/> is used to transmit real time
        media on top of UDP and TCP <xref target="RFC4571"/>. Datagram TLS
        <xref target="RFC4347"/> was introduced to allow TLS functionality to be
        applied to datagram transport protocols, such as UDP and DCCP. This
        draft provides guidelines on how to establish SRTP <xref target="RFC3711"/> 
	security over UDP using an
        extension to DTLS (see <xref target="I-D.ietf-avt-dtls-srtp"/>). </t>

      <t>The goal of this work is to provide a key negotiation technique that allows encrypted
        communication between devices with no prior relationships. It also does not require the
        devices to trust every call signaling element that was involved in routing or session setup.
        This approach does not require any extra effort by end users and does not require deployment
        of certificates that are signed by a well-known certificate authority to all devices. </t>

      <t>The media is transported over a mutually authenticated DTLS session
        where both sides have certificates. It is very important to note that
        certificates are being used purely as a carrier for the public keys of
        the peers. This is required because DTLS does not have a mode for
        carrying bare keys, but it is purely an issue of formatting.  The
        certificates can be self-signed and completely self-generated. All major
        TLS stacks have the capability to generate such certificates on
        demand. However, third party certificates MAY also be used for extra
        security. The certificate fingerprints are sent in SDP over SIP as part
        of the offer/answer exchange.
      </t>
      <t>
	The fingerprint mechanism allows one side of the connection to verify
	that the certificate presented in the DTLS handshake matches the
	certificate used by the party in the signaling. However, this
	requires some form of integrity protection on the signaling. S/MIME signatures, as described in
	RFC 3261, or SIP Identity, as described in <xref target="RFC4474"/>, provide
	the highest level of security because they are not susceptible to modification
	by malicious intermediaries. However, even hop-by-hop security such as
	provided by SIPS provides some protection against modification by attackers
	who are not in control of on-path signaling elements.
      </t>
      <t>This approach differs from previous attempts to secure media traffic where the
        authentication and key exchange protocol (e.g., MIKEY <xref target="RFC3830"/>) is
        piggybacked in the signaling message exchange. With DTLS-SRTP, establishing the
        protection of the media traffic between the endpoints is done by the media endpoints without
        involving the SIP/SDP communication. It allows RTP and SIP to be used in the usual manner
        when there is no encrypted media.</t>

      <t>In SIP, typically the caller sends an offer and the callee may subsequently send one-way
        media back to the caller before a SIP answer is received by the caller. The approach in this
        specification, where the media key negotiation is decoupled from the SIP signaling, allows
        the early media to be set up before the SIP answer is received while preserving the
        important security property of allowing the media sender to choose some of the keying
        material for the media. This also allows the media sessions to be changed, re-keyed, and
        otherwise modified after the initial SIP signaling without any additional SIP signaling.</t>

      <t>Design decisions that influence the applicability of this specification are discussed in
          <xref target="sec.motivation"/>.</t>
    </section>

    <section title="Overview">
      <t>Endpoints wishing to set up an RTP media session do so by exchanging offers and answers in
        SDP messages over SIP. In a typical use case, two endpoints would negotiate to transmit
        audio data over RTP using the UDP protocol.</t>

      <t><xref target="sip-trapezoid"> </xref> shows a typical message exchange in the SIP
        Trapezoid.</t>

      <figure anchor="sip-trapezoid" title="DTLS Usage in the SIP Trapezoid">
        <artwork><![CDATA[
              +-----------+            +-----------+
              |SIP        |   SIP/SDP  |SIP        |
      +------>|Proxy      |----------->|Proxy      |-------+
      |       |Server X   | (+finger-  |Server Y   |       |
      |       +-----------+   print,   +-----------+       |
      |                      +auth.id.)                    |
      | SIP/SDP                              SIP/SDP       |
      | (+fingerprint)                       (+fingerprint,|
      |                                       +auth.id.)   |
      |                                                    |
      |                                                    v
  +-----------+          Datagram TLS               +-----------+
  |SIP        | <-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-> |SIP        |
  |User Agent |               Media                 |User Agent |
  |Alice@X    | <=================================> |Bob@Y      |
  +-----------+                                     +-----------+

  Legend:
  ------>: Signaling Traffic
  <-+-+->: Key Management Traffic
  <=====>: Data Traffic
 ]]></artwork>
      </figure>

      <t>Consider Alice wanting to set up an encrypted audio session with Bob. Both Bob and Alice
        could use public-key based authentication in order to establish a confidentiality protected
        channel using DTLS.</t>

      <t>Since providing mutual authentication between two arbitrary end points
        on the Internet using public key based cryptography tends to be
        problematic, we consider more deployment-friendly alternatives. This
        document uses one approach and several others are discussed in
        <xref target="section.sec"/>.</t>

      <t>Alice sends an SDP offer to Bob over SIP. If Alice uses only
        self-signed certificates for the communication with Bob, a fingerprint
        is included in the SDP offer/answer exchange. This fingerprint 
	binds the DTLS key exchange in the media plane to the signaling
	plane. </t>
	<t>
	The fingerprint alone protects against active attacks on the
	media but not active attacks on the signaling.
	In order to prevent active attacks on the signaling, 
	<xref target="RFC4474">Enhancements for Authenticated Identity
        Management in SIP</xref> may be used. When Bob receives the offer,
	the peers establish some number of DTLS connections
 	(depending on the number of media sessions) with mutual DTLS authentication
	(i.e., both sides provide certificates).
	At this point, Bob can verify that Alice's credentials offered in
	TLS match the fingerprint in the SDP offer, and 
	Bob can begin sending media to Alice. Once Bob accepts Alice's offer and
        sends an SDP answer to Alice, Alice can begin sending confidential media
        to Bob over the appropriate streams. Alice and Bob will verify the fingerprints from the certificates
        received over the DTLS handshakes match with the fingerprints received
        in the SDP of the SIP signaling. This provides the security property
        that Alice knows that the media traffic is going to Bob and vice-versa
        without necessarily requiring global PKI certificates for Alice and Bob.
	(see <xref target="section.sec"/> for detailed security analysis.)
	</t>

    </section>

    <section anchor="sec.motivation" title="Motivation">
      <t>Although there is already prior work in this area (e.g., <xref
          target="RFC4568">Security Descriptions for SDP </xref>, <xref
          target="RFC4567">Key Management Extensions</xref> combined with MIKEY
          <xref target="RFC3830"/> for authentication and key exchange), this specification is
        motivated as follows:</t>

      <t>
        <list style="symbols">


          <t>TLS will be used to offer security for connection-oriented media. The design of TLS is
            well-known and implementations are widely available.</t>

          <t>This approach deals with forking and early media without requiring support for PRACK
              <xref target="RFC3262"/> while preserving the important security property of allowing
            the offerer to choose keying material for encrypting the media.</t>

          <t>The establishment of security protection for the media path is also provided along the
            media path and not over the signaling path. In many deployment scenarios, the signaling
            and media traffic travel along a different path through the network.</t>

          <t>When RFC 4474 Identity is used, this solution works even when the SIP proxies downstream of the 
	  authentication service are
            not trusted. There is no need to reveal keys in the SIP signaling or in the SDP message
            exchange. 
	    With retargeting of a dialog-forming request (changing the value of the
	    Request-URI), the UA that receives
	    it (the User Agent Server, UAS) can have a different identity from
	    that in the To header field. When RFC 4916 is used then it is
	    possible to supply its identity to the peer UA by means of a request in
	    the
	    reverse direction, and for that identity to be signed by an
	    Authentication Service. 
	  </t>

          <t>In this method, SSRC collisions do not result in any extra SIP signaling.</t>

          <t>Many SIP endpoints already implement TLS. The changes to existing SIP and RTP usage are
            minimal even when DTLS-SRTP <xref target="I-D.ietf-avt-dtls-srtp"/> is used.</t>
        </list>
      </t>
    </section>

    <section title="Terminology">
      <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT",
        "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in
          <xref target="RFC2119"/>.</t>

      <t>DTLS/TLS uses the term "session" to refer to a long-lived set of keying material that spans
        associations. 
	In this document, consistent with SIP/SDP usage, we use it to refer to a
        multimedia session and use the term "TLS session" to refer to the TLS construct. We use the
        term "association" to refer to a particular DTLS ciphersuite and keying material set which is associated with a single host/port quartet. The same DTLS/TLS session can be used to establish the keying material for
	multiple associations. For
        consistency with other SIP/SDP usage, we use the term "connection" when what's being
        referred to is a multimedia stream that is not specifically DTLS/TLS.</t>

      <t>In this document, the term "Mutual DTLS" indicates that both the DTLS client and server
        present certificates even if one or both certificates are self-signed.</t>
    </section>

    <section title="Establishing a Secure Channel">
      <t> The two endpoints in the exchange present their identities as part of the DTLS
      handshake procedure using certificates. This document uses certificates in the same
      style as described in Comedia over TLS in SDP <xref
      target="RFC4572"></xref>.</t> 

      <t> If self-signed certificates are used, the content of the subjectAltName attribute
      inside the certificate MAY use the uniform resource identifier (URI) of the user.
      This is useful for debugging purposes only and is not required to bind the
      certificate to one of the communication endpoints. The integrity of the certificate
      is ensured through the fingerprint attribute in the SDP. The subjectAltName is not
      an important component of the certificate verification.</t>

      <t> The generation of public/private key pairs is relatively expensive. Endpoints are not
      required to generate certificates for each session.</t>


      <t>The offer/answer model, defined in <xref target="RFC3264"/>, is used by
        protocols like the Session Initiation Protocol (SIP)
        <xref target="RFC3261"/> to set up multimedia sessions. In addition to
        the usual contents of an SDP <xref target="RFC4566"/> message, each
        media description ('m' line and associated parameters) will also contain
        several attributes as specified in
        <xref target="I-D.ietf-avt-dtls-srtp"/>, <xref target="RFC4145"/>
        and <xref target="RFC4572" />.</t>

      <t>When an endpoint wishes to set up a secure media session with another endpoint it sends an
        offer in a SIP message to the other endpoint. This offer includes, as part of the SDP
        payload, the fingerprint of the certificate that the endpoint wants to use. 
        The endpoint SHOULD send the SIP message
	containing the offer to the offerer's SIP proxy over
	an integrity protected channel. The proxy SHOULD add an Identity
	header field according to the procedures outlined in <xref target="RFC4474"/>.
	The SIP message
        containing the offer SHOULD be sent to the offerer's SIP proxy over an integrity protected channel.
	When the far endpoint receives the SIP message it can verify the
        identity of the sender using the Identity header field. Since the Identity header field is a digital
        signature across several SIP header fields, in addition to the body of the SIP message, the
        receiver can also be certain that the message has not been tampered with after the digital
        signature was applied and added to the SIP message.</t>

      <t>The far endpoint (answerer) may now establish a DTLS association
        with the offerer. Alternately, it can indicate in its answer that the offerer is to initiate
	the DTLS association. In either case, mutual DTLS certificate-based authentication will
	be used.
	After completing the DTLS handshake, information about the authenticated
        identities, including the certificates, is made available to the endpoint application. The
        answerer is then able to verify that the offerer's certificate used for authentication in
        the DTLS handshake can be associated to the certificate fingerprint contained in the offer
        in the SDP. At this point the answerer may indicate to the end user that the media is
        secured. The offerer may only tentatively accept the answerer's certificate since it may not
        yet have the answerer's certificate fingerprint.</t>

      <t>When the answerer accepts the offer, it provides an answer back to the offerer containing
        the answerer's certificate fingerprint. At this point the offerer can accept or
        reject the peer's certificate and the offerer can indicate to the end user that the media is
        secured.</t>

      <t>Note that the entire authentication and key exchange for securing the media traffic is
        handled in the media path through DTLS. The signaling path is only used to verify the peers'
        certificate fingerprints.</t>

      <t>
	The offer and answer MUST conform to the following requirements.
	<list style="symbols">
	  <t>The endpoint MUST use the setup attribute defined in
        <xref target="RFC4145"/>. The endpoint which is the offerer MUST use the
        setup attribute value of setup:actpass and be prepared to receive a
        client_hello before it receives the answer. The answerer MUST use 
	either a setup attribute value of setup:active or setup:passive. Note
	that if the answerer uses setup:passive, then the DTLS handshake
	will not begin until the answer is received, which adds additional
	latency. setup:active allows the answer and the DTLS handshake
	to occur in parallel. Thus, setup:active is RECOMMENDED.
	Whichever party is active MUST initiate a DTLS handshake by sending 
	a ClientHello over each flow (host/port quartet). 
	  </t>

      <t>The endpoint MUST NOT use the connection attribute defined in <xref target="RFC4145"/>.</t>
      
      <t>The endpoint MUST use the certificate fingerprint attribute as specified in <xref
          target="RFC4572"/>.</t>

      <t>The certificate presented during the DTLS handshake MUST match the fingerprint exchanged
        via the signaling path in the SDP. The security properties of this mechanism are described
        in <xref target="section.sec"/>.</t>

      <t>If the fingerprint does not match the hashed certificate then the endpoint MUST tear down
        the media session immediately. Note that it is permissible to wait until the other
        side's fingerprint has been received before establishing the connection; however, this
        may have undesirable latency effects.</t>
	</list>
      </t>
    </section>

    <section title="Miscellaneous Considerations">
      <section title="Anonymous Calls">
        <t>The use of DTLS-SRTP does not provide anonymous calling,
	but it also does not prevent it.
	However, if care is not
	taken when anonymous calling features such as those described
	in <xref target="RFC3325"/> or <xref target="I-D.ietf-sip-ua-privacy"/>
	are used,
	DTLS-SRTP may allow deanonymizing an otherwise anonymous call. When
	anonymous calls are being made, the
	following procedures SHOULD be used to prevent deanonymization. </t>
	
        <t>When making anonymous calls, a new self-signed certificate SHOULD be
          used for each call so that the calls can not be correlated as to being
          from the same caller. In situations where some degree of correlation
          is acceptable, the same certificate SHOULD be used for a number of
          calls in order to enable continuity of authentication, see
          <xref target="sec.continuity"/>.</t>

        <t>
	  Additionally note that in networks that deploy
          <xref target="RFC3325"/>, RFC 3325 requires that the Privacy header
          field value defined in <xref target="RFC3323"/> needs to be set to
          'id'.  This is used in conjunction with the SIP identity mechanism to
          ensure that the identity of the user is not asserted when enabling
          anonymous calls. Furthermore, the content of the subjectAltName
          attribute inside the certificate MUST NOT contain information that
          either allows correlation or identification of the user that wishes to
          place an anonymous call.  Note that following this recommendation is
          not sufficient to provide anonymization.</t>
      </section>

      <section title="Early Media">
        <t>If an offer is received by an endpoint that wishes to provide early media, it MUST take
          the setup:active role and can immediately establish a DTLS association with the other
          endpoint and begin sending media. The setup:passive endpoint may not yet have validated
          the fingerprint of the active endpoint's certificate. The security aspects of media
          handling in this situation are discussed in <xref target="section.sec"/>.</t>
      </section>

      <section title="Forking">
        <t>In SIP, it is possible for a request to fork to multiple endpoints. Each forked request
          can result in a different answer. Assuming that the requester provided an offer, each of
          the answerers will provide a unique answer. Each answerer will form a DTLS association
          with the offerer. The offerer can then securely correlate the SDP answer received in the SIP
          message by comparing the fingerprint in the answer to the hashed certificate for each DTLS
          association.</t>
      </section>

      <section title="Delayed Offer Calls">
        <t>An endpoint may send a SIP INVITE request with no offer in it. When
          this occurs, the receiver(s) of the INVITE will provide the offer in
          the response and the originator will provide the answer in the
          subsequent ACK request or in the PRACK request
          <xref target="RFC3262"/> if both endpoints support reliable
          provisional responses. In any event, the active endpoint still
          establishes the DTLS association with the passive endpoint as
          negotiated in the offer/answer exchange.</t>
      </section>

      <section title="Multiple Associations">
	<t>
	  When there are multiple flows (e.g., multiple media streams, non-multiplexed
	  RTP and RTCP, etc.) the active side MAY perform the DTLS handshakes in any order.
	  Appendix B of <xref target="I-D.ietf-avt-dtls-srtp"/> provides some 
	  guidance on the performance of parallel DTLS handshakes. Note that if
	  the answerer ends up being active, it may only initiate handshakes on
	  some subset of the potential streams (e.g., if audio and video
	  are offered but it only wishes to do audio.) If the offerer ends up being
	  active, the complete answer will be received before the offerer
	  begins initiating handshakes.
	</t>
      </section>

      <section title="Session Modification">
        <t>Once an answer is provided to the offerer, either endpoint MAY
          request a session modification which MAY include an updated
          offer. This session modification can be carried in either an INVITE or
          UPDATE request. The peers can reuse
	  the existing associations if they are compatible (i.e., they have the
	  same key fingerprints and transport parameters), or establish a new one
	  following the
	  same rules as for initial exchanges, tearing
          down the existing association as soon as the offer/answer exchange is
          completed. Note that if the active/passive status of the endpoints
	  changes, a new connection MUST be established.</t>
      </section>

      <section title="Middlebox Interaction">
	<t>
	  There are a number of potentially bad interactions between DTLS-SRTP
	  and middleboxes, as documented in 
	  <xref target="I-D.ietf-mmusic-media-path-middleboxes"/>, which
	  also provides recommendations for avoiding such problems.
	</t>
	<section anchor="section.ice" title="ICE Interaction">
	  <t>Interactive Connectivity Establishment (ICE), as specified in <xref
	  target="I-D.ietf-mmusic-ice"/>, provides a methodology of allowing participants in
	  multi-media sessions to verify mutual connectivity. When ICE is being used
	  the ICE connectivity checks are performed before the DTLS handshake
	  begins. Note that if aggressive nomination mode is used, multiple
	  candidate pairs may be marked valid before ICE finally converges on 
	  a single candidate pair. Implementations MUST treat all ICE candidate
	  pairs associated with a single component as part of the same DTLS
	  association. Thus, there will be only one DTLS handshake even if there 
	  are multiple valid candidate pairs. Note that this may mean adjusting
	  the endpoint IP addresses if the selected candidate pair shifts, just
	  as if the DTLS packets were an ordinary media stream.
	  </t>
	  <t>Note that STUN packets are sent directly over UDP, not over DTLS. 
	  <xref target="I-D.ietf-avt-dtls-srtp"/> describes how to demultiplex
	  STUN packets from DTLS packets and SRTP packets.</t>
	  
	</section>
	<section title="Latching Control Without ICE">
	  <t>If ICE is not being used, then there is potential for a bad interaction
	  with SBCs via "latching", as described in 
	  <xref target="I-D.ietf-mmusic-media-path-middleboxes"/>. In order to avoid
	  this issue, if ICE is not being used and the DTLS handshake has not completed,
	  upon receiving the other side's SDP
	  then the passive side MUST do a
	  single unauthenticated STUN <xref target="I-D.ietf-behave-rfc3489bis"/>
	  connectivity check in order to open up the appropriate 
	  pinhole. All implementations MUST be prepared to answer this request
	  during the handshake period even if they do not otherwise do ICE.
	  However, the active side MUST proceed with the DTLS handshake
	  as appropriate even if no such STUN check is received and the
	  passive side MUST NOT wait for a STUN answer before sending its 
	  ServerHello.
	  </t>
	</section>
      </section>
      <section title="Rekeying">
        <t>As with TLS, DTLS endpoints can rekey at any time by redoing the DTLS handshake. While
          the rekey is under way, the endpoints continue to use the previously established keying
	  material for usage with DTLS. Once the new session keys are established the session can
          switch to using these and abandon the old keys. This ensures that latency is not
          introduced during the rekeying process.</t>

        <t> Further considerations regarding rekeying in case the SRTP security context is
          established with DTLS can be found in Section 3.7 of <xref target="I-D.ietf-avt-dtls-srtp"
          />.</t>
      </section>

      <section title="Conference Servers and Shared Encryption Contexts">
        <t>It has been proposed that conference servers might use the same encryption context for
          all of the participants in a conference. The advantage of this approach is that the
          conference server only needs to encrypt the output once for all speakers instead of once per
          participant.</t>

        <t>This shared encryption context approach is not possible under this specification
	  because each DTLS handshake establishes fresh keys which are not completely
	  under the control of either side.
          However, it is argued that the effort to encrypt each RTP packet is small compared to the
          other tasks performed by the conference server such as the codec processing.</t>

        <t> Future extensions such as <xref target="I-D.mcgrew-srtp-ekt"/> or
	<xref target="I-D.wing-avt-dtls-srtp-key-transport"/> could be used to provide
          this functionality in concert with the mechanisms described in this specification.</t>
      </section>

      <section title="Media over SRTP">
        <t>Because DTLS's data transfer protocol is generic, it is less highly
          optimized for use with RTP than is SRTP <xref target="RFC3711"/>,
          which has been specifically tuned for that purpose. DTLS-SRTP
          <xref target="I-D.ietf-avt-dtls-srtp"/> has been defined to provide
          for the negotiation of SRTP transport using a DTLS connection, thus
          allowing the performance benefits of SRTP with the easy key management
          of DTLS. The ability to reuse existing SRTP software and hardware
          implementations may in some environments provide another important
          motivation for using DTLS-SRTP instead of RTP over
          DTLS. Implementations of this specification MUST support DTLS-SRTP
          <xref target="I-D.ietf-avt-dtls-srtp"/>.</t>
      </section>

      <section title="Best Effort Encryption">
        <t><xref target="I-D.ietf-sip-media-security-requirements"/> describes
        a requirement for best effort encryption where SRTP is used where both
        endpoints support it and key negotiation succeeds, otherwise RTP is used. </t>
        
        <t><xref target="I-D.ietf-mmusic-sdp-capability-negotiation"/>
        describes a mechanism which can signal both RTP and SRTP as an
        alternative. This allows an offerer to express a preference for SRTP, but RTP is the
	default and will be understood by endpoints that do not understand
	SRTP or this key exchange mechanism. Implementations of this document
	MUST support <xref target="I-D.ietf-mmusic-sdp-capability-negotiation"/>.
	</t>
      </section>

    </section>

    <section anchor="section.example" title="Example Message Flow">
      <t>Prior to establishing the session, both Alice and Bob generate self-signed certificates
        which are used for a single session or, more likely, reused for multiple sessions. In this
        example, Alice calls Bob. In this example we assume that Alice and Bob share the same proxy.</t>
      
      <t>The example shows the SIP message flows where Alice acts as the passive endpoint and Bob
        acts as the active endpoint meaning that as soon as Bob receives the INVITE from Alice, with
        DTLS specified in the 'm' line of the offer, Bob will begin to negotiate a DTLS association
        with Alice for both RTP and RTCP streams. Early media (RTP and RTCP) starts to flow from Bob
        to Alice as soon as Bob sends the DTLS finished message to Alice. Bi-directional media (RTP
        and RTCP) can flow after Alice receives the SIP 200 response and once Alice has sent the DTLS
        finished message.</t>
      
      <t>The SIP signaling from Alice to her proxy is transported over TLS to ensure an integrity
        protected channel between Alice and her identity service. Transport between proxies should
	also be protected somehow, especially if Identity is not in use.
	Note that all other signaling is
        transported over TCP in this example although it could be done over any supported transport.</t>
      <t>
        <figure>
          <artwork><![CDATA[
Alice            Proxies             Bob
  |(1) INVITE       |                  |
  |---------------->|                  |
  |                 |(2) INVITE        |
  |                 |----------------->|
  |                 |(3) hello         |
  |<-----------------------------------|
  |(4) hello        |                  |
  |----------------------------------->|
  |                 |(5) finished      |
  |<-----------------------------------|
  |                 |(6) media         |
  |<-----------------------------------|
  |(7) finished     |                  |
  |----------------------------------->|
  |                 |(8)  200 OK       |
  |<-----------------------------------|
  |                 |(9) media         |
  |----------------------------------->|
  |(10) ACK         |                  |
  |----------------------------------->|
          ]]></artwork>
        </figure>
      </t>
      <t>
        <list style="hanging">
          
          <t hangText="Message (1):">INVITE Alice ->
            Proxy<vspace blankLines="2"/> This shows the initial INVITE from
            Alice to Bob carried over the TLS transport protocol to ensure an
            integrity protected channel between Alice and her proxy which acts
            as Alice's identity service. Note that Alice has requested to be
            either the active or passive endpoint by specifying
            a=setup:actpass. Bob chooses to act as the DTLS client and will
            initiate the session. Also note that there is a fingerprint
            attribute in the SDP. This is computed from Alice's
            self-signed certificate.<vspace blankLines="2"/>
          </t>
          <t> <figure>
              <artwork><![CDATA[
INVITE sip:bob@example.com SIP/2.0
Via: SIP/2.0/TLS 192.0.2.101:5060;branch=z9hG4bK-0e53sadfkasldkfj
Max-Forwards: 70
Contact: <sip:alice@192.0.2.103:6937;transport=TLS>
To: <sip:bob@example.com>
From: "Alice"<sip:alice@example.com>;tag=843c7b0b
Call-ID: 6076913b1c39c212@REVMTEpG
CSeq: 1 INVITE
Allow: INVITE, ACK, CANCEL, OPTIONS, BYE 
Content-Type: application/sdp
Content-Length: xxxx

v=0
o=- 1181923068 1181923196 IN IP4 192.0.2.103
s=example1
c=IN IP4 192.0.2.103
a=setup:actpass
a=fingerprint: \
  SHA-1 4A:AD:B9:B1:3F:82:18:3B:54:02:12:DF:3E:5D:49:6B:19:E5:7C:AB
t=0 0
m=audio 6056 RTP/AVP 0 
a=sendrecv
a=tcap:1 UDP/TLS/RTP/SAVP RTP/AVP 
a=pcfg:1 t=1 

        ]]></artwork>
            </figure>
            <vspace blankLines="1"/></t>
          
          <t hangText="Message (2):"> INVITE Proxy -> Bob<vspace blankLines="2"/> This shows
            the INVITE being relayed to Bob from Alice's (and Bob's) proxy. Note that Alice's proxy
            has inserted an Identity and Identity-Info header. This example only shows one element
            for both proxies for the purposes of simplification. Bob verifies the identity provided
            with the INVITE. Note that this offer includes a default m-line
            offering RTP in case the answerer does not support SRTP. However,
            the potential configuration utilizing a transport of SRTP is preferred. See
            <xref target="I-D.ietf-mmusic-sdp-capability-negotiation"/> for more
            details on SDP capability negotiation. <vspace blankLines="2"/>
            <figure>
              <artwork><![CDATA[
INVITE sip:bob@example.com SIP/2.0
Via: SIP/2.0/TLS 192.0.2.101:5060;branch=z9hG4bK-0e53sadfkasldkfj
Via: SIP/2.0/TCP 192.0.2.100:5060;branch=z9hG4bK-0e53244234324234
Via: SIP/2.0/TCP 192.0.2.103:6937;branch=z9hG4bK-0e5b7d3edb2add32
Max-Forwards: 70
Contact: <sip:alice@192.0.2.103:6937;transport=TLS>
To: <sip:bob@example.com>
From: "Alice"<sip:alice@example.com>;tag=843c7b0b
Call-ID: 6076913b1c39c212@REVMTEpG
CSeq: 1 INVITE
Identity: CyI4+nAkHrH3ntmaxgr01TMxTmtjP7MASwliNRdupRI1vpkXRvZXx1ja9k
          3W+v1PDsy32MaqZi0M5WfEkXxbgTnPYW0jIoK8HMyY1VT7egt0kk4XrKFC
          HYWGCl0nB2sNsM9CG4hq+YJZTMaSROoMUBhikVIjnQ8ykeD6UXNOyfI=
Identity-Info: https://example.com/cert
Allow: INVITE, ACK, CANCEL, OPTIONS, BYE 
Content-Type: application/sdp
Content-Length: xxxx

v=0
o=- 1181923068 1181923196 IN IP4 192.0.2.103
s=example1
c=IN IP4 192.0.2.103
a=setup:actpass
a=fingerprint: \
  SHA-1 4A:AD:B9:B1:3F:82:18:3B:54:02:12:DF:3E:5D:49:6B:19:E5:7C:AB
t=0 0
m=audio 6056 RTP/AVP 0 
a=sendrecv
a=tcap:1 UDP/TLS/RTP/SAVP RTP/AVP 
a=pcfg:1 t=1 
        ]]></artwork>
            </figure>
            <vspace blankLines="1"/></t>
          

          <t hangText="Message (3):">ClientHello Bob -> Alice<vspace blankLines="2"/>
            Assuming that Alice's identity is valid, Line 3 shows Bob sending a DTLS ClientHello(s)
            directly to Alice. In this case two DTLS ClientHello
            messages would be sent to Alice: one to 192.0.2.103:6056 for RTP
            and another to port 6057 for RTCP, but only one arrow is drawn for
	  compactness of the figure.<vspace blankLines="1"/></t>

          
          <t hangText="Message (4):">ServerHello+Certificate Alice -> Bob<vspace
            blankLines="2"/> Alice sends back a ServerHello, Certificate, ServerHelloDone for both
            RTP and RTCP associations. Note that the same certificate is used for both the RTP and
            RTCP associations. If RTP/RTCP multiplexing <xref target="I-D.ietf-avt-rtp-and-rtcp-mux"
            /> were being used only a single association would be required.<vspace blankLines="1"/></t>
          
          <t hangText="Message (5):"> Certificate Bob -> Alice<vspace blankLines="2"/> Bob
            sends a Certificate, ClientKeyExchange, CertificateVerify, change_cipher_spec and
            Finished for both RTP and RTCP associations. Again note that Bob uses the same server
            certificate for both associations.<vspace blankLines="1"/></t>
          
          <t hangText="Message (6):">Early Media Bob -> Alice<vspace blankLines="2"/> At this
            point, Bob can begin sending early media (RTP and RTCP) to Alice. Note that Alice can't
            yet trust the media since the fingerprint has not yet been received. This lack of
            trusted, secure media is indicated to Alice via the UA user interface.<vspace blankLines="1"/></t>
          
          <t hangText="Message (7):"> Finished Alice -> Bob<vspace blankLines="2"/> After
            Message 5 is received from Bob, Alice sends change_cipher_spec and Finished.<vspace
              blankLines="1"/></t>
          
          <t hangText="Message (8):"> 200 OK Bob -> Alice<vspace blankLines="2"/> When Bob
            answers the call, Bob sends a 200 OK SIP message which contains the fingerprint for
            Bob's certificate. When Alice receives the message, she validates the certificate
            presented in Message 5. The endpoint now shows Alice that the call is secured.
	    Note that
            in this case, Bob signals the actual transport protocol
            configuration of SRTP over DTLS in the acfg parameter.<vspace
              blankLines="2"/>
            <figure>
              <artwork><![CDATA[
SIP/2.0 200 OK

To: <sip:bob@example.com>;tag=6418913922105372816
From: "Alice" <sip:alice@example.com>;tag=843c7b0b
Via: SIP/2.0/TCP 192.0.2.103:6937;branch=z9hG4bK-0e5b7d3edb2add32
Call-ID: 6076913b1c39c212@REVMTEpG
CSeq: 1 INVITE
Contact: <sip:192.0.2.104:5060;transport=TCP>
Content-Type: application/sdp
Content-Length: xxxx

v=0
o=- 6418913922105372816 2105372818 IN IP4 192.0.2.104
s=example2
c=IN IP4 192.0.2.104
a=setup:active
a=fingerprint:\
  SHA-1 FF:FF:FF:B1:3F:82:18:3B:54:02:12:DF:3E:5D:49:6B:19:E5:7C:AB
t=0 0
m=audio 12000 UDP/TLS/RTP/SAVP 0
a=acfg:1 t=1

          ]]></artwork>
            </figure>
            <vspace blankLines="1"/></t>
          
          <t hangText="Message (9):"> RTP+RTCP Alice -> Bob<vspace blankLines="2"/> At this
            point, Alice can also start sending RTP and RTCP to Bob. <vspace blankLines="1"/></t>
          
          <t hangText="Message (10):">ACK Alice -> Bob<vspace blankLines="2"/> Finally, Alice
            sends the SIP ACK to Bob.</t>
        </list>
      </t>
      <t>
	In this example, the DTLS handshake has already completed by the time Alice
	receives Bob's 200 OK (8). Therefore, no STUN check is sent.
	However, if Alice had a NAT, then Bob's ClientHello
	might get blocked by that NAT, in which case Alice would send the
	STUN check described in <xref target="section.ice"/> upon receiving
	the 200 OK, as shown below:
      </t>
        <figure>
          <artwork><![CDATA[
Alice            Proxies             Bob
  |(1) INVITE       |                  |
  |---------------->|                  |
  |                 |(2) INVITE        |
  |                 |----------------->|
  |                 |(3) hello         |
  |                 X<-----------------|
  |                 |(4)  200 OK       |
  |<-----------------------------------|
  | (5) conn-check  |                  |
  |----------------------------------->|
  |                 |(6) conn-response |
  |<-----------------------------------|
  |                 |(7) hello (rtx)   |
  |<-----------------------------------|
  |(8) hello        |                  |
  |----------------------------------->|
  |                 |(9) finished      |
  |<-----------------------------------|
  |                 |(10) media        |
  |<-----------------------------------|
  |(11) finished    |                  |
  |----------------------------------->|
  |                 |(12) media        |
  |----------------------------------->|
  |(13) ACK         |                  |
  |----------------------------------->|
          ]]></artwork>
        </figure>
	<t>
	  The messages here are the same as in the previous example, with the
	  following three new messages:
	</t>
	<t>
	  <list style="hanging">
          <t hangText="Message (5):"> STUN connectivity-check Alice -> Bob<vspace blankLines="2"/>
            <xref target="section.ice"/> describes an approach to avoid an SBC
            interaction issue where the endpoints do not support ICE. Alice (the
            passive endpoint) sends a STUN connectivity check to Bob. This opens
	    a pinhole in Alice's NAT/firewall.<vspace blankLines="1"/></t>	
          <t hangText="Message (6):"> STUN connectivity-check response Bob -> Alice<vspace blankLines="2"/>
	  Bob (the active endpoint) sends a response to the STUN
            connectivity check (Message 5) to Alice. This tells Alice that
	  her connectivity check has succeeded and she can stop the retransmit
	  state machine.<vspace blankLines="1"/></t>

          <t hangText="Message (7):"> Hello (retransmit) Bob -> Alice<vspace blankLines="2"/>
	    Bob retransmits his DTLS ClientHello which now passes through the pinhole
	    created in Alice's firewall. At this point, the DTLS handshake proceeds as
	    before.
	  <vspace blankLines="1"/></t>	
	  </list>
	</t>
	  
    </section>
    
    
    <section anchor="section.sec" title="Security Considerations">
      <t>DTLS or TLS media signalled with SIP requires a way to ensure that the communicating peers'
        certificates are correct.</t>

      <t>The standard TLS/DTLS strategy for authenticating the communicating parties is to give the
        server (and optionally the client) a PKIX <xref target="RFC3280"/> certificate. The client
        then verifies the certificate and checks that the name in the certificate matches the
        server's domain name. This works because there are a relatively small number of servers with
        well-defined names; a situation which does not usually occur in the VoIP context.</t>

      
      <t>
        The design described in this document is intended to leverage the authenticity of the
        signaling channel (while not requiring confidentiality). As long as each side of the
        connection can verify the integrity of the SDP received from the other side, then
	the DTLS handshake cannot be hijacked via a man-in-the-middle attack. 
	This integrity protection is easily provided by the
        caller to the callee (see Alice to Bob in <xref target="section.example"/>) via the SIP
        Identity <xref target="RFC4474"/> mechanism. Other mechanisms, such as the 
	S/MIME mechanism described in RFC 3261, or perhaps future mechanisms yet to be defined
<!--	the mechanisms described in 
	<xref target="I-D.wing-sip-identity-media"/> or <xref target="I-D.fischer-sip-e2e-sec-media"/>,-->
	could also serve this purpose.
      </t>
      <t>
	While this mechanism can still be used without such integrity mechanisms,
	the security provided is limited to defense against passive attack by
	intermediaries. An active attack on the signaling plus an active attack
	on the media plane can allow an attacker to attack the connection (R-SIG-MEDIA
	in the notation of <xref target="I-D.ietf-sip-media-security-requirements"/>).
      </t>
      <section title="Responder Identity">
	<t>
	  SIP Identity does not support signatures in responses.
	  Ideally Alice would want to know that Bob's SDP had not been tampered with and who it was
	  from so that Alice's User Agent could indicate to Alice that there was a secure phone call
	  to Bob. <xref target="RFC4916"/> defines an approach for a UA to supply its identity to its peer
	UA and for this identity to be signed by an authentication service. For example, using
	this approach, Bob sends an answer, then immediately follows up with an UPDATE that
	includes the fingerprint and uses the SIP Identity mechanism to assert that the message is
	from Bob@example.com. The downside of this approach is that it requires the extra round
	trip of the UPDATE. However, it is simple and secure even when not all of the proxies are
	trusted. In this example, Bob only needs to trust his proxy. 
	Answerers SHOULD use this UPDATE mechanism.
	</t>
	<t>In some cases, answerers will not send an UPDATE and in many calls,
	some media will be sent before the UPDATE is received.
	In these cases, no integrity is provided for the fingerprint from Bob to Alice. In this
	approach, an attacker that was on the signaling path could tamper with the fingerprint and
	insert themselves as a man-in-the-middle on the media. Alice would know that she had a
	secure call with someone but would not know if it was with Bob or a man-in-the-middle. Bob
	would know that an attack was happening. The fact that one side can detect this attack
	means that in most cases where Alice and Bob both wish the communications to be encrypted
	there is not a problem. Keep in mind that in any of the possible approaches Bob could
	always reveal the media that was received to anyone. We are making the assumption that Bob
	also wants secure communications. In this do-nothing case, Bob knows the media has not
	been tampered with or intercepted by a third party and that it is from Alice@example.com.
	Alice knows that she is talking to someone and that whoever that is has probably checked
	that the media is not being intercepted or tampered with. This approach is certainly less
	than ideal but very usable for many situations.</t>
      </section>

      <section title="SIPS">
	<t>If SIP Identity is not used, but the signaling is protected by SIPS,
	the security guarantees are weaker. Some security is still
	provided as long as all proxies are trusted. This provides integrity
	for the fingerprint in a chain-of-trust security model. Note,
	however, that if the proxies are not trusted, then the level of 
	security provided is limited.
	</t>
      </section>

      <section title="S/MIME">
	<t><xref target="RFC3261">RFC 3261</xref> defines an S/MIME security mechanism for SIP that
	could be used to sign that the fingerprint was from Bob. This would be secure. </t>
      </section>

      <section title="Continuity of Authentication" anchor="sec.continuity">
	<t> One desirable property of a secure media system is to provide continuity of
	authentication: being able to ensure cryptographically that you are talking to the same
	person as before. With DTLS, continuity of authentication is achieved by having each side
	use the same public key/self-signed certificate for each connection (at least with a given
	peer entity). It then becomes possible to cache the credential (or its hash) and verify
	that it is unchanged. Thus, once a single secure connection has been established, an
	implementation can establish a future secure channel even in the face of future insecure
	signalling. </t>
	<t> In order to enable continuity of authentication, implementations SHOULD attempt to keep
	a constant long-term key. Verifying implementations SHOULD maintain a cache of the key
	used for each peer identity and alert the user if that key changes. </t>
      </section>
      <section title="Short Authentication String">
	<t>An alternative available to Alice and Bob is to use human speech to verify each other's
	identity and then to verify each other's fingerprints also using human speech. Assuming
	that it is difficult to impersonate another's speech and seamlessly modify the audio
	contents of a call, this approach is relatively safe. It would not be effective if other
	forms of communication were being used such as video or instant messaging. DTLS supports
	this mode of operation. The minimal secure fingerprint length is around 64 bits.</t>

	<t>ZRTP <xref target="I-D.zimmermann-avt-zrtp"/> includes a Short Authentication String (SAS) mode
	in which a unique per-connection bitstring is generated as part of the cryptographic
	handshake. The SAS can be as short as 25 bits and so is somewhat easier to read. 
	DTLS does not natively support
	this mode. Based on the level of deployment interest a TLS
	extension <xref target="RFC3546"/> could provide support for it. Note that SAS
	schemes only work well when the endpoints recognize each other's voices,
	which is not true in many settings (e.g., call centers).
	</t>
      </section>
      <section title="Limits of Identity Assertions">
	<t>
	  When RFC 4474 is used to bind the media keying material
	  to the SIP signalling, the assurances about the provenance
	  and security of the media are only as good as those for the
	  signalling. There are two important cases to note here:
	</t>
	<t>
	  <list style="symbols">
	    <t>
	      RFC 4474 assumes that the proxy with the certificate "example.com"
	      controls the namespace "example.com".  Therefore the RFC 4474
	      authentication service which is authoritative for a given
	      namespace can control which user is assigned each name.  Thus, the
	      authentication service can take an address formerly assigned to
	      Alice and transfer it to Bob. This is an intentional design
	      feature of RFC 4474 and a direct consequence of the SIP namespace
	      architecture.
	    </t>
	    <t>
	      When phone number URIs (e.g.,
	      'sip:+17005551008@chicago.example.com' or
	      'sip:+17005551008@chicago.example.com;user=phone') are used,
	      there is no
	      structural reason to trust that the domain name is authoritative
	      for a given phone number, although individual proxies and UAs may
	      have private arrangements that allow them to trust other domains.
	      This is a structural issue in that PSTN elements are trusted to
	      assert their phone number correctly and that there is no real
	      concept of a given entity being authoritative for some number
	      space.
	    </t>
	  </list>
	</t>
	<t>
	  In both of these cases, the assurances that DTLS-SRTP provides in
	  terms of data origin integrity and confidentiality are necessarily
	  no better than SIP provides for signalling integrity when RFC 4474
	  is used. Implementors should therefore take care not to indicate
	  misleading peer identity information in the user interface.
	  For example, if the peer's identity is
	  sip:+17005551008@chicago.example.com, it is not sufficient to
	  display the identity of the peer as +17005551008, unless
	  there is some policy that states that the domain "chicago.example.com"
	  is trusted to assert the E.164 numbers it is asserting. In cases
	  where the UA can determine that the peer identity is clearly an
	  E.164 number, it may be less confusing to simply identify the call
	  as encrypted but to an unknown peer.
	</t>
	<t>
	  In addition, some middleboxes (B2BUAs and Session Border
	  Controllers) are known to modify portions of the SIP message which
	  are included in the RFC 4474 signature computation, thus breaking
	  the signature.  This sort of man-in-the-middle operation is
	  precisely the sort of message modification that RFC 4474 is intended to
	  detect.  In cases where the middlebox is itself permitted to
	  generate valid RFC 4474 signatures (e.g., it is within the same
	  administrative domain as the RFC 4474 authentication service), then
	  it may generate a new signature on the modified
	  message. Alternately, the middlebox may be able to sign with some
	  other identity that it is permitted to assert. Otherwise, the
	  recipient cannot rely on the RFC 4474 Identity assertion and
	  the UA MUST NOT indicate to the user that a secure call has
	  been established to the claimed identity. Implementations which
	  are configured to only establish secure calls SHOULD terminate
	  the call in this case.
	</t>
	<t>
	  If SIP Identity or an equivalent mechanism is not used, then 
	  only protection against attackers who cannot actively change
	  the signaling is provided. While this is still superior to
	  previous mechanisms, the security provided is inferior to that
	  provided if integrity is provided for the signaling.
	</t>
      </section>
      <section title="Perfect Forward Secrecy">
        <t> One concern about the use of a long-term key is that compromise of that key may lead to
          compromise of past communications. In order to prevent this attack, DTLS supports modes
          with Perfect Forward Secrecy using Diffie-Hellman and Elliptic-Curve Diffie-Hellman cipher
          suites. When these modes are in use, the system is secure against such attacks. Note that
          compromise of a long-term key may still lead to future active attacks. If this is a
          concern, a backup authentication channel such as manual fingerprint establishment or a
          short authentication string should be used. </t>
      </section>
    </section>

    <section title="IANA Considerations">
      <t>This specification does not require any IANA actions.</t>
    </section>

    <section title="Acknowledgments">
      <t>Cullen Jennings contributed substantial text and comments to this
        document. This document benefited from discussions with Francois Audet,
        Nagendra Modadugu, and Dan Wing. Thanks also for useful comments by
        Flemming Andreasen, Jonathan Rosenberg, Rohan Mahy, David McGrew, Miguel
        Garcia, Steffen Fries, Brian Stucker, Robert Gilman, David Oran, and Peter Schneider.</t>
      <t>We would like to thank Thomas Belling, 
        Guenther Horn, Steffen Fries, Brian Stucker, Francois Audet, Dan Wing, Jari Arkko, and 
        Vesa Lehtovirta for their input regarding traversal of SBCs.</t>
    </section>
  </middle>

  <back>
    <references title="Normative References"> 
      &rfc2119; &rfc3261; &rfc3264; &rfc3280; &rfc3323;  &rfc3550; &rfc4145;
      &rfc4347; &rfc4474; &rfc4566; &rfc4572;
      &I-D.ietf-behave-rfc3489bis;
    </references>

    <references title="Informational References"> &rfc4571; &rfc3325; 
      &I-D.ietf-mmusic-ice; &rfc4567; &rfc4568;
      &I-D.zimmermann-avt-zrtp;
      &I-D.mcgrew-srtp-ekt; &I-D.ietf-avt-dtls-srtp;
      &I-D.ietf-sip-media-security-requirements;
      &I-D.ietf-mmusic-sdp-capability-negotiation; &I-D.ietf-avt-rtp-and-rtcp-mux;
      &rfc3262; &rfc3546; &rfc4916; <!--      &rfc3548; --> &rfc3711;
      &rfc3830;  &I-D.wing-sipping-srtp-key; 
      &I-D.wing-avt-dtls-srtp-key-transport;
      &I-D.ietf-mmusic-media-path-middleboxes; 
<!--      &I-D.fischer-sip-e2e-sec-media;
      &I-D.wing-sip-identity-media;-->
      &I-D.ietf-sip-ua-privacy;
</references>


    <section title="Requirements Analysis">
      <t>
        <xref target="I-D.ietf-sip-media-security-requirements"/> describes security requirements
        for media keying. This section evaluates this proposal with respect to each requirement. </t>

      <section title="Forking and retargeting (R-FORK-RETARGET, R-BEST-SECURE, R-DISTINCT)">
        <t>In this draft, the SDP offer (in the INVITE) is simply an advertisement of the capability
          to do security. This advertisement does not depend on the identity of the communicating
          peer, so forking and retargeting work when all the endpoints will do SRTP. When a mix
          of SRTP and non-SRTP endpoints are present, we use the SDP capabilities
          mechanism currently being defined <xref
            target="I-D.ietf-mmusic-sdp-capability-negotiation"/> to transparently negotiate
          security where possible. Because DTLS establishes a new key for each session, only the
          entity with which the call is finally established gets the media encryption keys (R3).
        </t>
      </section>

      <section title="Distinct Cryptographic Contexts (R-DISTINCT)">
        <t> DTLS performs a new DTLS handshake with each endpoint, which establishes
	distinct keys and cryptographic contexts for each endpoint.</t>
      </section>        

      <section title="Reusage of a Security Context (R-REUSE)">
        <t>DTLS allows sessions to be resumed with the 'TLS session resumption' 
	functionality. This
          feature can be used to lower the amount of cryptographic computation that needs to be done
          when two peers re-initiate the communication. 
	See <xref target="I-D.ietf-avt-dtls-srtp"/> for more on session resumption
	in this context.</t>
      </section>

      <section title="Clipping (R-AVOID-CLIPPING)">
        <t> Because the key establishment occurs in the media plane, media need not be clipped
          before the receipt of the SDP answer. Note, however, that only confidentiality
	  is provided until the offerer receives the answer: the answerer knows that they
	  are not sending data to an attacker but the offerer cannot know that they are 
	  receiving data from the answerer.
	</t>
      </section>

       <section title="Passive Attacks on the Media Path (R-PASS-MEDIA)">
        <t>The public key algorithms used by DTLS ciphersuites, such as RSA, Diffie-Hellman, and
          Elliptic Curve Diffie-Hellman, are secure against passive attacks. </t>
      </section>
      <section title="Passive Attacks on the Signaling Path (R-PASS-SIG)">
        <t>DTLS provides protection against passive attacks by adversaries on the signaling path
          since only a fingerprint is exchanged using SIP signaling.</t>
      </section>

      <section title="(R-SIG-MEDIA, R-ACT-ACT)">
        <t>
	An attacker who controls the media channel but not the
	signalling channel can perform
	a MITM attack on the DTLS handshake but this will
	change the certificates which will cause the fingerprint
	check to fail. Thus, any successful attack requires
	that the attacker modify the signalling messages
	to replace the fingerprints.
	</t>
	<t>
	If RFC 4474 Identity or an equivalent mechanism is
	used, an attacker who controls the signalling channel at 
	any point between the proxies performing the Identity
	signatures cannot modify the fingerprints without
	invalidating the signature. Thus, even an
	attacker who controls both signalling and media paths
	cannot successfully attack the media traffic.
	Note that the channel between the UA and the 
	authentication service MUST be secured and the
	authentication service MUST verify the UA's identity
	in order for this mechanism to be secure.
	</t>
	<t>
	Note that an attacker who controls the authentication
	service can impersonate the UA using that authentication
	service. This is an intended feature of SIP Identity--the
	authentication service owns the namespace and therefore
	defines which user has which identity.
	</t>
      </section>
      
      <section title="Binding to Identifiers (R-ID-BINDING)">
	<t> When an end-to-end mechanism such as
	SIP-Identity
	<xref target="RFC4474"/> and SIP-Connected-Identity
	<xref target="RFC4916"/> or S/MIME are used, they bind the
	endpoint's certificate fingerprints to the From: address
	in the signalling. The fingerprint is
	covered by the Identity signature. When other
	mechanisms (e.g., SIPS) are used, then the binding
	is correspondingly weaker.</t>
      </section>

      

      <section title="Perfect Forward Secrecy (R-PFS)">
        <t> DTLS supports Diffie-Hellman and Elliptic Curve Diffie-Hellman cipher suites which
          provide PFS. </t>
      </section>

      <section title="Algorithm Negotiation (R-COMPUTE)">
        <t> DTLS negotiates cipher suites before performing significant cryptographic computation
          and therefore supports algorithm negotiation and multiple cipher suites without additional
          computational expense. </t>
      </section>

      <section title="RTP Validity Check (R-RTP-VALID)">
        <t>DTLS packets do not pass the RTP validity check. The first
	byte of a DTLS packet is the content type and all current
	DTLS content types have the first two bits set to zero,
	resulting in a version of 0, thus failing the 
	first validity check. DTLS packets can also be distinguished from
	STUN packets. See <xref target="I-D.ietf-avt-dtls-srtp"/> for details
	on demultiplexing.</t>
      </section>

      <!-- 
      <section title="Endpoint Idenfification When Forking (R10)">
        <t> Once the SDP response is received, the implementation can match the fingerprint against
          the offered client Certificate message. Note, however, that if the server is using
          ephemeral DH or ECDH, it still must compute a fresh DH share and sign it in the
          ServerKeyExchange. This could be optimized away by having a DTLS ClientHello extension in
          which the client provide a copy of its fingerprint in advance. </t>
      </section>
      -->

      <section title="3rd Party Certificates (R-CERTS, R-EXISTING)">
        <t> Third party certificates are not required because signalling
	  (e.g., <xref target="RFC4474"/>) is used to authenticate
	  the certificates used by DTLS. However, if the parties share an
          authentication infrastructure that is compatible with TLS (3rd party certificates or
          shared keys) it can be used. </t>
      </section>

      <section title="FIPS 140-2 (R-FIPS)">
        <t> TLS implementations may already be FIPS 140-2 approved and the algorithms used here are
          consistent with the approval of DTLS and DTLS-SRTP. </t>
      </section>

      <section title="Linkage between Keying Exchange and SIP Signaling (R-ASSOC)">
        <t>The signaling exchange is linked to the key management exchange using the fingerprints
          carried in SIP and the certificates exchanged in DTLS.</t>
      </section>

      <section title="Denial of Service Vulnerability (R-DOS)">
        <t>DTLS offers some degree of DoS protection as a built-in feature 
	(see Section 4.2.1 of RFC 4347).</t>
      </section>

      <section title="Crypto-Agility (R-AGILITY)">
        <t>DTLS allows ciphersuites to be negotiated and hence new algorithms can be incrementally
          deployed. Work on replacing the fixed MD5/SHA-1 key derivation function is ongoing.</t>
      </section>

      <section title="Downgrading Protection (R-DOWNGRADE)">
        <t>DTLS provides protection against downgrading attacks since the selection of the offered
          ciphersuites is confirmed in a later stage of the handshake. This protection is effective
          unless an adversary is able to break a ciphersuite in real-time.
	  RFC 4474 is able to
	  prevent an active attacker on the signalling path from downgrading the call
	from SRTP to RTP.</t>
      </section>

      <section title="Media Security Negotiation (R-NEGOTIATE)">
        <t>DTLS allows a User Agent to negotiate media security parameters for each individual
          session.</t>
      </section>

      <section title="Signaling Protocol Independence (R-OTHER-SIGNALING)">
        <t>The DTLS-SRTP framework does not rely on SIP; every protocol that is capable of
          exchanging a fingerprint and the media description can be secured.</t>
      </section>

      <section title="Media Recording (R-RECORDING)">
        <t>An extension, see <xref target="I-D.wing-sipping-srtp-key"/>, has been specified to
          support media recording that does not require intermediaries to act as a MITM.</t>
        <t>When media recording is done by intermediaries then they need to act as a MITM.</t>
      </section>

      <section title="Interworking with Intermediaries (R-TRANSCODER)">
        <t>In order to interface with any intermediary that transcodes the media, the
	transcoder must have access to the keying material and be treated as an 
	endpoint for the purposes of this document.
	</t>
      </section>

      <section title="PSTN Gateway Termination (R-PSTN)">
        <t> The DTLS-SRTP framework allows the media security to terminate at a PSTN gateway. This does not provide end-to-end security, but is consistent with the
security goals of this framework because the gateway is authorized to speak
for the PSTN namespace. </t>
      </section>

      <section title="R-ALLOW-RTP">
	<t>DTLS-SRTP allows RTP media to be received by the calling
	party until SRTP has been negotiated with the answerer, after which SRTP
	is preferred over RTP.
	</t>
      </section>
    
      <section title="R-HERFP">
	<t>
	  The Heterogeneous Error Response Forking Problem (HERFP) is not
	  applicable to DTLS-SRTP since the key exchange protocol will be executed
	  along the media path and hence error messages are communicated along
	  this path and proxies do not need to progress them.
	</t>
      </section>
      </section>
  </back>
</rfc>

<!-- Keep this comment at the end of the file
Local variables:
mode: xml
sgml-omittag:nil
sgml-shorttag:nil
sgml-namecase-general:nil
sgml-general-insert-case:lower
sgml-minimize-attributes:nil
sgml-always-quote-attributes:t
sgml-indent-step:2
sgml-indent-data:nil
sgml-parent-document:nil
sgml-exposed-tags:nil
sgml-local-catalogs:nil
sgml-local-ecat-files:nil
End:
-->
PAFTECH AB 2003-2026
2026-04-23 10:10:12