<?xml version="1.0" encoding="US-ASCII"?>
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC3261 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3261.xml">
<!ENTITY RFC3262 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3262.xml">
<!ENTITY RFC3264 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3264.xml">
<!ENTITY RFC3711 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3711.xml">
<!ENTITY RFC5027 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5027.xml">
<!ENTITY RFC3550 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3550.xml">
<!ENTITY RFC3372 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3372.xml">
<!ENTITY I-D.ietf-mmusic-ice SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-mmusic-ice.xml">
<!ENTITY I-D.stucker-sipping-early-media-coping SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.stucker-sipping-early-media-coping.xml">
<!ENTITY RFC4474 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4474.xml">
<!ENTITY I-D.wing-sipping-srtp-key SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.wing-sipping-srtp-key.xml">
<!ENTITY rfc4568 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4568.xml">
<!ENTITY rfc4650 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4650.xml">
<!ENTITY I-D.ietf-msec-mikey-ecc SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-msec-mikey-ecc.xml">
<!ENTITY rfc4738 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4738.xml">
<!ENTITY RFC4949 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4949.xml">
<!ENTITY I-D.ietf-sip-certs SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-sip-certs.xml">
<!ENTITY I-D.mahy-sipping-herfp-fix SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.mahy-sipping-herfp-fix.xml">
<!ENTITY rfc3830 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3830.xml">
<!ENTITY rfc4492 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4492.xml">
<!ENTITY rfc3388 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3388.xml">
<!ENTITY rfc4346 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4346.xml">
<!ENTITY rfc4916 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4916.xml">
<!ENTITY I-D.fischl-sipping-media-dtls SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.fischl-sipping-media-dtls.xml">
<!ENTITY I-D.ietf-msec-mikey-applicability SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-msec-mikey-applicability.xml">
<!ENTITY I-D.zimmermann-avt-zrtp SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.zimmermann-avt-zrtp.xml">
<!ENTITY I-D.baugher-mmusic-sdp-dh SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.baugher-mmusic-sdp-dh.xml">
<!ENTITY I-D.mcgrew-srtp-ekt SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.mcgrew-srtp-ekt.xml">
<!ENTITY I-D.ietf-mmusic-media-path-middleboxes SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-mmusic-media-path-middleboxes.xml">
<!ENTITY rfc4771 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4771.xml">
<!ENTITY I-D.jennings-sipping-multipart SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.jennings-sipping-multipart.xml">
<!ENTITY I-D.ietf-avt-dtls-srtp SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-avt-dtls-srtp.xml">
<!ENTITY I-D.dondeti-msec-rtpsec-mikeyv2 SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.dondeti-msec-rtpsec-mikeyv2.xml">
<!ENTITY I-D.ietf-mmusic-sdp-capability-negotiation SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-mmusic-sdp-capability-negotiation.xml">
]>
<?rfc toc="yes" ?>
<?rfc symrefs="yes" ?>
<?rfc sortrefs="yes"?>
<?rfc iprnotified="no" ?>
<?rfc strict="yes" ?>
<?rfc compact="yes" ?>
<?rfc subcompact="no" ?>
<?rfc rfcprocack="yes"?>
<rfc category="info" docName="draft-ietf-sip-media-security-requirements-04"
ipr="full3978">
<front>
<title abbrev="Media Security Requirements">Requirements and Analysis of
Media Security Management Protocols</title>
<author fullname="Dan Wing" initials="D." role="editor" surname="Wing">
<organization abbrev="Cisco">Cisco Systems, Inc.</organization>
<address>
<postal>
<street>170 West Tasman Drive</street>
<city>San Jose</city>
<region>CA</region>
<code>95134</code>
<country>USA</country>
</postal>
<email>dwing@cisco.com</email>
</address>
</author>
<author fullname="Steffen Fries" initials="S." surname="Fries">
<organization>Siemens AG</organization>
<address>
<postal>
<street>Otto-Hahn-Ring 6</street>
<city>Munich</city>
<region>Bavaria</region>
<code>81739</code>
<country>Germany</country>
</postal>
<email>steffen.fries@siemens.com</email>
</address>
</author>
<author fullname="Hannes Tschofenig" initials="H." surname="Tschofenig">
<organization>Nokia Siemens Networks</organization>
<address>
<postal>
<street>Otto-Hahn-Ring 6</street>
<city>Munich</city>
<region>Bavaria</region>
<code>81739</code>
<country>Germany</country>
</postal>
<email>Hannes.Tschofenig@nsn.com</email>
<uri>http://www.tschofenig.com</uri>
</address>
</author>
<author fullname="Francois Audet" initials="F." surname="Audet">
<organization>Nortel</organization>
<address>
<postal>
<street>4655 Great America Parkway</street>
<city>Santa Clara</city>
<region>CA</region>
<code>95054</code>
<country>USA</country>
</postal>
<email>audet@nortel.com</email>
</address>
</author>
<date year="2008" />
<area>RAI</area>
<workgroup>SIP Working Group</workgroup>
<keyword>keying</keyword>
<keyword>Secure RTP</keyword>
<keyword>SRTP</keyword>
<abstract>
<t>This document describes requirements for a protocol to negotiate a
security context for SIP-signaled SRTP media. In addition to the natural
security requirements, this negotiation protocol must interoperate well
with SIP in certain ways. A number of proposals have been published and
a summary of these proposals is in the appendix of this document.</t>
</abstract>
</front>
<middle>
<section title="Introduction">
<t>The work on media security started when the Session Initiation
Protocol (SIP) was still in its infancy. With the increased SIP
deployment and the availability of new SIP extensions and related
protocols, the need for end-to-end security was re-evaluated. The
procedure of re-evaluating prior protocol work and design decisions is
not an uncommon strategy and, to some extent, considered necessary to
ensure that the developed protocols indeed meet the previously
envisioned needs for the users on the Internet.</t>
<t>This document summarizes media security requirements, i.e.,
requirements for mechanisms that negotiate security context such as
cryptographic keys and parameters for SRTP.</t>
<t>The organization of this document is as follows: <xref
target="terminology"></xref> introduces terminology, <xref
target="attack_scenarios"></xref> describes various attack scenarios
against the signaling path and media path, <xref
target="scenarios"></xref> provides an overview about possible call
scenarios, <xref target="requirements"></xref> lists requirements for
media security. The main part of the document concludes with the
security considerations <xref target="security"></xref>, IANA
considerations <xref target="iana"></xref> and an acknowledgement
section in <xref target="acks"></xref>. <xref
target="comparison"></xref> lists and compares available solution
proposals. The following <xref target="eval-sip"></xref> compares the
different approaches regarding their suitability for the SIP signaling
scenarios described in <xref target="scenarios"></xref>, while <xref
target="eval-sec"></xref> provides a comparison regarding security
aspects. <xref target="ofs"></xref> lists non-goals for this
document.</t>
</section>
<section anchor="terminology" title="Terminology">
<t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
"SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
document are to be interpreted as described in <xref
target="RFC2119"></xref>, with the important qualification that, unless
otherwise stated, these terms apply to the design of the media security
key management protocol, not its implementation or application.</t>
<t>Additionally, the following items are used in this document:</t>
<t><list style="hanging">
<t hangText="AOR (Address-of-Record): ">A SIP or SIPS URI that
points to a domain with a location service that can map the URI to
another URI where the user might be available. Typically, the
location service is populated through registrations. An AOR is
frequently thought of as the "public address" of the user.</t>
<t hangText="SSRC:">The 32-bit value that defines the
synchronization source, used in RTP. These are generally unique, but
collisions can occur.</t>
<t hangText="two-time pad:">The use of the same key and the same
keystream to encrypt different data. For SRTP, a two-time pad occurs
if two senders are using the same key and the same RTP SSRC
value.</t>
<t hangText="Perfect Forward Secrecy (PFS):">The property that
disclosure of the long-term secret keying material that is used to
derive an agreed ephemeral key does not compromise the secrecy of
agreed keys from earlier runs.</t>
<t hangText="active adversary:">An active adversary is able to alter
data communication to affect its operation (see also <xref
target="RFC4949"></xref>).</t>
<t hangText="passive adversary:">A passive adversary is able to
learn information from data communication, but not alter that data
communication (see also <xref target="RFC4949"></xref>).</t>
<t hangText="signaling path:">The signaling path is the route taken
by SIP signaling messages transmitted between the calling and called
user agents. This can be either direct signaling between the calling
and called user agents or, more commonly, signaling that passes through
the SIP proxy servers that were involved in the call setup.</t>
<t hangText="media path:">The media path is the route taken by media
packets exchanged by the endpoints. In the simplest case, the
endpoints exchange media directly, and the "media path"
is defined by a quartet of IP addresses and TCP/UDP ports, along
with an IP route. In other cases, this path may include RTP relays,
mixers, transcoders, session border controllers, NATs, or media
gateways.</t>
</list></t>
</section>
<section anchor="attack_scenarios" title="Attack Scenarios">
<t>The discussion in this section relates to requirements R-PASS-MEDIA,
R-PASS-SIG, R-ASSOC, R-SIG-MEDIA, R-ACT-ACT, and R-ID-BINDING.</t>
<t>This document classifies adversaries according to their access and
their capabilities. An adversary might have access:<list style="numbers">
<t hangText="(1)">only to the media path,</t>
<t hangText="(2)">only to the signaling path,</t>
<t hangText="(3)">to the media path and to the signaling path.</t>
</list></t>
<t>An attacker that can solely be located along the signaling path, and
does not have access to media (item 2), is not considered in this
document.</t>
<t>There are two different types of adversaries, active and passive. An
active adversary may need to be active with regard to the key exchange
relevant information traveling along the media path or traveling along
the signaling path.</t>
<t>Based on their robustness against the adversary capabilities
described above, we can group security mechanisms using the following
labels. This list is generally ordered from easiest to compromise (at
the top) to more difficult to compromise:</t>
<texttable>
<ttcol align="center">SIP signaling</ttcol>
<ttcol align="center">media</ttcol>
<ttcol align="center">abbreviation</ttcol>
<c>none</c>
<c>passive</c>
<c>no-signaling-passive-media</c>
<c>none</c>
<c>active</c>
<c>no-signaling-active-media</c>
<c>passive</c>
<c>passive</c>
<c>passive-signaling-passive-media</c>
<c>passive</c>
<c>active</c>
<c>passive-signaling-active-media</c>
<c>active</c>
<c>passive</c>
<c>active-signaling-passive-media</c>
<c>active</c>
<c>active</c>
<c>active-signaling-active-media</c>
<c>active</c>
<c>active</c>
<c>active-signaling-active-media-detect</c>
</texttable>
<t><list style="hanging">
<t hangText="no-signaling-passive-media:"><vspace blankLines="0" />
Access to only the media path is sufficient to reveal the content of
the media traffic.</t>
<t hangText="passive-signaling-passive-media:"><vspace
blankLines="0" />Passive attack on the signaling and passive attack
on the media path is necessary to reveal the content of the media
traffic.</t>
<t hangText="passive-signaling-active-media:"><vspace
blankLines="0" /> Passive attack on the signaling and active attack
on the media path is necessary to reveal the content of the media
traffic.</t>
<t hangText="active-signaling-passive-media:"><vspace
blankLines="0" />Active attack on the signaling path and passive
attack on the media path is necessary to reveal the content of the
media traffic.</t>
<t hangText="no-signaling-active-media:"><vspace
blankLines="0" />Active attack on the media path is sufficient to
reveal the content of the media traffic.</t>
<t hangText="active-signaling-active-media:"><vspace
blankLines="0" />Active attack on both the signaling path and the
media path is necessary to reveal the content of the media
traffic.</t>
<t hangText="active-signaling-active-media-detect:"><vspace
blankLines="0" />Active attack on both signaling and media path is
necessary to reveal the content of the media traffic (as with
active-signaling-active-media), and the attack is detectable by
protocol messages exchanged between the end points.</t>
</list></t>
<t>For example, unencrypted RTP is vulnerable to
no-signaling-passive-media.</t>
<t>As another example, <xref target="RFC4568">Security
Descriptions</xref>, when protected by TLS (as it is commonly
implemented and deployed), belongs in the
passive-signaling-passive-media category since the adversary needs to
learn the Security Descriptions key by seeing the SIP signaling message
at a SIP proxy (assuming that the adversary is in control of the SIP
proxy). The media traffic can be decrypted using that learned key.</t>
<t>As another example, DTLS-SRTP falls into
active-signaling-active-media category when DTLS-SRTP is used with a
public key based ciphersuite with self-signed certificates and without
<xref target="RFC4474">SIP-Identity</xref>. An adversary would have to
modify the fingerprint that is sent along the signaling path and
subsequently to modify the certificates carried in the DTLS handshake
that travel along the media path. If DTLS-SRTP is used with both <xref
target="RFC4474">SIP Identity</xref> and <xref target="RFC4916">SIP
Connected Identity</xref>, the RFC4474 signature protects both the offer
and the answer, and such a system would then belong to the
active-signaling-active-media-detect category (provided, of course, the
signaling path to the RFC4474 authenticator and verifier is secured as
per RFC4474 and the RFC4474 authenticator and verifier are behaving as
per RFC4474).</t>
<t>The above discussion of DTLS-SRTP demonstrates how a single security
protocol can be in different classes depending on the mode in which it
is operated. Other protocols can achieve similar effect by adding
functions outside of the on-the-wire key management protocol itself.
Although it may be appropriate to deploy lower-classed mechanisms in
some cases, the ultimate security requirement for a media security
negotiation protocol is that it have a mode of operation available in
which it is active-signaling-active-media-detect, which provides
protection against the passive and active attacks and provides detection
of such attacks. That is,
there must be a way to use the protocol so that an active attack is
required against both the signaling and media paths, and so that such
attacks are detectable by the endpoints.</t>
</section>
<section anchor="scenarios" title="Call Scenarios">
<t>The following subsections describe call scenarios that pose the most
challenge to the key management system for media data in cooperation
with SIP signaling.</t>
<!-- ====================================================================== -->
<section anchor="clipping"
title="Clipping Media Before Signaling Answer">
<t>The discussion in this section relates to requirement
R-AVOID-CLIPPING.</t>
<t>Per the SDP Offer/Answer Model <xref target="RFC3264"></xref>,</t>
<t><list>
<t>"Once the offerer has sent the offer, it MUST be prepared to
receive media for any recvonly streams described by that offer. It
MUST be prepared to send and receive media for any sendrecv
streams in the offer, and send media for any sendonly streams in
the offer (of course, it cannot actually send until the peer
provides an answer with the needed address and port
information)."</t>
</list></t>
<t>To meet this requirement with SRTP, the offerer needs to know the
SRTP key for arriving media. If either endpoint receives encrypted
media before it has access to the associated SRTP key, it cannot play
the media -- causing clipping.</t>
<t>For key exchange mechanisms that send the answerer's key in SDP, a
SIP provisional response <xref target="RFC3261"></xref>, such as 183
(session progress), is useful. However, the 183 messages are not
reliable unless both the calling and called end point support PRACK
<xref target="RFC3262"></xref>, use TCP across all SIP proxies,
implement Security Preconditions <xref target="RFC5027"></xref>, or
both ends implement ICE <xref target="I-D.ietf-mmusic-ice"></xref>
and the answerer implements the reliable provisional response
mechanism described in ICE. Unfortunately, there is not wide
deployment of any of these techniques and there is industry reluctance
to require these techniques to avoid the problems described in this
section.</t>
<t>Note that the receipt of an SDP answer is not always sufficient to
allow media to be played to the offerer. Sometimes, the offerer must
send media in order to open up firewall holes or NAT bindings before
media can be received (for details see <xref
target="I-D.ietf-mmusic-media-path-middleboxes"></xref>). In this
case, even a solution that makes the key available before the SDP
answer arrives will not help.<!-- Here additional measures as
using ICE may provide a solution space. --></t>
<t>Fixes to early media (i.e., the media that arrives at the SDP
offerer before the SDP answer arrives) might make the requirements
become obsolete, but at the time of writing no progress has been
accomplished.</t>
</section>
<!-- === -->
<section anchor="forking" title="Retargeting and Forking">
<t>The discussion in this section relates to requirements
R-FORK-RETARGET, R-DISTINCT, R-HERFP, and R-BEST-SECURE.</t>
<t>In SIP, a request sent to a specific AOR but delivered to a
different AOR is called a "retarget". A typical scenario is a "call
forwarding" feature. In <xref target="retargeting_figure"></xref>
Alice sends an INVITE in step 1 that is sent to Bob in step 2. Bob
responds with a redirect (SIP response code 3xx) pointing to Carol in
step 3. This redirect typically does not propagate back to Alice but
only goes to a proxy (i.e., the retargeting proxy) that sends the
original INVITE to Carol in step 4.</t>
<t><figure anchor="retargeting_figure" title="Retargeting">
<artwork align="center"><![CDATA[
+-----+
|Alice|
+--+--+
|
| INVITE (1)
V
+----+----+
| proxy |
++-+-----++
| ^ |
INVITE (2) | | | INVITE (4)
& redirect (3) | | |
V | V
++-++ ++----+
|Bob| |Carol|
+---+ +-----+
]]></artwork>
</figure></t>
<t>Using retargeting might lead to situations where the UAC does not
know where its request will be going. This might not immediately seem
like a serious problem; after all, when one places a telephone call on
the PSTN, one never really knows if it will be forwarded to a
different number, who will pick up the line when it rings, and so on.
However, when considering SIP mechanisms for authenticating the called
party, this function can also make it difficult to differentiate an
intermediary that is behaving legitimately from an attacker. From this
perspective, the main problems with retargeting are:</t>
<t><list style="hanging">
<t hangText="Not detectable by the caller: ">The originating user
agent has no means of anticipating that the condition will arise,
nor any means of determining that it has occurred until the call
has already been set up.</t>
<t hangText="Not preventable by the caller:">There is no existing
mechanism that might be employed by the originating user agent in
order to guarantee that the call will not be re-targeted.</t>
</list></t>
<t>The mechanism used by SIP for identifying the calling party is SIP
Identity <xref target="RFC4474"></xref>. However, due to the nature of
retargeting, SIP Identity can only identify the calling party (that is,
the party that initiated the SIP request). Some key exchange
mechanisms predate SIP Identity and include their own identity
mechanism (e.g., MIKEY). However, those built-in identity mechanisms
also suffer from the SIP retargeting problem. While <xref
target="RFC4916">Connected Identity</xref> allows positive
identification of the called party, the primary difficulty still
remains that the calling party does not know if a mismatched called
party is legitimate (i.e., due to authorized retargeting) or
illegitimate (i.e., due to unauthorized retargeting by an attacker
able to modify SIP signaling).</t>
<t>In SIP, 'forking' is the delivery of a request to multiple
locations. This happens when a single AOR is registered more than
once. An example of forking is when a user has a desk phone, PC
client, and mobile handset all registered with the same AOR.</t>
<t><figure anchor="forking_figure" title="Forking">
<artwork align="center"><![CDATA[
+-----+
|Alice|
+--+--+
|
| INVITE
V
+-----+-----+
| proxy |
++---------++
| |
INVITE | | INVITE
V V
+--+--+ +--+--+
|Bob-1| |Bob-2|
+-----+ +-----+
]]></artwork>
</figure></t>
<t>With forking, both Bob-1 and Bob-2 might send back SDP answers in
SIP responses. Alice will see those intermediate (18x) and final (200)
responses. It is useful for Alice to be able to associate the SIP
response with the incoming media stream. Although this association can
be done with ICE <xref target="I-D.ietf-mmusic-ice"></xref>, and ICE
is useful to make this association with RTP, it is not desirable to
require ICE to accomplish this association.</t>
<t>Forking and retargeting are often used together. For example, a
boss and secretary might have both phones ring (forking) and rollover
to voice mail if neither phone is answered (retargeting).</t>
<t>To maintain security of the media traffic, only the end point that
answers the call should know the SRTP keys for the session. Forked and
re-targeted calls only reveal sensitive information to non-responders
when the signaling messages contain sensitive information (e.g., SRTP
keys) that is accessible by parties that receive the offer, but may
not respond (i.e., the original recipients in a retargeted call, or
non-answering endpoints in a forked call). For key exchange mechanisms
that do not provide secure forking or secure retargeting, one
workaround is to re-key immediately after forking or retargeting.
However, because the originator may not be aware that the call forked,
this mechanism requires rekeying immediately after every session is
established. This doubles the number of messages processed by the
network.</t>
<t>Further compounding this problem is a unique feature of SIP that
when forking is used, there is always only one final error response
delivered to the sender of the request: the forking proxy is
responsible for choosing which final response to forward in the event
where forking results in multiple final error responses being received
by the forking proxy. This means that if a request is rejected, say
with information that the keying information was rejected and
providing the far end's credentials, it is very possible that the
rejection will never reach the sender. This problem, called the <xref
target="I-D.mahy-sipping-herfp-fix">Heterogeneous Error Response
Forking Problem (HERFP)</xref>, is difficult to solve in SIP. Because
we expect the HERFP to continue to be a problem in SIP for the
foreseeable future, a media security system should function even in
the presence of HERFP behavior.</t>
</section>
<!--
<section anchor="ICE4association" title="Using ICE to Associate Media and Signaling">
<t>In the absence of a technique in the key exchange to associate SIP signaling with the
media, ICE may be used. This technique does not need an external STUN server or external
TURN server; rather, what is used are ICE connectivity checks:</t>
<t>
<list style="symbols">
<t>The offer has at least one a=candidate line, matching the m/c lines</t>
<t>The answerer has to minimally support the new 'lite' mode of ICE. This means the
answerer's SDP also has an a=candidate line that matches its m/c lines. In ICE's
'lite' mode, the answerer only responds to STUN Binding Requests.</t>
<t>There are two ways the offerer will notice forking occurred:</t>
<list style="symbols">
<t>media (RTP or SRTP) arrives from different transport addresses</t>
<t>STUN connectivity checks with different STUN usernames arrive from different
transport addresses</t>
<t>multiple answers arrive in SIP signaling</t>
</list>
<t>When the offerer notices forking occurred, and the offerer needs to associate an SDP
answer with the media path, the offerer can send a STUN Binding Request to the address
specified in the SDP and perform ICE triggered checks, as specified by ICE. This
allows correlating the media path with the endpoint that generated the SDP answer.</t>
</list>
</t>
<t>[Editor's Note: Even though this describes a possible solution in a requirements
document, we listed it for further comments.]</t>
</section>
-->
<!-- === -->
<section anchor="conferencing" title="Shared Key Conferencing">
<t>The consensus on the RTPSEC mailing list was to concentrate on
unicast, point-to-point sessions. Thus, there are no requirements
related to shared key conferencing. This section is retained for
informational purposes.</t>
<t>For efficient scaling, large audio and video conference bridges
operate most efficiently by encrypting the current speaker once and
distributing that stream to the conference attendees. Typically,
inactive participants receive the same streams -- they hear (or see)
the active speaker(s), and the active speakers receive distinct
streams that don't include themselves. In order to maintain
confidentiality of such conferences where listeners share a common
key, all listeners must be rekeyed when a listener joins or leaves a
conference.</t>
<t>An important use case for mixers/translators is a conference
bridge:</t>
<t><figure anchor="figure_centralized_keying"
title="Centralized Keying">
<artwork align="center"><![CDATA[
+----+
A --- 1 --->| |
<-- 2 ----| M |
| I |
B --- 3 --->| X |
<-- 4 ----| E |
| R |
C --- 5 --->| |
<-- 6 ----| |
+----+
]]></artwork>
</figure></t>
<t>In the figure above, 1, 3, and 5 are RTP media contributions from
Alice, Bob, and Carol, and 2, 4, and 6 are the RTP flows to those
devices carrying the 'mixed' media.</t>
<t>Several scenarios are possible:</t>
<t><list style="letters">
<t>Multiple inbound sessions: 1, 3, and 5 are distinct RTP
sessions,</t>
<t>Multiple outbound sessions: 2, 4, and 6 are distinct RTP
sessions,</t>
<t>Single inbound session: 1, 3, and 5 are just different sources
within the same RTP session,</t>
<t>Single outbound session: 2, 4, and 6 are different flows of the
same (multi-unicast) RTP session</t>
</list></t>
<t>If there are multiple inbound sessions and multiple outbound
sessions (scenarios a and b), then every keying mechanism behaves as
if the mixer were an end point and can set up a point-to-point secure
session between the participant and the mixer. This is the simplest
situation, but is computationally wasteful, since SRTP processing has
to be done independently for each participant. The use of multiple
inbound sessions (scenario a) doesn't waste computational resources,
though it does consume additional cryptographic context on the mixer
for each participant and has the advantage of data origin
authentication.</t>
<t>To support a single outbound session (scenario d), the mixer has to
dictate its encryption key to the participants. Some keying mechanisms
allow the transmitter to determine its own key, and others allow the
offerer to determine the key for the offerer and answerer. Depending
on how the call is established, the offerer might be a participant
(such as a participant dialing into a conference bridge) or the
offerer might be the mixer (such as a conference bridge calling a
participant). The use of offerless INVITEs may help some keying
mechanisms reverse the role of offerer/answerer. A difficulty,
however, is knowing a priori if the role should be reversed for a
particular call. The significant advantage of a single outbound
session is that the number of SRTP encryption operations remains constant
even as the number of participants increases. However, a disadvantage
is that data origin authentication is lost, allowing any participant
to spoof the sender (because all participants know the sender's SRTP
key).</t>
</section>
<section anchor="recording" title="Recording">
<t>The discussion in this section relates to requirement
R-RECORDING.</t>
<t>Some business environments, such as stock brokers, banks, and
catalog call centers, require recording calls with customers. This is
the familiar "this call is being recorded for quality purposes" heard
during calls to these sorts of businesses. In these environments,
media recording is typically performed by an intermediate device (with
RTP, this is typically implemented in a 'sniffer').</t>
<t>When performing such call recording with SRTP, the end-to-end
security is compromised. This is unavoidable, but necessary because
the operation of the business requires such recording. It is desirable
that the media security is not unduly compromised by the media
recording. The endpoint within the organization needs to be informed
that there is an intermediate device and needs to cooperate with that
intermediate device.</t>
<t>This scenario does not place a requirement directly on the key
management protocol. The requirement could be met directly by the key
management protocol (e.g., MIKEY-NULL or <xref
target="RFC4568"></xref>) or through an external out-of-band-mechanism
(e.g., <xref target="I-D.wing-sipping-srtp-key"></xref>).</t>
</section>
<section anchor="pstn_gateway" title="PSTN gateway">
<t>The discussion in this section relates to requirement R-PSTN.</t>
<t>It is desirable, even when one leg of a call is on the PSTN, that
the IP leg of the call be protected with SRTP.</t>
<t>A typical case of using media security is where two entities are
having a VoIP conversation over IP capable networks. However, there
are cases where the other end of the communication is not connected to
an IP capable network. In this kind of setting, there needs to be some
kind of gateway at the edge of the IP network which converts the VoIP
conversation to a format understood by the other network. An example of
such a gateway is a PSTN gateway sitting at the edge of IP and PSTN
networks (such as the architecture described in <xref
target="RFC3372"></xref>).</t>
<t>If media security (e.g., SRTP protection) is employed in this kind
of gateway-setting, then media security and the related key management
is terminated at the PSTN gateway. The other network (e.g., PSTN) may
have its own measures to protect the communication, but this means
that from a media security point of view the media security is not
employed truly end-to-end between the communicating entities.</t>
</section>
<section title="Call Setup Performance">
<t>The discussion in this section relates to requirement R-REUSE.</t>
<t>Some devices lack sufficient processing power to perform public key
operations or Diffie-Hellman operations for each call, or prefer to
avoid performing those operations on every call. The ability to re-use
previous public key or Diffie-Hellman operations can vastly decrease
the call setup delay and processing requirements for such devices.</t>
<t>In certain devices, it can take a second or two to perform a
Diffie-Hellman operation. Examples of these devices include handsets,
IP Multimedia Services Identity Module (ISIMs), and PSTN gateways.
PSTN gateways typically utilize a Digital Signal Processor (DSP) which
is not yet involved with typical DSP operations at the beginning of a
call, thus the DSP could be used to perform the calculation, so as to
avoid having the central host processor perform the calculation.
However, not all PSTN gateways use DSPs (some have only central
processors or their DSPs are incapable of performing the necessary
public key or Diffie-Hellman operation), and handsets lack a separate,
unused processor to perform these operations.</t>
<t>Two scenarios where R-REUSE is useful are calls between an endpoint
and its voicemail server or its PSTN gateway. In those scenarios calls
are made relatively often and it can be useful for the voicemail
server or PSTN gateway to avoid public key operations for subsequent
calls.</t>
<t>Storing keys across sessions often interferes with perfect forward
secrecy (R-PFS).</t>
</section>
<section title="Transcoding">
<t>The discussion in this section relates to requirement
R-TRANSCODER.</t>
<t>In some environments it is necessary for network equipment to
transcode from one codec (e.g., a highly compressed codec which makes
efficient use of wireless bandwidth) to another codec (e.g., a
standardized codec to a SIP peering interface). With RTP, a
transcoding function can be performed with the combination of a SIP
B2BUA (to modify the SDP) and a processor to perform the transcoding
between the codecs. However, with end-to-end secured SRTP, a
transcoding function implemented the same way is a man in the middle
attack, and the key management system prevents its use.</t>
<t>However, such a network-based transcoder can still be realized with
the cooperation and approval of the endpoint, and can provide
end-to-transcoder and transcoder-to-end security.</t>
</section>
<section title="Upgrading to SRTP">
<t>The discussion in this section relates to the requirement
R-ALLOW-RTP.</t>
<t>Legitimate RTP media can be sent to an endpoint for announcements,
colorful ringback tones (e.g., music), advertising, or normal call
progress tones. The RTP may be received before an associated SDP
answer. For details on various scenarios, see <xref
target="I-D.stucker-sipping-early-media-coping"> </xref>.</t>
<t>While receiving such RTP exposes the calling party to a risk of
receiving malicious RTP from an attacker, SRTP endpoints will need to
receive and play out RTP media in order to be compatible with deployed
systems that send RTP to calling parties.</t>
</section>
</section>
<section anchor="requirements" title="Requirements">
<t>This section is divided into several parts: requirements specific to
the key management protocol (<xref target="req_key_mgmt"></xref>),
attack scenarios (<xref target="req_attack_scenario"></xref>), and
requirements which can be met inside the key management protocol or
outside of the key management protocol (<xref
target="req_outside_key_mgmt"></xref>).</t>
<section anchor="req_key_mgmt"
title="Key Management Protocol Requirements">
<t>SIP Forking and Retargeting, from <xref
target="forking"></xref>:<list hangIndent="6" style="hanging">
<t hangText="R-FORK-RETARGET:"><vspace blankLines="0" />The media
security key management protocol MUST securely support forking and
retargeting when all endpoints are willing to use SRTP without
causing the call setup to fail. This requirement means the
endpoints that did not answer the call MUST NOT learn the SRTP
keys (in either direction) used by the answering endpoint.</t>
<t hangText="R-DISTINCT:"><vspace blankLines="0" />The media
security key management protocol MUST be capable of creating
distinct, independent cryptographic contexts for each endpoint in
a forked session.</t>
<t hangText="R-HERFP:"><vspace blankLines="0" />The media security
key management protocol MUST function securely even in the
presence of HERFP behavior.</t>
</list>Performance considerations:<list hangIndent="6"
style="hanging">
<t hangText="R-REUSE:"><vspace blankLines="0" />The media security
key management protocol MAY support the re-use of a previously
established security context.<list>
<t>Note: re-use of the security context does not imply re-use
of RTP parameters (e.g., payload type or SSRC).</t>
</list></t>
</list>Media considerations:<list hangIndent="6" style="hanging">
<t hangText="R-AVOID-CLIPPING:"><vspace blankLines="0" />The media
security key management protocol SHOULD avoid clipping media
before SDP answer without requiring <xref
target="RFC5027">Security Preconditions</xref>. This requirement
comes from <xref target="clipping"></xref>.</t>
<t hangText="R-RTP-VALID:"><vspace blankLines="0" />If SRTP key
negotiation is performed over the media path (i.e., using the same
UDP/TCP ports as media packets), the key negotiation packets MUST
NOT pass the RTP validity check defined in Appendix A.1 of <xref
target="RFC3550"></xref>.</t>
<t hangText="R-ASSOC:"><vspace blankLines="0" />The media security
key management protocol SHOULD include a mechanism for associating
key management messages with both the signaling traffic that
initiated the session and with protected media traffic. Allowing
such an association also allows the SDP offerer to avoid
performing CPU-consuming operations (e.g., Diffie-Hellman or
public key operations) with attackers that have not seen the
signaling messages.<vspace blankLines="1" />For example, if using
a Diffie-Hellman keying technique with security preconditions that
forks to 20 end points, the call initiator would get 20
provisional responses containing 20 signed Diffie-Hellman key
pairs. Calculating 20 DH secrets and validating signatures can be
a difficult task depending on the device capabilities. Hence, in
the case of forking, it is not desirable to perform a DH or PK
operation with every party, but rather only with the party that
answers the call (and incur some media clipping). To do this, the
signaling and media need to be associated so the calling party
knows which key management needs to be completed. This might be
done by using the transport address indicated in the SDP, although
NATs can complicate this association.<list>
<t>Note: due to RTP's design requirements, it is expected that
SRTP receivers will have to perform authentication of any
received SRTP packets.</t>
</list></t>
<t hangText="R-NEGOTIATE:"><vspace blankLines="0" />The media
security key management protocol MUST allow a SIP User Agent to
negotiate media security parameters for each individual
session.</t>
<t hangText="R-PSTN:"><vspace blankLines="0" />The media security
key management protocol MUST support termination of media security
in a PSTN gateway. This requirement is from <xref
target="pstn_gateway"></xref>.</t>
</list></t>
</section>
<section anchor="req_attack_scenario" title="Security Requirements">
<t>This section describes overall security requirements and specific
requirements from the attack scenarios (<xref
target="attack_scenarios"></xref>).</t>
<t>Overall security requirements:<list hangIndent="6" style="hanging">
<t hangText="R-PFS:"><vspace blankLines="0" />The media security
key management protocol MUST be able to support perfect forward
secrecy.</t>
<t hangText="R-COMPUTE:"><vspace blankLines="0" />The media
security key management protocol MUST support offering additional
SRTP cipher suites without incurring significant computational
expense.</t>
<t hangText="R-CERTS:"><vspace blankLines="0" />If the media
security key management protocol employs certificates, it MUST be
able to make use of both self-signed and CA-issued certificates.
As an alternative, the media security key management protocol MAY
make use of "bare" public keys.</t>
<t hangText="R-FIPS:"><vspace blankLines="0" />The media security
key management protocol SHOULD use algorithms that allow <xref
target="FIPS-140-2">FIPS 140-2</xref> certification.<vspace
blankLines="1" /> Note that the United States Government can only
purchase and use crypto implementations that have been validated
by the <xref target="FIPS-140-2">FIPS-140</xref> process: <vspace
blankLines="1" /> "The FIPS-140 standard is applicable to all
Federal agencies that use cryptographic-based security systems to
protect sensitive information in computer and telecommunication
systems, including voice systems. The adoption and use of this
standard is available to private and commercial
organizations."<xref target="cryptval"></xref> <vspace
blankLines="1" /> Some commercial organizations, such as banks and
defense contractors, also require or prefer equipment which has
been validated by the FIPS-140 process.</t>
<t hangText="R-DOS:"><vspace blankLines="0" />The media security
key management protocol SHOULD NOT introduce new denial of service
vulnerabilities (e.g., the protocol should not request the
endpoint to perform CPU-intensive operations without the client
being able to validate or authorize the request).</t>
<t hangText="R-EXISTING:"><vspace blankLines="0" />The media
security key management protocol SHOULD allow endpoints to
authenticate using pre-existing cryptographic credentials, e.g.,
certificates or pre-shared keys.</t>
<t hangText="R-AGILITY:"><vspace blankLines="0" />The media
security key management protocol MUST provide crypto-agility,
i.e., the ability to adapt to evolving cryptography and security
requirements (update of cryptographic algorithms without
substantial disruption to deployed implementations).</t>
<t hangText="R-DOWNGRADE:"><vspace blankLines="0" />The media
security key management protocol MUST protect cipher suite
negotiation against downgrading attacks.</t>
<t hangText="R-PASS-MEDIA:"><vspace blankLines="0" />The media
security key management protocol MUST have a mode which prevents a
passive adversary with access to the media path from gaining
access to keying material used to protect SRTP media packets.</t>
<t hangText="R-PASS-SIG:"><vspace blankLines="0" />The media
security key management protocol MUST have a mode in which it
prevents a passive adversary with access to the signaling path
from gaining access to keying material used to protect SRTP media
packets.</t>
<t hangText="R-SIG-MEDIA:"><vspace blankLines="0" />The media
security key management protocol MUST have a mode in which it
defends itself from an attacker that is solely on the media path
and from an attacker that is solely on the signaling path. A
successful attack refers to the ability for the adversary to
obtain keying material to decrypt the SRTP encrypted media
traffic.</t>
<t hangText="R-ID-BINDING:"><vspace blankLines="0" />The media
security key management protocol MUST enable the media security
keys to be cryptographically bound to an identity of the endpoint.
<list>
<t>This allows domains to deploy <xref target="RFC4474">SIP
Identity</xref>.</t>
</list></t>
<t hangText="R-ACT-ACT:"><vspace blankLines="0" />The media
security key management protocol MUST support a mode of operation
that provides active-signaling-active-media-detect robustness, and
MAY support modes of operation that provide lower levels of
robustness (as described in <xref
target="attack_scenarios"></xref>).<list>
<t>Failing to meet R-ACT-ACT indicates the protocol cannot
provide secure end-to-end media.</t>
</list></t>
</list></t>
</section>
<section anchor="req_outside_key_mgmt"
title="Requirements Outside of the Key Management Protocol">
<t>The requirements in this section are for an overall VoIP security
system. These requirements can be met within the key management
protocol itself, or can be solved outside of the key management
protocol itself (e.g., solved in SIP or in SDP).<list hangIndent="6"
style="hanging">
<t hangText="R-BEST-SECURE:"><vspace blankLines="0" />Even when
some end points of a forked or retargeted call are incapable of
using SRTP, a solution MUST be described which allows the
establishment of SRTP associations with SRTP-capable endpoints and
/ or RTP associations with non-SRTP-capable endpoints. This
requirement comes from <xref target="forking"></xref>.</t>
<t hangText="R-OTHER-SIGNALING:"><vspace blankLines="0" />A
solution SHOULD be able to negotiate keys for SRTP sessions
created via different call signaling protocols (e.g., between
Jabber, SIP, H.323, MGCP).</t>
<t hangText="R-RECORDING:"><vspace blankLines="0" />A solution
SHOULD be described which supports recording of decrypted media.
This requirement comes from <xref target="recording"></xref>.</t>
<t hangText="R-TRANSCODER:"><vspace blankLines="0" />A solution
SHOULD be described which supports intermediate nodes (e.g.,
transcoders), terminating or processing media, between the end
points.</t>
<t hangText="R-ALLOW-RTP:"><vspace blankLines="0" />A solution SHOULD be described which
allows RTP media to be received by the calling party until SRTP
has been negotiated with the answerer, after which SRTP is preferred over RTP.</t>
</list></t>
</section>
</section>
<section anchor="security" title="Security Considerations">
<t>This document lists requirements for securing media traffic. As such,
it addresses security throughout the document.</t>
</section>
<section anchor="iana" title="IANA Considerations">
<t>This document does not require actions by IANA.</t>
</section>
<section anchor="acks" title="Acknowledgements">
<t>For contributions to the requirements portion of this document, the
authors would like to thank the active participants of the RTPSEC BoF
and on the RTPSEC mailing list. The authors would furthermore like to
thank Wolfgang Buecker, Guenther Horn, Peter Howard, Hans-Heinrich
Grusdt, Srinath Thiruvengadam, Martin Euchner, Eric Rescorla, Matt
Lepinski, Dan York, Werner Dittmann, Richard Barnes, Vesa Lehtovirta,
Colin Perkins, Peter Schneider, and Christer Holmberg for their feedback
to this document.</t>
<t>For contributions to the analysis portion of this document, the
authors would like to give special thanks to Steffen Fries and Dragan
Ignjatic for their excellent <xref
target="I-D.ietf-msec-mikey-applicability">MIKEY comparison
document</xref>. The authors would furthermore like to thank Cullen
Jennings, David Oran, David McGrew, Mark Baugher, Flemming Andreasen,
Eric Raymond, Dave Ward, Leo Huang, Eric Rescorla, Lakshminath Dondeti,
Steffen Fries, Alan Johnston, Dragan Ignjatic and John Elwell for their
feedback to this document.</t>
<t>Thanks to Richard Barnes and Peter Schneider for thorough reviews and
suggestions which improved the document considerably.</t>
</section>
</middle>
<back>
<references title="Normative References">
&RFC2119;
&RFC3261;
&RFC3262;
&RFC3264;
&RFC3711;
<reference anchor="FIPS-140-2"
target="http://csrc.nist.gov/publications/fips/fips140-2/fips1402.pdf">
<front>
<title>Security Requirements for Cryptographic Modules</title>
<author fullname="NIST">
<organization>NIST</organization>
</author>
<date day="13" month="June" year="2005" />
</front>
</reference>
<reference anchor="cryptval"
target="http://csrc.nist.gov/cryptval/140-2APP.htm">
<front>
<title>Cryptographic Module Validation Program</title>
<author fullname="NIST">
<organization>NIST</organization>
</author>
<date day="19" month="December" year="2006" />
</front>
</reference>
</references>
<references title="Informative References">
&RFC5027;
&RFC3550;
&RFC3372;
&I-D.ietf-mmusic-ice;
&I-D.stucker-sipping-early-media-coping;
&RFC4474;
&I-D.wing-sipping-srtp-key;
&rfc4568;
&rfc4650;
&I-D.ietf-msec-mikey-ecc;
&rfc4738;
&RFC4949;
&I-D.ietf-sip-certs;
&I-D.mahy-sipping-herfp-fix;
&rfc3830;
&rfc4492;
&rfc3388;
&rfc4346;
&rfc4916;
&I-D.fischl-sipping-media-dtls;
&I-D.ietf-msec-mikey-applicability;
&I-D.zimmermann-avt-zrtp;
&I-D.baugher-mmusic-sdp-dh;
&I-D.mcgrew-srtp-ekt;
&I-D.ietf-mmusic-media-path-middleboxes;
&rfc4771;
&I-D.jennings-sipping-multipart;
&I-D.ietf-avt-dtls-srtp;
&I-D.dondeti-msec-rtpsec-mikeyv2;
&I-D.ietf-mmusic-sdp-capability-negotiation;
</references>
<section anchor="comparison"
title="Overview and Evaluation of Existing Keying Mechanisms">
<t>Based on how the SRTP keys are exchanged, each SRTP key exchange
mechanism belongs to one general category:</t>
<t><list>
<t><list style="hanging">
<t hangText="signaling path:">All the keying is carried in the
call signaling (SIP or SDP) path.</t>
<t hangText="media path:">All the keying is carried in the
SRTP/SRTCP media path, and no signaling whatsoever is carried in
the call signaling path.</t>
<t hangText="signaling and media path:">Parts of the keying are
carried in the SRTP/SRTCP media path, and parts are carried in
the call signaling (SIP or SDP) path.</t>
</list></t>
</list></t>
<t>One of the significant benefits of SRTP over other end-to-end
encryption mechanisms, such as for example IPsec, is that SRTP is
bandwidth efficient and SRTP retains the header of RTP packets.
Bandwidth efficiency is vital for VoIP in many scenarios where access
bandwidth is limited or expensive, and retaining the RTP header is
important for troubleshooting packet loss, delay, and jitter.</t>
<t>Related to SRTP's characteristics is a goal that any SRTP keying
mechanism also be efficient and not cause additional call setup
delay. Contributors to additional call setup delay include network or
database operations: retrieval of certificates and additional SIP or
media path messages, and computational overhead of establishing keys or
validating certificates.</t>
<t>When examining the choice between keying in the signaling path,
keying in the media path, or keying in both paths, it is important to
realize the media path is generally 'faster' than the SIP signaling
path. The SIP signaling path has computational elements involved which
parse and route SIP messages. The media path, on the other hand, does
not normally have computational elements involved, and even when
computational elements such as firewalls are involved, they cause very
little additional delay. Thus, the media path can be useful for
exchanging several messages to establish SRTP keys. A disadvantage of
keying over the media path is that interworking different key exchange
mechanisms requires the interworking function to be in the media path, rather than
just in the signaling path; in practice this involvement is probably
unavoidable anyway.</t>
<section title="Signaling Path Keying Techniques">
<section title="MIKEY-NULL">
<t><xref target="RFC3830">MIKEY-NULL</xref> has the offerer indicate
the SRTP keys for both directions. The key is sent unencrypted in
SDP, which means the SDP must be encrypted hop-by-hop (e.g., by
using TLS (SIPS)) or end-to-end (e.g., by using S/MIME).</t>
<t>MIKEY-NULL requires one message from offerer to answerer (half a
round trip), and does not add additional media path messages.</t>
</section>
<section title="MIKEY-PSK">
<t>MIKEY-PSK (pre-shared key) <xref target="RFC3830"></xref>
requires that all endpoints share one common key. MIKEY-PSK has the
offerer encrypt the SRTP keys for both directions using this
pre-shared key.</t>
<t>MIKEY-PSK requires one message from offerer to answerer (half a
round trip), and does not add additional media path messages.</t>
</section>
<section title="MIKEY-RSA">
<t><xref target="RFC3830">MIKEY-RSA</xref> has the offerer encrypt
the keys for both directions using the intended answerer's public
key, which is obtained from a mechanism outside of MIKEY.</t>
<t>MIKEY-RSA requires one message from offerer to answerer (half a
round trip), and does not add additional media path messages.
MIKEY-RSA requires the offerer to obtain the intended answerer's
certificate.</t>
</section>
<section title="MIKEY-RSA-R">
<t><xref target="RFC4738">MIKEY-RSA-R </xref> is essentially the
same as MIKEY-RSA but reverses the role of the offerer and the
answerer with regards to providing the keys. That is, the answerer
encrypts the keys for both directions using the offerer's public
key. Both the offerer and answerer validate each other's public keys
using standard X.509 validation techniques. MIKEY-RSA-R also
enables sending certificates in the MIKEY message.</t>
<t>MIKEY-RSA-R requires one message from offerer to answerer, and one
message from answerer to offerer (full round trip), and does not add
additional media path messages. MIKEY-RSA-R requires the offerer to
validate the answerer's certificate.</t>
</section>
<section title="MIKEY-DHSIGN">
<t>In <xref target="RFC3830">MIKEY-DHSIGN</xref> the offerer and
answerer derive the key from a Diffie-Hellman exchange. In order to
prevent an active man-in-the-middle attack, the DH exchange itself is signed
using each endpoint's private key and the associated public keys are
validated using standard X.509 validation techniques.</t>
<t>MIKEY-DHSIGN requires one message from offerer to answerer, and
one message from answerer to offerer (full round trip), and does not
add additional media path messages. MIKEY-DHSIGN requires the
offerer and answerer to validate each other's certificates.
MIKEY-DHSIGN also enables sending the answerer's certificate in the
MIKEY message.</t>
</section>
<section title="MIKEY-DHHMAC">
<t><xref target="RFC4650">MIKEY-DHHMAC</xref> uses a pre-shared
secret to HMAC the Diffie-Hellman exchange, essentially combining
aspects of MIKEY-PSK with MIKEY-DHSIGN, but without MIKEY-DHSIGN's
need for certificate authentication.</t>
<t>MIKEY-DHHMAC requires one message from offerer to answerer, and
one message from answerer to offerer (full round trip), and does not
add additional media path messages.</t>
</section>
<section title="MIKEY-ECIES and MIKEY-ECMQV (MIKEY-ECC)">
<t><xref target="I-D.ietf-msec-mikey-ecc">ECC Algorithms For
MIKEY</xref> describes how ECC can be used with MIKEY-RSA (using
ECDSA signature) and with MIKEY-DHSIGN (using a new DH-Group code),
and also defines two new ECC-based algorithms, Elliptic Curve
Integrated Encryption Scheme (ECIES) and Elliptic Curve
Menezes-Qu-Vanstone (ECMQV).</t>
<t>With this proposal, the ECDSA signature, MIKEY-ECIES, and
MIKEY-ECMQV function exactly like MIKEY-RSA, and the new DH-Group
code functions exactly like MIKEY-DHSIGN. Therefore these ECC
mechanisms are not discussed separately in this document.</t>
</section>
<section anchor="sdesc" title="Security Descriptions with SIPS">
<t><xref target="RFC4568">Security Descriptions</xref> has each side
indicate the key it will use for transmitting SRTP media, and the
keys are sent in the clear in SDP. Security Descriptions relies on
hop-by-hop (TLS via "SIPS:") encryption to protect the keys
exchanged in signaling.</t>
<t>Security Descriptions requires one message from offerer to
answerer, and one message from answerer to offerer (full round
trip), and does not add additional media path messages.</t>
</section>
<section title="Security Descriptions with S/MIME">
<t>This keying mechanism is identical to <xref
target="sdesc"></xref>, except that rather than protecting the
signaling with TLS, the entire SDP is encrypted with S/MIME.</t>
</section>
<section title="SDP-DH (expired)">
<t><xref target="I-D.baugher-mmusic-sdp-dh">SDP
Diffie-Hellman</xref> exchanges Diffie-Hellman messages in the
signaling path to establish session keys. To protect against active
man-in-the-middle attacks, the Diffie-Hellman exchange needs to be
protected with S/MIME, SIPS, or <xref
target="RFC4474">SIP Identity</xref> and <xref
target="RFC4916">SIP Connected Identity</xref>.</t>
<t>SDP-DH requires one message from offerer to answerer, and one
message from answerer to offerer (full round trip), and does not add
additional media path messages.</t>
</section>
<section anchor="mikey2-sdp" title="MIKEYv2 in SDP (expired)">
<t><xref target="I-D.dondeti-msec-rtpsec-mikeyv2">MIKEYv2</xref>
adds mode negotiation to MIKEYv1 and removes the time
synchronization requirement. It therefore now takes 2 round-trips to
complete. In the first round trip, the communicating parties learn
each other's identities, agree on a MIKEY mode, crypto algorithm,
SRTP policy, and exchange nonces for replay protection. In the
second round trip, they negotiate unicast and/or group SRTP context
for SRTP and/or SRTCP.</t>
<t>Furthermore, MIKEYv2 also defines an in-band negotiation mode as
an alternative to SDP (see <xref
target="mikey2-inband"></xref>).</t>
</section>
<section anchor="eval-sip" title="Evaluation Criteria - SIP">
<t>This section considers how each keying mechanism interacts with
SIP features.</t>
<section anchor="retargeting"
title="Secure Retargeting and Secure Forking">
<t></t>
<t>Retargeting and forking of signaling requests is described
within <xref target="forking"></xref>. The following builds upon
this description.</t>
<t>The following list compares the behavior of secure forking,
answering association, two-time pads, and secure retargeting for
each keying mechanism.</t>
<t><list>
<t><list style="hanging">
<t hangText="MIKEY-NULL"><vspace blankLines="0" />Secure Forking: No, all AORs see
offerer's and answerer's keys. Answer is associated with
media by the SSRC in MIKEY. Additionally, a two-time pad
occurs if two branches choose the same 32-bit SSRC and
transmit SRTP packets.<vspace blankLines="1" />Secure
Retargeting: No, all targets see offerer's and answerer's
keys. Suffers from retargeting identity problem.</t>
<t hangText="MIKEY-PSK"><vspace blankLines="0" />Secure
Forking: No, all AORs see offerer's and answerer's keys.
Answer is associated with media by the SSRC in MIKEY. Note
that all AORs must share the same pre-shared key in order
for forking to work at all with MIKEY-PSK. Additionally, a
two-time pad occurs if two branches choose the same 32-bit
SSRC and transmit SRTP packets.<vspace
blankLines="1" />Secure Retargeting: Not secure. For
retargeting to work, the final target must possess the
correct PSK. While this is likely in scenarios where the call
is targeted to another device belonging to the same user
(forking), it is very unlikely that other users will
possess that PSK and be able to successfully answer that
call.</t>
<t hangText="MIKEY-RSA"><vspace blankLines="0" />Secure
Forking: No, all AORs see offerer's and answerer's keys.
Answer is associated with media by the SSRC in MIKEY. Note
that all AORs must share the same private key in order for
forking to work at all with MIKEY-RSA. Additionally, a
two-time pad occurs if two branches choose the same 32-bit
SSRC and transmit SRTP packets.<vspace
blankLines="1" />Secure Retargeting: No.</t>
<t hangText="MIKEY-RSA-R"><vspace blankLines="0" />Secure
Forking: Yes. Answer is associated with media by the SSRC
in MIKEY.<vspace blankLines="1" />Secure Retargeting:
Yes.</t>
<t hangText="MIKEY-DHSIGN"><vspace blankLines="0" />Secure
Forking: Yes, each forked endpoint negotiates unique keys
with the offerer for both directions. Answer is associated
with media by the SSRC in MIKEY.<vspace
blankLines="1" />Secure Retargeting: Yes, each target
negotiates unique keys with the offerer for both
directions.</t>
<t hangText="MIKEYv2 in SDP"><vspace blankLines="0" />The
behavior will depend on which mode is picked.</t>
<t hangText="MIKEY-DHHMAC"><vspace blankLines="0" />Secure
Forking: Yes, each forked endpoint negotiates unique keys
with the offerer for both directions. Answer is associated
with media by the SSRC in MIKEY.<vspace
blankLines="1" />Secure Retargeting: Yes, each target
negotiates unique keys with the offerer for both
directions. Note that for the keys to be meaningful, it
would require the PSK to be the same for all the potential
intermediaries, which would only happen within a single
domain.</t>
<t hangText="Security Descriptions with SIPS"><vspace
blankLines="0" />Secure Forking: No. Each forked endpoint
sees the offerer's key. Answer is not associated with
media.<vspace blankLines="1" />Secure Retargeting: No.
Each target sees the offerer's key.</t>
<t hangText="Security Descriptions with S/MIME"><vspace
blankLines="0" />Secure Forking: No. Each forked endpoint
sees the offerer's key. Answer is not associated with
media.<vspace blankLines="1" />Secure Retargeting: No.
Each target sees the offerer's key. Suffers from
retargeting identity problem.</t>
<t hangText="SDP-DH"><vspace blankLines="0" />Secure
Forking: Yes. Each forked endpoint calculates a unique
SRTP key. Answer is not associated with media.<vspace
blankLines="1" />Secure Retargeting: Yes. The final target
calculates a unique SRTP key.</t>
<t hangText="ZRTP"><vspace blankLines="0" />Secure
Forking: Yes. Each forked endpoint calculates a unique
SRTP key. As ZRTP isn't signaled in SDP, there is no
association of the answer with media.<vspace
blankLines="1" />Secure Retargeting: Yes. The final target
calculates a unique SRTP key.</t>
<t hangText="EKT"><vspace blankLines="0" />Secure Forking:
Inherited from the bootstrapping mechanism (the specific
MIKEY mode or Security Descriptions). Answer is associated
with media by the SPI in the EKT
protocol.<vspace blankLines="1" />Secure Retargeting:
Inherited from the bootstrapping mechanism (the specific
MIKEY mode or Security Descriptions).</t>
<t hangText="DTLS-SRTP"><vspace blankLines="0" />Secure
Forking: Yes. Each forked endpoint calculates a unique
SRTP key. Answer is associated with media by the
certificate fingerprint in signaling and certificate in
the media path.<vspace blankLines="1" /> Secure
Retargeting: Yes. The final target calculates a unique
SRTP key.</t>
<t hangText="MIKEYv2 Inband"><vspace blankLines="0" />The
behavior will depend on which mode is picked.</t>
</list></t>
</list></t>
</section>
<section title="Clipping Media Before SDP Answer">
<t>Clipping media before receiving the signaling answer is
described within <xref target="clipping"></xref>. The following
builds upon this description.</t>
<t>Furthermore, the problem of clipping gets compounded when
forking is used. For example, if using a Diffie-Hellman keying
technique with security preconditions that forks to 20 endpoints,
the call initiator would get 20 provisional responses containing
20 signed Diffie-Hellman half keys. Calculating 20 DH secrets and
validating signatures can be a difficult task depending on the
device capabilities.</t>
<t>The following list compares the behavior of clipping before SDP
answer for each keying mechanism.</t>
<t><list>
<t><list style="hanging">
<t hangText="MIKEY-NULL"><vspace blankLines="0" />Not
clipped. The offerer provides the answerer's keys.</t>
<t hangText="MIKEY-PSK"><vspace blankLines="0" />Not
clipped. The offerer provides the answerer's keys.</t>
<t hangText="MIKEY-RSA"><vspace blankLines="0" />Not
clipped. The offerer provides the answerer's keys.</t>
<t hangText="MIKEY-RSA-R"><vspace
blankLines="0" />Clipped. The answer contains the
answerer's encryption key.</t>
<t hangText="MIKEY-DHSIGN"><vspace
blankLines="0" />Clipped. The answer contains the
answerer's Diffie-Hellman response.</t>
<t hangText="MIKEY-DHHMAC"><vspace
blankLines="0" />Clipped. The answer contains the
answerer's Diffie-Hellman response.</t>
<t hangText="MIKEYv2 in SDP"><vspace blankLines="0" />The
behavior will depend on which mode is picked.</t>
<t hangText="Security Descriptions with SIPS"><vspace
blankLines="0" />Clipped. The answer contains the
answerer's encryption key.</t>
<t hangText="Security Descriptions with S/MIME"><vspace
blankLines="0" />Clipped. The answer contains the
answerer's encryption key.</t>
<t hangText="SDP-DH"><vspace blankLines="0" />Clipped. The
answer contains the answerer's Diffie-Hellman
response.</t>
<t hangText="ZRTP"><vspace blankLines="0" />Not clipped
because the session initially uses RTP. While RTP is
flowing, both ends negotiate SRTP keys in the media path
and then switch to using SRTP.</t>
<t hangText="EKT"><vspace blankLines="0" />Not clipped, as
long as the first RTCP packet (containing the answerer's
key) is not lost in transit. The answerer sends its
encryption key in RTCP, which arrives at the same time (or
before) the first SRTP packet encrypted with that
key.<list>
<t>Note: RTCP needs to work, in the
answerer-to-offerer direction, before the offerer can
decrypt SRTP media.</t>
</list></t>
<t hangText="DTLS-SRTP"><vspace blankLines="0" />No
clipping after the DTLS-SRTP handshake has completed. SRTP
keys are exchanged in the media path. Need to wait for SDP
answer to ensure DTLS-SRTP handshake was done with an
authorized party.<list>
<t>If a middlebox interferes with the media path,
there can be clipping <xref
target="I-D.ietf-mmusic-media-path-middleboxes"></xref>.</t>
</list></t>
<t hangText="MIKEYv2 Inband"><vspace blankLines="0" />Not
clipped. Keys are exchanged in the media path without
relying on the signaling path.</t>
</list></t>
</list></t>
</section>
<section title="Centralized Keying">
<t>Centralized keying is described within <xref
target="conferencing"></xref>. The following builds upon this
description.</t>
<t>The following list describes how each keying mechanism behaves
with centralized keying (scenario d) and rekeying.<list>
<t><list style="hanging">
<t hangText="MIKEY-NULL"><vspace blankLines="0" />Keying:
Yes, if offerer is the mixer. No, if offerer is the
participant (end user).<vspace blankLines="1" />Rekeying:
Yes, via re-INVITE</t>
<t hangText="MIKEY-PSK"><vspace blankLines="0" />Keying:
Yes, if offerer is the mixer. No, if offerer is the
participant (end user).<vspace blankLines="1" />Rekeying:
Yes, with a re-INVITE</t>
<t hangText="MIKEY-RSA"><vspace blankLines="0" />Keying:
Yes, if offerer is the mixer. No, if offerer is the
participant (end user).<vspace blankLines="1" />Rekeying:
Yes, with a re-INVITE</t>
<t hangText="MIKEY-RSA-R"><vspace blankLines="0" />Keying:
No, if offerer is the mixer. Yes, if offerer is the
participant (end user).<vspace blankLines="1" />Rekeying:
n/a</t>
<t hangText="MIKEY-DHSIGN"><vspace
blankLines="0" />Keying: No; a group-key Diffie-Hellman
protocol is not supported.<vspace
blankLines="1" />Rekeying: n/a</t>
<t hangText="MIKEY-DHHMAC"><vspace
blankLines="0" />Keying: No; a group-key Diffie-Hellman
protocol is not supported.<vspace
blankLines="1" />Rekeying: n/a</t>
<t hangText="MIKEYv2 in SDP"><vspace blankLines="0" />The
behavior will depend on which mode is picked.</t>
<t hangText="Security Descriptions with SIPS"><vspace
blankLines="0" />Keying: Yes, if offerer is the mixer.
Yes, if offerer is the participant.<vspace
blankLines="1" />Rekeying: Yes, with a re-INVITE.</t>
<t hangText="Security Descriptions with S/MIME"><vspace
blankLines="0" />Keying: Yes, if offerer is the mixer.
Yes, if offerer is the participant.<vspace
blankLines="1" />Rekeying: Yes, with a re-INVITE.</t>
<t hangText="SDP-DH"><vspace blankLines="0" />Keying: No;
a group-key Diffie-Hellman protocol is not
supported.<vspace blankLines="1" />Rekeying: n/a</t>
<t hangText="ZRTP"><vspace blankLines="0" />Keying: No; a
group-key Diffie-Hellman protocol is not supported.<vspace
blankLines="1" />Rekeying: n/a</t>
<t hangText="EKT"><vspace blankLines="0" />Keying: Yes.
After bootstrapping a KEK using Security Descriptions or
MIKEY, each member originating an SRTP stream can send its
SRTP master key, sequence number and ROC via RTCP.<vspace
blankLines="1" />Rekeying: Yes. EKT allows each sender
to transmit its SRTP master key to the group via RTCP
packets. Thus, EKT allows each originator of an SRTP
stream to rekey at any time.</t>
<t hangText="DTLS-SRTP"><vspace blankLines="0" />Keying:
Yes, because with the assumed cipher suite,
TLS_RSA_WITH_3DES_EDE_CBC_SHA, each end indicates its SRTP
key.<vspace blankLines="1" />Rekeying: via DTLS in the
media path.</t>
<t hangText="MIKEYv2 Inband"><vspace blankLines="0" />The
behavior will depend on which mode is picked.</t>
</list></t>
</list></t>
</section>
<section title="SSRC and ROC">
<t>In SRTP, a cryptographic context is defined as the SSRC,
destination network address, and destination transport port
number. In RTP, by contrast, a flow is defined as the destination network
address and destination transport port number. This results in a
problem -- how to communicate the SSRC so that the SSRC can be
used for the cryptographic context.</t>
<t>Three approaches have emerged for this communication. One, used
by all MIKEY modes, is to communicate the SSRCs to the peer in the
MIKEY exchange. Another, used by Security Descriptions, is to use
"late binding" -- that is, any new packet containing a
previously-unseen SSRC (which arrives at the same destination
network address and destination transport port number) will create
a new cryptographic context. Another approach, common amongst
techniques with media-path SRTP key establishment, is to require a
handshake over that media path before SRTP packets are sent.
MIKEY's approach changes RTP's SSRC collision detection behavior
by requiring RTP to pre-establish the SSRC values for each
session.</t>
<t>Another related issue is that SRTP introduces a rollover
counter (ROC), which records how many times the SRTP sequence
number has rolled over. As the sequence number is used for SRTP's
default ciphers, it is important that all endpoints know the value
of the ROC. The ROC starts at 0 at the beginning of a session.</t>
<t>Some keying mechanisms cause a two-time pad to occur if two
endpoints of a forked call have an SSRC collision.</t>
<t>Note: A proposal has been made to send the ROC value on every
Nth SRTP packet <xref target="RFC4771"></xref>. This proposal has
not yet been incorporated into this document.</t>
<t>The following list examines handling of SSRC and ROC:</t>
<t><list>
<t><list style="hanging">
<t hangText="MIKEY-NULL"><vspace blankLines="0" />Each
endpoint indicates a set of SSRCs and the ROC for SRTP
packets it transmits.</t>
<t hangText="MIKEY-PSK"><vspace blankLines="0" />Each
endpoint indicates a set of SSRCs and the ROC for SRTP
packets it transmits.</t>
<t hangText="MIKEY-RSA"><vspace blankLines="0" />Each
endpoint indicates a set of SSRCs and the ROC for SRTP
packets it transmits.</t>
<t hangText="MIKEY-RSA-R"><vspace blankLines="0" />Each
endpoint indicates a set of SSRCs and the ROC for SRTP
packets it transmits.</t>
<t hangText="MIKEY-DHSIGN"><vspace blankLines="0" />Each
endpoint indicates a set of SSRCs and the ROC for SRTP
packets it transmits.</t>
<t hangText="MIKEY-DHHMAC"><vspace blankLines="0" />Each
endpoint indicates a set of SSRCs and the ROC for SRTP
packets it transmits.</t>
<t hangText="MIKEYv2 in SDP"><vspace blankLines="0" />Each
endpoint indicates a set of SSRCs and the ROC for SRTP
packets it transmits.</t>
<t hangText="Security Descriptions with SIPS"><vspace
blankLines="0" />Neither SSRC nor ROC are signaled. SSRC
'late binding' is used.</t>
<t hangText="Security Descriptions with S/MIME"><vspace
blankLines="0" />Neither SSRC nor ROC are signaled. SSRC
'late binding' is used.</t>
<t hangText="SDP-DH"><vspace blankLines="0" />Neither SSRC
nor ROC are signaled. SSRC 'late binding' is used.</t>
<t hangText="ZRTP"><vspace blankLines="0" />Neither SSRC
nor ROC are signaled. SSRC 'late binding' is used.</t>
<t hangText="EKT"><vspace blankLines="0" />The SSRC of the
SRTCP packet containing an EKT update corresponds to the
SRTP master key and other parameters within that
packet.</t>
<t hangText="DTLS-SRTP"><vspace blankLines="0" />Neither
SSRC nor ROC are signaled. SSRC 'late binding' is
used.</t>
<t hangText="MIKEYv2 Inband"><vspace blankLines="0" />Each
endpoint indicates a set of SSRCs and the ROC for SRTP
packets it transmits.</t>
</list></t>
</list></t>
</section>
</section>
<section anchor="eval-sec" title="Evaluation Criteria - Security">
<t>This section evaluates each keying mechanism on the basis of
its security properties.</t>
<section title="Distribution and Validation of Public Keys and Certificates">
<t>Using public key cryptography for confidentiality and
authentication can introduce requirements for two types of
systems: (1) a system to distribute public keys (often in the form
of certificates), and (2) a system for validating certificates. We
refer to the former as a key distribution system and the latter as
an authentication infrastructure. In many cases, a monolithic
public key infrastructure (PKI) is used to fulfill both of these
roles. However, these functions can be provided by many other
systems. For instance, key distribution may be accomplished by any
public repository of keys. Any system in which the two endpoints
have access to trust anchors and intermediate CA certificates that
can be used to validate other endpoints' certificates
(including a system of self-signed certificates) can be used to
support certificate validation in the below schemes.</t>
<t>With real-time communications it is desirable to avoid fetching
keys or certificates that delay call setup; rather it is
preferable to fetch or validate certificates in such a way that
call setup isn't delayed. For example, a certificate can be
validated while the phone is ringing or can be validated while
ring-back tones are being played or even while the called party is
answering the phone and saying "hello".</t>
<t hangText="Avoids PKI:">SRTP key exchange mechanisms that
require a particular authentication infrastructure to operate
(whether for distribution or validation) are gated on the
deployment of such an infrastructure available to both
endpoints. This means that no media security is achievable until
such an infrastructure exists. For SIP, something like <xref
target="I-D.ietf-sip-certs">sip-certs</xref> might be used to
obtain the certificate of a peer.</t>
<t><list>
<t>Note: Even if <xref
target="I-D.ietf-sip-certs">sip-certs</xref> was deployed, the
<xref target="retargeting">retargeting problem</xref> would
still prevent successful deployment of keying techniques which
require the offerer to obtain the actual target's public
key.</t>
</list></t>
<t>The following list compares the requirements introduced by the
use of public-key cryptography in each keying mechanism, both for
public key distribution and for certificate validation.</t>
<t><list>
<t><list style="hanging">
<t hangText="MIKEY-NULL"><vspace
blankLines="0" />Public-key cryptography is not used.</t>
<t hangText="MIKEY-PSK"><vspace
blankLines="0" />Public-key cryptography is not used.
Rather, all endpoints must have some way to exchange
per-endpoint or per-system pre-shared keys.</t>
<t hangText="MIKEY-RSA"><vspace blankLines="0" />The
offerer obtains the intended answerer's public key before
initiating the call. This public key is used to encrypt
the SRTP keys. There is no defined mechanism for the
offerer to obtain the answerer's public key, although
<xref target="I-D.ietf-sip-certs"></xref> might be viable
in the future.<vspace blankLines="1" />The offer may also
contain a certificate for the offerer, which would require
an authentication infrastructure in order to be validated
by the receiver.</t>
<t hangText="MIKEY-RSA-R"><vspace blankLines="0" />The
offer contains the offerer's certificate, and the answer
contains the answerer's certificate. The answerer uses the
public key in the certificate to encrypt the SRTP keys
that will be used by the offerer and the answerer. An
authentication infrastructure is necessary to validate the
certificates.</t>
<t hangText="MIKEY-DHSIGN"><vspace blankLines="0" />An
authentication infrastructure is used to authenticate the
public key that is included in the MIKEY message.</t>
<t hangText="MIKEY-DHHMAC"><vspace
blankLines="0" />Public-key cryptography is not used.
Rather, all endpoints must have some way to exchange
per-endpoint or per-system pre-shared keys.</t>
<t hangText="MIKEYv2 in SDP"><vspace blankLines="0" />The
behavior will depend on which mode is picked.</t>
<t hangText="Security Descriptions with SIPS"><vspace
blankLines="0" />Public-key cryptography is not used.</t>
<t hangText="Security Descriptions with S/MIME"><vspace
blankLines="0" />Use of S/MIME requires that the endpoints
be able to fetch and validate certificates for each other.
The offerer must obtain the intended target's certificate
and encrypt the SDP offer with the public key contained
in the target's certificate. The answerer must obtain the
offerer's certificate and encrypt the SDP answer with the
public key contained in the offerer's certificate.</t>
<t hangText="SDP-DH"><vspace blankLines="0" />Public-key
cryptography is not used.</t>
<t hangText="ZRTP"><vspace blankLines="0" />Public-key
cryptography is not used.</t>
<t hangText="EKT"><vspace blankLines="0" />Public-key
cryptography is not used by itself, but might be used by
the EKT bootstrapping keying mechanism (such as certain
MIKEY modes).</t>
<t hangText="DTLS-SRTP"><vspace blankLines="0" />Remote
party's certificate is sent in media path, and a
fingerprint of the same certificate is sent in the
signaling path.</t>
<t hangText="MIKEYv2 Inband"><vspace blankLines="0" />The
behavior will depend on which mode is picked.</t>
</list></t>
</list></t>
</section>
<section title="Perfect Forward Secrecy">
<t>In the context of SRTP, Perfect Forward Secrecy is the property
that SRTP session keys that protected a previous session are not
compromised if the static keys belonging to the endpoints are
compromised. That is, if someone were to record your encrypted
session content and later acquire either party's private key,
that encrypted session content would be safe from decryption if
your key exchange mechanism had perfect forward secrecy.</t>
<t>The following list describes how each key exchange mechanism
provides PFS.</t>
<t><list>
<t><list style="hanging">
<t hangText="MIKEY-NULL"><vspace blankLines="0" />Not
applicable; MIKEY-NULL does not have a long-term
secret.</t>
<t hangText="MIKEY-PSK"><vspace blankLines="0" />No
PFS.</t>
<t hangText="MIKEY-RSA"><vspace blankLines="0" />No
PFS.</t>
<t hangText="MIKEY-RSA-R"><vspace blankLines="0" />No
PFS.</t>
<t hangText="MIKEY-DHSIGN"><vspace blankLines="0" />PFS is
provided with the Diffie-Hellman exchange.</t>
<t hangText="MIKEY-DHHMAC"><vspace blankLines="0" />PFS is
provided with the Diffie-Hellman exchange.</t>
<t hangText="MIKEYv2 in SDP"><vspace blankLines="0" />The
behavior will depend on which mode is picked.</t>
<t hangText="Security Descriptions with SIPS"><vspace
blankLines="0" />Not applicable; Security Descriptions
does not have a long-term secret.</t>
<t hangText="Security Descriptions with S/MIME"><vspace
blankLines="0" />Not applicable; Security Descriptions
does not have a long-term secret.</t>
<t hangText="SDP-DH"><vspace blankLines="0" />PFS is
provided with the Diffie-Hellman exchange.</t>
<t hangText="ZRTP"><vspace blankLines="0" />PFS is
provided with the Diffie-Hellman exchange.</t>
<t hangText="EKT"><vspace blankLines="0" />No PFS.</t>
<t hangText="DTLS-SRTP"><vspace blankLines="0" />PFS is
achieved if the negotiated cipher suite includes an
exponential or discrete-logarithmic key exchange (e.g.,
Diffie-Hellman (DH_RSA from <xref
target="RFC4346"></xref>) or <xref
target="RFC4492">Elliptic Curve
Diffie-Hellman</xref>).</t>
<t hangText="MIKEYv2 Inband"><vspace blankLines="0" />The
behavior will depend on which mode is picked.</t>
</list></t>
</list></t>
</section>
<section title="Best Effort Encryption">
<t>With best effort encryption, SRTP is used with endpoints that
support SRTP, otherwise RTP is used.</t>
<t>SIP needs a backwards-compatible best effort encryption in
order for SRTP to work successfully with SIP retargeting and
forking when there is a mix of forked or retargeted devices that
support SRTP and don't support SRTP.</t>
<t><list>
<t>Consider the case of Bob, with a phone that only does RTP
and a voice mail system that supports SRTP and RTP. If Alice
calls Bob with an SRTP offer, Bob's RTP-only phone will reject
the media stream (with an empty "m=" line) because Bob's phone
doesn't understand SRTP (RTP/SAVP). Alice's phone will see
this rejected media stream and may terminate the entire call
(BYE) and re-initiate the call as RTP-only, or Alice's phone
may decide to continue with call setup with the SRTP-capable
leg (the voice mail system). If Alice's phone decided to
re-initiate the call as RTP-only, and Bob doesn't answer his
phone, Alice will then leave voice mail using only RTP, rather
than SRTP as expected.</t>
</list>Currently, several techniques are commonly considered as
candidates to provide opportunistic encryption:</t>
<t><list style="hanging">
<t hangText="multipart/alternative"><vspace blankLines="0" />
<xref target="I-D.jennings-sipping-multipart"></xref>
describes how to form a multipart/alternative body part in
SIP. The significant issues with this technique are (1) that
multipart MIME is incompatible with existing SIP proxies,
firewalls, Session Border Controllers, and endpoints and (2)
when forking, the <xref
target="I-D.mahy-sipping-herfp-fix">Heterogeneous Error
Response Forking Problem (HERFP)</xref> causes problems if
such non-multipart-capable endpoints were involved in the
forking.</t>
<t hangText="SDP Grouping"><vspace blankLines="0" />A new SDP
grouping mechanism (following the idea introduced in <xref
target="RFC3388"></xref>) has been discussed which would allow
a media line to indicate RTP/AVP and another media line to
indicate RTP/SAVP, allowing non-SRTP-aware endpoints to choose
RTP/AVP and SRTP-aware endpoints to choose RTP/SAVP. As of
this writing, this SDP grouping mechanism has not been
published as an Internet Draft.</t>
<t hangText="session attribute"><vspace blankLines="0" />With
this technique, the endpoints signal their desire to do SRTP
by signaling RTP (RTP/AVP), and using an attribute ("a=") in
the SDP. This technique is entirely backwards compatible with
non-SRTP-aware endpoints, but doesn't use the RTP/SAVP
protocol registered by <xref target="RFC3711">SRTP</xref>.</t>
<t hangText="SDP Capability Negotiation"><vspace
blankLines="0" /><xref
target="I-D.ietf-mmusic-sdp-capability-negotiation">SDP
Capability Negotiation</xref> provides a backwards-compatible
mechanism to allow offering both SRTP and RTP in a single
offer. This is the preferred technique.</t>
<t hangText="Probing"><vspace blankLines="0" />With this
technique, the endpoints first establish an RTP session using
RTP (RTP/AVP). The endpoints send probe messages, over the
media path, to determine if the remote endpoint supports their
keying technique.</t>
</list>The preferred technique, <xref
target="I-D.ietf-mmusic-sdp-capability-negotiation">SDP Capability
Negotiation</xref>, can be used with all key exchange mechanisms.
What remains unique is ZRTP, which can also accomplish its best
effort encryption by probing (sending ZRTP messages over the media
path) or by session attribute (see "a=zrtp", defined in Section 10
of <xref target="I-D.zimmermann-avt-zrtp"></xref>). Current
implementations of ZRTP use probing.</t>
</section>
<section title="Upgrading Algorithms">
<t>It is necessary to allow upgrading SRTP encryption and hash
algorithms, as well as upgrading the cryptographic functions used
for the key exchange mechanism. With SIP's offer/answer model,
this can be computationally expensive because the offer needs to
contain all combinations of the key exchange mechanisms (all MIKEY
modes, Security Descriptions) and all SRTP cryptographic suites
(AES-128, AES-256) and all SRTP cryptographic hash functions
(SHA-1, SHA-256) that the offerer supports. In order to do this,
the offerer has to expend CPU resources to build an offer
containing all of this information which becomes computationally
prohibitive.</t>
<t>Thus, it is important to keep the offerer's CPU impact fixed so
that offering multiple new SRTP encryption and hash functions
incurs no additional expense.</t>
<t>The following list describes the CPU effort involved in using
each key exchange technique.</t>
<t><list>
<t><list style="hanging">
<t hangText="MIKEY-NULL"><vspace blankLines="0" />No
significant computational expense.</t>
<t hangText="MIKEY-PSK"><vspace blankLines="0" />No
significant computational expense.</t>
<t hangText="MIKEY-RSA"><vspace blankLines="0" />For each
offered SRTP crypto suite, the offerer has to perform an RSA
operation to encrypt the TGK.</t>
<t hangText="MIKEY-RSA-R"><vspace blankLines="0" />For
each offered SRTP crypto suite, the offerer has to perform a
public key operation to sign the MIKEY message.</t>
<t hangText="MIKEY-DHSIGN"><vspace blankLines="0" />For
each offered SRTP crypto suite, the offerer has to perform a
Diffie-Hellman operation, and a public key operation to
sign the Diffie-Hellman output.</t>
<t hangText="MIKEY-DHHMAC"><vspace blankLines="0" />For
each offered SRTP crypto suite, the offerer has to perform a
Diffie-Hellman operation.</t>
<t hangText="MIKEYv2 in SDP"><vspace blankLines="0" />The
behavior will depend on which mode is picked.</t>
<t hangText="Security Descriptions with SIPS"><vspace
blankLines="0" />No significant computational expense.</t>
<t hangText="Security Descriptions with S/MIME"><vspace
blankLines="0" />S/MIME requires the offerer and the
answerer to encrypt the SDP with the other's public key,
and to decrypt the received SDP with their own private
key.</t>
<t hangText="SDP-DH"><vspace blankLines="0" />For each
offered SRTP crypto suite, the offerer has to perform a
Diffie-Hellman operation.</t>
<t hangText="ZRTP"><vspace blankLines="0" />The offerer
has no additional computational expense at all, as the
offer contains no information about ZRTP or might contain
"a=zrtp".</t>
<t hangText="EKT"><vspace blankLines="0" />The offerer's
computational expense depends entirely on the EKT
bootstrapping mechanism selected (one or more MIKEY modes
or Security Descriptions).</t>
<t hangText="DTLS-SRTP"><vspace blankLines="0" />The
offerer has no additional computational expense at all, as
the offer contains only a fingerprint of the certificate
that will be presented in the DTLS exchange.</t>
<t hangText="MIKEYv2 Inband"><vspace blankLines="0" />The
behavior will depend on which mode is picked.</t>
</list></t>
</list></t>
</section>
</section>
</section>
<section title="Media Path Keying Technique">
<t></t>
<section title="ZRTP">
<t><xref target="I-D.zimmermann-avt-zrtp">ZRTP</xref> does not
exchange information in the signaling path (although it's possible
for endpoints to indicate support for ZRTP with "a=zrtp" in the
initial Offer). In ZRTP the keys are exchanged entirely in the media
path using a Diffie-Hellman exchange. The advantage to this
mechanism is that the signaling channel is used only for call setup
and the media channel is used to establish an encrypted channel --
much like encryption devices on the PSTN. ZRTP uses voice
authentication of its Diffie-Hellman exchange by having each person
read digits to the other person. Subsequent sessions with the same
ZRTP endpoint can be authenticated using the stored hash of the
previously negotiated key rather than voice authentication.</t>
<t>ZRTP uses 4 media path messages (Hello, Commit, DHPart1, and
DHPart2) to establish the SRTP key, and 3 media path confirmation
messages. These initial messages are all sent as non-RTP packets.
<list>
<t>Note that when ZRTP probing is used, unencrypted RTP is being
exchanged until the SRTP keys are established.</t>
</list></t>
</section>
</section>
<section title="Signaling and Media Path Keying Techniques">
<t></t>
<section title="EKT">
<t><xref target="I-D.mcgrew-srtp-ekt">EKT</xref> relies on another
SRTP key exchange protocol, such as Security Descriptions or MIKEY,
for bootstrapping. In the initial phase, each member of a conference
uses an SRTP key exchange protocol to establish a common key
encryption key (KEK). Each member may use the KEK to securely
transport its SRTP master key and current SRTP rollover counter
(ROC), via RTCP, to the other participants in the session.</t>
<t>EKT requires the offerer to send some parameters (EKT_Cipher,
KEK, and security parameter index (SPI)) via the bootstrapping
protocol such as Security Descriptions or MIKEY. Each answerer sends
an SRTCP message which contains the answerer's SRTP Master Key,
rollover counter, and the SRTP sequence number. Rekeying is done by
sending a new SRTCP message. For reliable transport, multiple RTCP
messages need to be sent.</t>
</section>
<section anchor="dtls-srtp" title="DTLS-SRTP">
<t><xref target="I-D.ietf-avt-dtls-srtp">DTLS-SRTP</xref> exchanges
public key fingerprints in SDP <xref
target="I-D.fischl-sipping-media-dtls"></xref> and then establishes
a DTLS session over the media channel. The endpoints use the DTLS
handshake to agree on crypto suites and establish SRTP session keys.
SRTP packets are then exchanged between the endpoints.</t>
<t>DTLS-SRTP requires one message from offerer to answerer (half
round trip), and one message from the answerer to offerer (full
round trip) so the offerer can correlate the SDP answer with the
answering endpoint. DTLS-SRTP uses 4 media path messages to
establish the SRTP key.</t>
<t>This document assumes DTLS will use TLS_RSA_WITH_3DES_EDE_CBC_SHA
as its cipher suite, which is the mandatory-to-implement cipher
suite in <xref target="RFC4346">TLS</xref>.</t>
</section>
<section anchor="mikey2-inband" title="MIKEYv2 Inband (expired)">
<t>As defined in <xref target="mikey2-sdp"></xref>, MIKEYv2 also
defines an in-band negotiation mode as an alternative to SDP (see
<xref target="mikey2-inband"></xref>). The details are not sorted
out in the draft yet on what in-band actually means (i.e., UDP, RTP,
RTCP, etc.).</t>
</section>
</section>
</section>
<section anchor="ofs" title="Out-of-Scope">
<t>Discussions concluded that key management for shared-key encryption
of conferencing is outside the scope of this document. As the priority
is point-to-point unicast SRTP session keying, resolving shared-key SRTP
session keying is deferred to later and left as an item for future
investigations.</t>
<t>The compromise of an endpoint that has access to decrypted media
(e.g., SIP user agent, transcoder, recorder) is out of scope of this
document. Such a compromise might be via privilege escalation,
installation of a virus or trojan horse, or similar attacks.</t>
</section>
<section title="Requirement renumbering in -02">
<t>[[RFC Editor: Please delete this section prior to publication.]]</t>
<t>Previous versions of this document used requirement numbers, which
were changed to mnemonics as follows: <list hangIndent="6"
style="hanging">
<t hangText="R1">R-FORK-RETARGET</t>
<t hangText="R2">R-BEST-SECURE</t>
<t hangText="R3">R-DISTINCT</t>
<t hangText="R4">R-REUSE; changed from 'MAY' to 'protocol MUST
support, and SHOULD implement'</t>
<t hangText="R5">R-AVOID-CLIPPING</t>
<t hangText="R6">R-PASS-MEDIA</t>
<t hangText="R7">R-PASS-SIG</t>
<t hangText="R8">R-PFS</t>
<t hangText="R9">R-COMPUTE</t>
<t hangText="R10">R-RTP-VALID</t>
<t hangText="R11">(folded into R4; was reuse previous session)</t>
<t hangText="R12">R-CERTS</t>
<t hangText="R13">R-FIPS</t>
<t hangText="R14">R-ASSOC</t>
<t hangText="R15">R-ALLOW-RTP</t>
<t hangText="R16">R-DOS</t>
<t hangText="R17">R-SIG-MEDIA</t>
<t hangText="R18">R-EXISTING</t>
<t hangText="R19">R-AGILITY</t>
<t hangText="R20">R-DOWNGRADE</t>
<t hangText="R21">R-NEGOTIATE</t>
<t hangText="R23">R-OTHER-SIGNALING</t>
<t hangText="R23">R-RECORDING (R23 was duplicated in previous
versions of the document)</t>
<t hangText="R24">(deleted; was lawful intercept)</t>
<t hangText="R25">R-TRANSCODER</t>
<t hangText="R26">R-PSTN</t>
<t hangText="R27">R-ID-BINDING</t>
<t hangText="R28">R-ACT-ACT</t>
</list></t>
</section>
</back>
</rfc>