One document matched: draft-ietf-siprec-protocol-17.xml
<?xml version="1.0" encoding="utf-8"?>
<!-- This template is for creating an Internet Draft using xml2rfc,
which is available here: http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!-- One method to get references from the online citation libraries.
There has to be one entity for each item to be referenced.
An alternate method (rfc include) is described in the references. -->
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC2506 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2506.xml">
<!ENTITY RFC2804 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2804.xml">
<!ENTITY RFC3261 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3261.xml">
<!ENTITY RFC3264 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3264.xml">
<!ENTITY RFC3311 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3311.xml">
<!ENTITY RFC3325 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3325.xml">
<!ENTITY RFC3550 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3550.xml">
<!ENTITY RFC3551 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3551.xml">
<!ENTITY RFC3711 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3711.xml">
<!ENTITY RFC3840 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3840.xml">
<!ENTITY RFC4574 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4574.xml">
<!ENTITY RFC4568 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4568.xml">
<!ENTITY RFC4585 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4585.xml">
<!ENTITY RFC4916 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4916.xml">
<!ENTITY RFC4961 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4961.xml">
<!ENTITY RFC5104 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5104.xml">
<!ENTITY RFC5124 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5124.xml">
<!ENTITY RFC5168 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5168.xml">
<!ENTITY RFC5234 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5234.xml">
<!ENTITY RFC5630 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5630.xml">
<!ENTITY RFC5761 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5761.xml">
<!ENTITY RFC6263 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6263.xml">
<!ENTITY RFC6341 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6341.xml">
<!ENTITY RFC7022 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.7022.xml">
<!ENTITY RFC7245 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.7245.xml">
<!ENTITY RFC7525 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.7525.xml">
<!ENTITY I-D.ietf-siprec-metadata SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-siprec-metadata.xml">
<!ENTITY I-D.ietf-avtcore-srtp-ekt SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-avtcore-srtp-ekt.xml">
]>
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<!-- used by XSLT processors -->
<!-- For a complete list and description of processing instructions (PIs),
please see http://xml.resource.org/authoring/README.html. -->
<!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds might want to use.
(Here they are set differently than their defaults in xml2rfc v1.32) -->
<?rfc strict="yes" ?>
<!-- give errors regarding ID-nits and DTD validation -->
<!-- control the table of contents (ToC) -->
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic reference tags, i.e., [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space
(using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->
<rfc category="std" docName="draft-ietf-siprec-protocol-17" ipr="trust200902">
<!-- category values: std, bcp, info, exp, and historic
ipr values: full3667, noModification3667, noDerivatives3667
you can add the attributes updates="NNNN" and obsoletes="NNNN"
they will automatically be output with "(if approved)" -->
<front>
<!-- The abbreviated title is used in the page header - it is only necessary if the
full title is longer than 39 characters -->
<title abbrev="Session Recording Protocol">Session Recording
Protocol</title>
<!-- add 'role="editor"' below for the editors if appropriate -->
<!-- Another author who claims to be an editor -->
<author fullname="Leon Portman" initials="L.P." surname="Portman">
<organization>NICE Systems</organization>
<address>
<postal>
<street>22 Zarhin Street</street>
<street>P.O. Box 690</street>
<city>Ra'anana</city>
<code>4310602</code>
<country>Israel</country>
</postal>
<email>leon.portman@gmail.com</email>
</address>
</author>
<author fullname="Henry Lum" initials="H." role="editor" surname="Lum">
<organization>Genesys</organization>
<address>
<postal>
<street>1380 Rodick Road, Suite 201</street>
<city>Markham</city>
<region>Ontario</region>
<code>L3R4G5</code>
<country>Canada</country>
</postal>
<email>henry.lum@genesyslab.com</email>
</address>
</author>
<author fullname="Charles Eckel" initials="C." surname="Eckel">
<organization>Cisco</organization>
<address>
<postal>
<street>170 West Tasman Drive</street>
<city>San Jose</city>
<region>CA</region>
<code>95134</code>
<country>United States</country>
</postal>
<email>eckelcu@cisco.com</email>
</address>
</author>
<author fullname="Alan Johnston" initials="A." surname="Johnston">
<organization>Avaya</organization>
<address>
<postal>
<street/>
<city>St. Louis</city>
<region>MO</region>
<code>63124</code>
</postal>
<email>alan.b.johnston@gmail.com</email>
</address>
</author>
<author fullname="Andrew Hutton" initials="A." surname="Hutton">
<organization>Unify</organization>
<address>
<postal>
<street>Brickhill Street</street>
<city>Milton Keynes</city>
<code>MK15 0DJ</code>
<country>United Kingdom</country>
</postal>
<email>andrew.hutton@unify.com</email>
</address>
</author>
<date/>
<area>RAI</area>
<workgroup>SIPREC</workgroup>
<keyword>siprec</keyword>
<abstract>
<t>This document specifies the use of the Session Initiation Protocol
(SIP), the Session Description Protocol (SDP), and the Real-time Transport
Protocol (RTP) for delivering real-time media and metadata from a
Communication Session (CS) to a recording device. The Session Recording
Protocol specifies the use of SIP, SDP, and RTP to establish a Recording
Session (RS) between the Session Recording Client (SRC), which is on the
path of the CS, and a Session Recording Server (SRS) at the recording
device. This document considers only active recording, where the SRC
purposefully streams media to an SRS and all participating user agents
are notified of the recording. Passive recording, where a recording
device detects media directly from the network (e.g., using
port-mirroring techniques), is outside the scope of this document. In
addition, lawful intercept is outside the scope of this document.</t>
</abstract>
</front>
<middle>
<section anchor="Introduction" title="Introduction">
<t>This document specifies the mechanism to record a Communication
Session (CS) by delivering real-time media and metadata from the CS to a
recording device. In accordance with the architecture <xref
target="RFC7245"/>, the Session Recording Protocol specifies the use of
SIP, SDP, and RTP to establish a Recording Session (RS) between the
Session Recording Client (SRC), which is on the path of the CS, and a
Session Recording Server (SRS) at the recording device. SIP is also used
to deliver metadata to the recording device, as specified in <xref
target="I-D.ietf-siprec-metadata"/>. Metadata is information that
describes recorded media and the CS to which they relate. The Session
Recording Protocol intends to satisfy the SIP-based Media Recording
requirements listed in <xref target="RFC6341"/>. In addition to the
Session Recording Protocol, this document specifies extensions for user
agents that are participants in a CS to receive recording indications
and to provide preferences for recording.</t>
<t>This document considers only active recording, where the SRC
purposefully streams media to an SRS and all participating user agents
are notified of the recording. Passive recording, where a recording
device detects media directly from the network (e.g., using
port-mirroring techniques), is outside the scope of this document. In
addition, lawful intercept is outside the scope of this document, in
accordance with <xref target="RFC2804"/>.</t>
</section>
<section title="Terminology">
<t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
"SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
document are to be interpreted as described in <xref
target="RFC2119"/>.</t>
</section>
<section anchor="Definitions" title="Definitions">
<t>This document refers to the core definitions provided in the
architecture document <xref target="RFC7245"/>.</t>
<t>The RTP Handling section uses the definitions provided in "RTP: A
Transport Protocol for Real-Time Applications" <xref
target="RFC3550"/>.</t>
</section>
<section title="Scope">
<t>The scope of the Session Recording Protocol includes the
establishment of the recording sessions and the reporting of the
metadata. The scope also includes extensions supported by User Agents
participating in the CS such as indication of recording. The user agents
need not be recording-aware in order to participate in a CS being
recorded.</t>
<t>The following items, which are not an exhaustive list, do not
represent the protocol itself and are considered out of the scope of the
Session Recording Protocol:</t>
<t><list style="symbols">
<t>Delivering recorded media in real-time as the CS media</t>
<t>Specifications of criteria to select a specific CS to be recorded
or triggers to record a certain CS in the future</t>
<t>Recording policies that determine whether the CS should be
recorded and whether parts of the CS are to be recorded</t>
<t>Retention policies that determine how long a recording is
stored</t>
<t>Searching and accessing the recorded media and metadata</t>
<t>Policies governing how CS users are made aware of recording</t>
<t>Delivering additional recording session metadata through a
non-SIP mechanism</t>
</list></t>
</section>
<section anchor="Overview" title="Overview of operations">
<t>This section is informative and provides a description of recording
operations.</t>
<t>Section 6 describes the SIP communication in a recording session
between an SRC and an SRS, and the procedures for recording-aware user
agents participating in a CS. Section 7 describes the SDP in a recording
session, and the procedures for recording indications and recording
preferences. Section 8 describes the RTP handling in a recording
session. Section 9 describes the mechanism to deliver recording metadata
from the SRC to the SRS.</t>
<t>As mentioned in the architecture document <xref target="RFC7245"/>,
there are a number of types of call flows based on the location of the
Session Recording Client. The following sample call flows provide a
quick overview of the operations between the SRC and the SRS.</t>
<section title="Delivering recorded media">
<t>When a SIP Back-to-Back User Agent (B2BUA) with SRC functionality
routes a call from UA(A) to UA(B), the SRC has access to the media
path between the user agents. When the SRC is aware that it should be
recording the conversation, the SRC can cause the B2BUA to relay the
media between UA(A) and UA(B). The SRC then establishes the Recording
Session with the SRS and sends replicated media towards the SRS.</t>
<t>An endpoint may also have SRC functionality, where the endpoint
itself establishes the Recording Session to the SRS. Since the
endpoint has access to the media in the Communication Session, the
endpoint can send replicated media towards the SRS.</t>
<t>The following example call flows show an SRC establishing a
recording session towards an SRS. The first call flow illustrates
UA(A) acting as the SRC. The second illustrates a B2BUA acting as the
SRC. Note that the SRC can choose when to establish the Recording
Session independent of the Communication Session, even though the
following call flows suggest that the SRC is establishing the
Recording Session (message #3 in the first call flow, message #5 in
the second) after the Communication Session is established.</t>
<figure align="left" anchor="figure_basic_ua_callflow"
title="Basic recording call flow with UA as SRC">
<artwork align="center">
UA A/SRC UA B SRS
|(1)CS INVITE | |
|---------------------->| |
| (2) 200 OK | |
|<----------------------| |
| | |
|(3)RS INVITE with SDP | |
|--------------------------------------------->|
| | (4) 200 OK with SDP |
|<---------------------------------------------|
|(5)CS RTP | |
|======================>| |
|<======================| |
|(6)RS RTP | |
|=============================================>|
|=============================================>|
| | |
|(7)CS BYE | |
|---------------------->| |
|(8)RS BYE | |
|--------------------------------------------->|
| | |
</artwork>
</figure>
<figure align="left" anchor="figure_basic_b2bua_callflow"
title="Basic recording call flow with B2BUA as SRC">
<artwork align="center">
UA A SRC UA B SRS
|(1)CS INVITE | | |
|------------->| | |
| |(2)CS INVITE | |
| |---------------------->| |
| | (3) 200 OK | |
| |<----------------------| |
| (4) 200 OK | | |
|<-------------| | |
| |(5)RS INVITE with SDP | |
| |--------------------------------------------->|
| | | (6) 200 OK with SDP |
| |<---------------------------------------------|
|(7)CS RTP | | |
|=============>|======================>| |
|<=============|<======================| |
| |(8)RS RTP | |
| |=============================================>|
| |=============================================>|
|(9)CS BYE | | |
|------------->| | |
| |(10)CS BYE | |
| |---------------------->| |
| |(11)RS BYE | |
| |--------------------------------------------->|
| | | |
</artwork>
</figure>
<t>The above call flow can also apply to the case of a centralized
conference with a mixer. For clarity, ACKs to INVITEs and 200 OKs to
BYEs are not shown. The conference focus can provide the SRC
functionality since the conference focus has access to all the media
from each conference participant. When a recording is requested, the
SRC delivers the metadata and the media streams to the SRS. Since the
conference focus has access to a mixer, the SRC may choose to mix the
media streams from all participants as a single mixed media stream
towards the SRS.</t>
<t>An SRC can use a single recording session to record multiple
communication sessions. Every time the SRC wants to record a new call,
the SRC updates the recording session with a new SDP offer to add new
recorded streams to the recording session, and correspondingly also
updates the metadata for the new call.</t>
<t>An SRS can also establish a recording session to an SRC, although
it is beyond the scope of this document to define how an SRS would
specify which calls to record.</t>
</section>
<section title="Delivering recording metadata">
<t>The SRC is responsible for the delivery of metadata to the SRS. The
SRC may provide an initial metadata snapshot about recorded media
streams in the initial INVITE content in the recording session.
Subsequent metadata updates can be represented as a stream of events
in UPDATE <xref target="RFC3311"/> or reINVITE requests sent by the
SRC. These metadata updates are normally incremental updates to the
initial metadata snapshot to optimize on the size of updates. However,
the SRC may also decide to send a new metadata snapshot at any time.</t>
<t>Metadata is transported in the body of INVITE or UPDATE messages.
Certain metadata, such as the attributes of the recorded media stream,
are located in the SDP of the recording session.</t>
<t>The SRS has the ability to send a request to the SRC for a new
metadata snapshot. This can happen when the
SRS fails to understand the current stream of incremental updates for
whatever reason, for example, when the SRS loses the current state due
to internal failure. The SRS may optionally attach a reason along with
the snapshot request. This request allows both SRC and SRS to
synchronize the states with a new metadata snapshot so that further
metadata incremental updates will be based on the latest metadata
snapshot. Similar to the metadata content, the metadata snapshot
request is transported as content in UPDATE or INVITE sent by the SRS
in the recording session.</t>
<figure align="left" anchor="figure_metadata"
title="Delivering metadata via SIP UPDATE">
<artwork align="center">
SRC SRS
| |
|(1) INVITE (metadata snapshot 1) |
|---------------------------------------------------->|
| (2)200 OK |
|<----------------------------------------------------|
|(3) ACK |
|---------------------------------------------------->|
|(4) RTP |
|====================================================>|
|====================================================>|
|(5) UPDATE (metadata update 1) |
|---------------------------------------------------->|
| (6) 200 OK |
|<----------------------------------------------------|
|(7) UPDATE (metadata update 2) |
|---------------------------------------------------->|
| (8) 200 OK |
|<----------------------------------------------------|
| (9) UPDATE (metadata snapshot request) |
|<----------------------------------------------------|
| (10) 200 OK |
|---------------------------------------------------->|
| (11) INVITE (metadata snapshot 2 + SDP offer) |
|---------------------------------------------------->|
| (12) 200 OK (SDP answer) |
|<----------------------------------------------------|
| (13) UPDATE (metadata update 1 based on snapshot 2) |
|---------------------------------------------------->|
| (14) 200 OK |
|<----------------------------------------------------|
</artwork>
</figure>
</section>
<section title="Receiving recording indications and providing recording preferences">
<t>The SRC is responsible for providing recording indications to the
participants in the CS. A recording-aware UA supports receiving
recording indications via the SDP attribute a=record, and it can
specify a recording preference in the CS by including the SDP
attribute a=recordpref. The recording attribute is a declaration by
the SRC in the CS to indicate whether recording is taking place. The
recording preference attribute is a declaration by the recording-aware
UA in the CS to indicate its recording preference. A UA that does not
want to be recorded may still be notified that recording is occurring
for a number of reasons (e.g., it was not capable of indicating its
preference, its preference was ignored, etc.). If this occurs, the UA's
only mechanism to avoid being recorded is to terminate its
participation in the session.</t>
<t>To illustrate how the attributes are used, if a UA (A) is
initiating a call to UA (B) and UA (A) is also an SRC that is
performing the recording, then UA (A) provides the recording
indication in the SDP offer with a=record:on. Since UA (A) is the SRC,
UA (A) receives the recording indication from the SRC directly. When
UA (B) receives the SDP offer, UA (B) will see that recording is
happening on the other endpoint of this session. Since UA (B) is not
an SRC and does not provide any recording preference, the SDP answer
contains neither a=record nor a=recordpref.</t>
<figure align="left" anchor="figure_indication"
title="Recording indication and recording preference">
<artwork align="center">
UA A UA B
(SRC) |
| |
| [SRC recording starts] |
|(1) INVITE (SDP offer + a=record:on) |
|---------------------------------------------------->|
| (2) 200 OK (SDP answer) |
|<----------------------------------------------------|
|(3) ACK |
|---------------------------------------------------->|
|(4) RTP |
|<===================================================>|
| |
| [UA B wants to set preference to no recording] |
| (5) INVITE (SDP offer + a=recordpref:off) |
|<----------------------------------------------------|
| [SRC honors the preference and stops recording] |
|(6) 200 OK (SDP answer + a=record:off) |
|---------------------------------------------------->|
| (7) ACK |
|<----------------------------------------------------|
</artwork>
</figure>
<t>After the call is established and recording is in progress, UA (B)
later decides to change the recording preference to no recording and
sends a reINVITE with the a=recordpref attribute. It is up to the SRC
to honor the preference, and in this case SRC decides to stop the
recording and updates the recording indication in the SDP answer.</t>
<t>Note that UA (B) could have explicitly indicated a recording
preference in (2), the 200 OK for the original INVITE. Indicating a
preference of no recording in an initial INVITE or an initial response
to an INVITE may reduce the chance of a user being recorded in the
first place. </t>
</section>
</section>
<section title="SIP Handling">
<section title="Procedures at the SRC">
<section title="Initiating a Recording Session">
<t>A recording session is a SIP session with specific extensions
applied, and these extensions are listed in the procedures for SRC
and SRS below. When an SRC or an SRS receives a SIP session that is
not a recording session, it is up to the SRC or the SRS to determine
what to do with the SIP session.</t>
<t>The SRC can initiate a recording session by sending a SIP INVITE
request to the SRS. The SRC and the SRS are identified in the From
and To headers, respectively.</t>
<t>The SRC MUST include the '+sip.src' feature tag in the Contact
URI, defined in this specification as an extension to <xref
target="RFC3840"/>, for all recording sessions. An SRS uses the
presence of the '+sip.src' feature tag in dialog creating and
modifying requests and responses to confirm that the dialog being
created is for the purpose of a Recording Session. In addition, when
an SRC sends a REGISTER request to a registrar, the SRC MAY include
the '+sip.src' feature tag to indicate that it is an SRC.</t>
<t>Since SIP Caller Preferences extensions are optional to implement
for routing proxies, there is no guarantee that a recording session
will be routed to an SRC or SRS. A new option tag is introduced:
"siprec". As per <xref target="RFC3261"/>, only an SRC or an SRS can
accept this option tag in a recording session. An SRC MUST include
the "siprec" option tag in the Require header when initiating a
Recording Session so that UAs which do not support the session
recording protocol extensions will simply reject the INVITE request
with a 420 Bad Extension.</t>
<t>When an SRC receives a new INVITE, the SRC MUST only consider the
SIP session as a recording session when both the '+sip.srs' feature
tag and 'siprec' option tag are included in the INVITE request.</t>
</section>
<section title="SIP extensions for recording indication and preference">
<t>For the communication session, the SRC MUST provide recording
indications to all participants in the CS. A participant UA in a CS
can indicate that it is recording-aware by providing the
"record-aware" option tag, and the SRC MUST provide recording
indications in the new SDP a=record attribute described in the SDP
Handling section. In the absence of the "record-aware" option tag,
meaning that the participant UA is not recording-aware, an SRC MUST
provide recording indications through other means, such as playing a
tone in-band, having a signed participant contract in place,
etc.</t>
<t>An SRC in the CS may also indicate itself as a session recording
client by including the '+sip.src' feature tag. A recording-aware
participant can learn that an SRC is in the CS, and can set the
recording preference for the CS with the new SDP a=recordpref
attribute described in the SDP Handling section below.</t>
</section>
</section>
<section title="Procedures at the SRS">
<t>When an SRS receives a new INVITE, the SRS MUST only consider the
SIP session as a recording session when both the '+sip.src' feature
tag and 'siprec' option tag are included in the INVITE request.</t>
<t>The SRS can initiate a recording session by sending a SIP INVITE
request to the SRC. The SRS and the SRC are identified in the From and
To headers, respectively.</t>
<t>The SRS MUST include the '+sip.srs' feature tag in the Contact URI,
as per <xref target="RFC3840"/>, for all recording sessions. An SRC
uses the presence of this feature tag in dialog creating and modifying
requests and responses to confirm that the dialog being created is for
the purpose of a Recording Session (REQ-30). In addition, when an SRS
sends a REGISTER request to a registrar, the SRS SHOULD include the
'+sip.srs' feature tag to indicate that it is an SRS.</t>
<t>An SRS MUST include the "siprec" option tag in the Require header
as per <xref target="RFC3261"/> when initiating a Recording Session so
that UAs which do not support the session recording protocol
extensions will simply reject the INVITE request with a 420 Bad
Extension.</t>
</section>
<section title="Procedures for Recording-aware User Agents">
<t>A recording-aware user agent is a participant in the CS that
supports the SIP and SDP extensions for receiving recording
indications and for requesting recording preferences for the call. A
recording-aware UA MUST indicate that it can accept reporting of
recording indication provided by the SRC with a new option tag
"record-aware" when initiating or establishing a CS; that is, by including
the "record-aware" option tag in the Supported header in the initial INVITE
request or response.</t>
<t>A recording-aware UA MUST provide a recording indication to the end
user through an appropriate user interface, indicating whether
recording is on, off, or paused for each medium. Appropriate user
interfaces may include real-time notification or previously
established agreements that use of the device is subject to recording.
Some user agents that are automatons (e.g., IVR, media server, PSTN
gateway) may not have a user interface to render recording indication.
When such a user agent indicates recording awareness, the UA SHOULD
render recording indication through other means, such as passing an
in-band tone on the PSTN gateway, putting the recording indication in
a log file, or raising an application event in a VoiceXML dialog.
These user agents MAY also choose not to indicate recording awareness,
thereby relying on whatever mechanism an SRC chooses to indicate
recording, such as playing a tone in-band.</t>
</section>
</section>
<section title="SDP Handling">
<section title="Procedures at the SRC">
<t>The SRC and SRS follow the SDP offer/answer model in <xref
target="RFC3264"/>. The procedures for SRC and SRS describe the
conventions used in a recording session.</t>
<section title="SDP handling in RS">
<t>Since the SRC does not expect to receive media from the SRS, the
SRC typically sets each media stream of the SDP offer to only send
media, by qualifying them with the a=sendonly attribute, according
to the procedures in <xref target="RFC3264"/>.</t>
<t>The SRC sends recorded streams of participants to the SRS, and
the SRC MUST provide a label attribute (a=label), as per <xref
target="RFC4574"/>, on each media stream in order to identify the
recorded stream with the rest of the metadata. The a=label attribute
identifies each recorded media stream, and the label name is mapped
to the Media Stream Reference in the metadata as per <xref
target="I-D.ietf-siprec-metadata"/>. The scope of the a=label
attribute only applies to the SDP and Metadata conveyed in the
bodies of the SIP request or response that the label appeared in.
Note that a recorded stream is distinct from a CS stream; the
metadata provides a list of participants that contribute to each
recorded stream.</t>
<t>The following is an example SDP offer from an SRC with both audio
and video recorded streams. Note that the following example contains
unfolded lines longer than 72 characters. These are captured between
&lt;allOneLine&gt; tags.</t>
<figure align="left" anchor="figure_offer"
title="Sample SDP offer from SRC with audio and video streams">
<artwork align="left">
v=0
o=SRC 2890844526 2890844526 IN IP4 198.51.100.1
s=-
c=IN IP4 198.51.100.1
t=0 0
m=audio 12240 RTP/AVP 0 4 8
a=sendonly
a=label:1
m=video 22456 RTP/AVP 98
a=rtpmap:98 H264/90000
<allOneLine>
a=fmtp:98 profile-level-id=42A01E;
sprop-parameter-sets=Z0IACpZTBYmI,aMljiA==
</allOneLine>
a=sendonly
a=label:2
m=audio 12242 RTP/AVP 0 4 8
a=sendonly
a=label:3
m=video 22458 RTP/AVP 98
a=rtpmap:98 H264/90000
<allOneLine>
a=fmtp:98 profile-level-id=42A01E;
sprop-parameter-sets=Z0IACpZTBYmI,aMljiA==
</allOneLine>
a=sendonly
a=label:4
</artwork>
</figure>
<section title="Handling media stream updates">
<t>Over the lifetime of a recording session, the SRC can add and
remove recorded streams from the recording session for various
reasons. For example, this can occur when a CS stream is added to or
removed from the CS, or when a CS is created or terminated if a
recording session handles multiple CSes. To remove a recorded stream from
the recording session, the SRC sends a new SDP offer where the
port of the media stream to be removed is set to zero, according
to the procedures in <xref target="RFC3264"/>. To add a recorded
stream to the recording session, the SRC sends a new SDP offer by
adding a new media stream description or by reusing an old media
stream which had been previously disabled, according to the
procedures in <xref target="RFC3264"/>.</t>
<t>The SRC can temporarily discontinue streaming and collection of
recorded media from the SRC to the SRS for reasons such as masking
the recording. In this case, the SRC sends a new SDP offer and
sets the media stream to inactive (a=inactive) for each recorded
stream to be paused, as per the procedures in <xref
target="RFC3264"/>. To resume streaming and collection of recorded
media, the SRC sends a new SDP offer and sets the media stream to
sendonly (a=sendonly). Note that a CS itself may change the media
stream direction by updating the SDP, for example, by setting
a=inactive for SDP hold. Media stream direction changes in CS are
conveyed in the metadata by the SRC. When a CS media stream is
changed to/from inactive, the effect on the corresponding RS media
stream is governed by SRC policy. The SRC MAY have a local policy
to pause an RS media stream when the corresponding CS media stream
is inactive, or it MAY leave the RS media stream as sendonly.</t>
</section>
</section>
<section title="Recording indication in CS">
<t>While there are existing mechanisms for providing an indication
that a CS is being recorded, these mechanisms are usually delivered
on the CS media streams such as playing an in-band tone or an
announcement to the participants. A new 'record' SDP attribute is
introduced to allow the SRC to indicate recording state to a
recording-aware UA in a CS.</t>
<t>The 'record' SDP attribute appears at the media-level or
session-level in either SDP offer or answer. When the attribute is
applied at the session-level, the indication applies to all media
streams in the SDP. When the attribute is applied at the
media-level, the indication applies to the media stream only, and
that overrides the indication if also set at the session-level.
Whenever the recording indication needs to change, such as
termination of recording, then the SRC MUST initiate a reINVITE or
UPDATE to update the SDP a=record attribute.</t>
<t>The following is the ABNF of the 'record' attribute:</t>
<t><figure>
<artwork>
attribute =/ record-attr
; attribute defined in RFC 4566
record-attr = "record:" indication
indication = "on" / "off" / "paused"
</artwork>
</figure></t>
<t><list style="hanging">
<t hangText="on:">Recording is in progress.</t>
<t hangText="off:">No recording is in progress.</t>
<t hangText="paused:">Recording is in progress but media is
paused.</t>
</list></t>
</section>
<section title="Recording preference in CS">
<t>When the SRC receives the a=recordpref SDP attribute in an SDP offer or
answer, the SRC chooses to honor the preference to record based on
local policy at the SRC. If the SRC makes a change in recording
state, the SRC MUST report the new recording state in the a=record
attribute in the SDP answer or in a subsequent SDP offer.</t>
</section>
</section>
<section title="Procedures at the SRS">
<t>Typically the SRS only receives RTP streams from the SRC;
therefore, the SDP offer/answer from the SRS normally sets each media
stream to receive media, by setting them with the a=recvonly
attribute, according to the procedures of <xref target="RFC3264"/>.
When the SRS is not ready to receive a recorded stream, the SRS sets
the media stream as inactive in the SDP offer or answer by setting it
with an a=inactive attribute, according to the procedures of <xref
target="RFC3264"/>. When the SRS is ready to receive recorded streams,
the SRS sends a new SDP offer and sets the media streams with an
a=recvonly attribute.</t>
<t>The following is an example of an SDP answer from the SRS for the
SDP offer from the above sample. Note that the following example
contains unfolded lines longer than 72 characters. These are captured
between <allOneLine> tags.</t>
<figure align="left" anchor="figure_answer"
title="Sample SDP answer from SRS with audio and video streams">
<artwork align="left">
v=0
o=SRS 0 0 IN IP4 198.51.100.20
s=-
c=IN IP4 198.51.100.20
t=0 0
m=audio 10000 RTP/AVP 0
a=recvonly
a=label:1
m=video 10002 RTP/AVP 98
a=rtpmap:98 H264/90000
<allOneLine>
a=fmtp:98 profile-level-id=42A01E;
sprop-parameter-sets=Z0IACpZTBYmI,aMljiA==
</allOneLine>
a=recvonly
a=label:2
m=audio 10004 RTP/AVP 0
a=recvonly
a=label:3
m=video 10006 RTP/AVP 98
a=rtpmap:98 H264/90000
<allOneLine>
a=fmtp:98 profile-level-id=42A01E;
sprop-parameter-sets=Z0IACpZTBYmI,aMljiA==
</allOneLine>
a=recvonly
a=label:4
</artwork>
</figure>
<t>Over the lifetime of a recording session, the SRS can remove
recorded streams from the recording session for various reasons. To
remove a recorded stream from the recording session, the SRS sends a
new SDP offer where the port of the media stream to be removed is set
to zero, according to the procedures in <xref target="RFC3264"/>.</t>
<t>The SRS MUST NOT add recorded streams in the recording session when
the SRS sends a new SDP offer. Similarly, when the SRS starts a
recording session, the SRS MUST initiate the INVITE without an SDP
offer to let the SRC generate the SDP offer with the streams to be
recorded.</t>
<t>The following sequence diagram shows an example where the SRS is
initially not ready to receive recorded streams, and later updates the
recording session when the SRS is ready to record.</t>
<figure align="left" anchor="figure_inactive"
title="SRS responding to offer with a=inactive">
<artwork align="left">
SRC SRS
| |
|(1) INVITE (SDP offer) |
|---------------------------------------------------->|
| [not ready to record]
| (2)200 OK with SDP inactive |
|<----------------------------------------------------|
|(3) ACK |
|---------------------------------------------------->|
| ... |
| [ready to record]
| (4) re-INVITE with SDP recvonly |
|<----------------------------------------------------|
|(5)200 OK with SDP sendonly |
|---------------------------------------------------->|
| (6) ACK |
|<----------------------------------------------------|
|(7) RTP |
|====================================================>|
| ... |
|(8) BYE |
|---------------------------------------------------->|
| (9) OK |
|<----------------------------------------------------|
</artwork>
</figure>
</section>
<section title="Procedures for Recording-aware User Agents">
<section title="Recording indication">
<t>When a recording-aware UA receives an SDP offer or answer that
includes the a=record attribute, the UA provides an indication to
the end user whether the recording is on, off, or paused for each
medium based on the most recently received a=record SDP attribute
for that medium.</t>
<t>When a CS is traversed through multiple UAs such as a B2BUA or a
conference focus, each UA involved in the CS that is aware that the
CS is being recorded MUST provide the recording indication through
the a=record attribute to all other parties in the CS.</t>
<t>It is possible that more than one SRC is in the call path of the
same CS, but the recording indication attribute does not provide any
hint as to which SRC or how many SRCs are recording. An endpoint
knows only that the call is being recorded. Furthermore, this
attribute is not used as a request for a specific SRC to start/stop
recording.</t>
</section>
<section title="Recording preference">
<t>A participant in a CS MAY set the recording preference in the CS
to be recorded or not recorded at session establishment or during
the session. A new 'recordpref' SDP attribute is introduced, and the
participant in the CS may set this recording preference attribute in any
SDP offer/answer at session establishment time or during the
session. Based on local policies at the SRC, the SRC is not required
to honor the recording preference from a participant, and the
participant can learn the recording indication through the a=record
SDP attribute as described in the above section.</t>
<t>The SDP a=recordpref attribute can appear at the media-level or
session-level and can appear in an SDP offer or answer. When the
attribute is applied at the session-level, the recording preference
applies to all media streams in the SDP. When the attribute is
applied at the media-level, the recording preference applies to the
media stream only, and that overrides the recording preference if
also set at the session-level. The user agent can change the
recording preference by changing the a=recordpref attribute in a
subsequent SDP offer or answer. The absence of the a=recordpref
attribute in the SDP indicates that the UA has no recording
preference.</t>
<t>The following is the ABNF of the recordpref attribute:</t>
<t><figure>
<artwork>
attribute =/ recordpref-attr
; attribute defined in RFC 4566
recordpref-attr = "recordpref:" pref
pref = "on" / "off" / "pause" / "nopreference"
</artwork>
</figure></t>
<t><list style="hanging">
<t hangText="on:">Sets the preference to record if it has not
already been started. If the recording is currently paused, the
preference is to resume recording.</t>
<t hangText="off:">Sets the preference for no recording. If
recording has already been started, then the preference is to
stop the recording.</t>
<t hangText="pause:">If the recording is currently in progress,
sets the preference to pause the recording.</t>
<t hangText="nopreference:">To indicate that the UA has no
preference on recording.</t>
</list></t>
</section>
</section>
</section>
<section title="RTP Handling">
<t>This section provides recommendations and guidelines for RTP and RTCP
in the context of SIPREC. In order to communicate most effectively, the
Session Recording Client (SRC), the Session Recording Server (SRS), and
any Recording-aware User Agents (UAs) should utilize the mechanisms
provided by RTP in a well-defined and predictable manner. It is the goal
of this document to make the reader aware of these mechanisms and
provide recommendations and guidelines.</t>
<section anchor="mechanisms" title="RTP Mechanisms">
<t>This section briefly describes important RTP/RTCP constructs and
mechanisms that are particularly useful within the context of
SIPREC.</t>
<section anchor="rtcp" title="RTCP">
<t>The RTP data transport is augmented by a control protocol (RTCP)
to allow monitoring of the data delivery. RTCP, as defined in <xref
target="RFC3550"/>, is based on the periodic transmission of control
packets to all participants in the RTP session, using the same
distribution mechanism as the data packets. Support for RTCP is
REQUIRED, per <xref target="RFC3550"/>, and it provides, among other
things, the following important functionality in relation to
SIPREC:</t>
<t>1) Feedback on the quality of the data distribution</t>
<t>This feedback from the receivers may be used to diagnose faults
in the distribution. As such, RTCP is a well-defined and efficient
mechanism for the SRS to inform the SRC, and for the SRC to inform
Recording-aware UAs, of issues that arise with respect to the
reception of media that is to be recorded.</t>
<t>2) Carries a persistent transport-level identifier for an RTP
source called the canonical name or CNAME</t>
<t>The SSRC identifier may change if a conflict is discovered or a
program is restarted, in which case receivers can use the CNAME to
keep track of each participant. Receivers may also use the CNAME to
associate multiple data streams from a given participant in a set of
related RTP sessions, for example to synchronize audio and video.
Synchronization of media streams is also facilitated by the NTP and
RTP timestamps included in RTCP packets by data senders.</t>
</section>
<section anchor="rtp_profile" title="RTP Profile">
<t>The RECOMMENDED RTP profiles for the SRC, SRS, and
Recording-aware UAs are "Extended Secure RTP Profile for Real-time
Transport Control Protocol (RTCP)-Based Feedback (RTP/SAVPF)" <xref
target="RFC5124"/>, when using encrypted RTP streams, and "Extended
RTP Profile for Real-time Transport Control Protocol (RTCP)-Based
Feedback (RTP/AVPF)" <xref target="RFC4585"/>, when using
non-encrypted media streams. However, as these are not requirements,
some implementations may use "The Secure Real-time Transport
Protocol (SRTP)" <xref target="RFC3711"/>, and "RTP Profile for
Audio and Video Conferences with Minimal Control" <xref
target="RFC3551"/>. Therefore, it is RECOMMENDED that the SRC, SRS,
and Recording-aware UAs not rely entirely on RTP/SAVPF or RTP/AVPF
for core functionality that may be at least partially achievable
using RTP/SAVP and RTP/AVP.</t>
<t>AVPF and SAVPF provide an improved RTCP timer model that allows
more flexible transmission of RTCP packets in response to events,
rather than strictly according to bandwidth. AVPF-based codec
control messages provide efficient mechanisms for an SRC, SRS, and
Recording-aware UAs to handle events such as scene changes, error
recovery, and dynamic bandwidth adjustments. These messages are
discussed in more detail later in this document.</t>
<t>SAVP and SAVPF provide media encryption, integrity protection,
replay protection, and a limited form of source authentication. They
do not contain or require a specific keying mechanism.</t>
</section>
<section anchor="ssrc" title="SSRC">
<t>The synchronization source (SSRC), as defined in <xref
target="RFC3550"/>, is carried in the RTP header and in various
fields of RTCP packets. It is a random 32-bit number that is
required to be globally unique within an RTP session. It is crucial
that the number be chosen with care in order that participants on
the same network or starting at the same time are not likely to
choose the same number. Guidelines regarding SSRC value selection
and conflict resolution are provided in <xref
target="RFC3550"/>.</t>
<t>The SSRC may also be used to separate different sources of media
within a single RTP session. For this reason as well as for conflict
resolution, it is important that the SRC, SRS, and Recording-aware
UAs handle changes in SSRC values and properly identify the reason
for the change. The CNAME values carried in RTCP facilitate this
identification.</t>
</section>
<section anchor="csrc" title="CSRC">
<t>The contributing source (CSRC), as defined in <xref
target="RFC3550"/>, identifies the source of a stream of RTP packets
that has contributed to the combined stream produced by an RTP
mixer. The mixer inserts a list of the SSRC identifiers of the
sources that contributed to the generation of a particular packet
into the RTP header of that packet. This list is called the CSRC
list. It is RECOMMENDED that an SRC or Recording-aware UA, when
acting as a mixer, set the CSRC list accordingly, and that the SRC
and SRS interpret the CSRC list per <xref target="RFC3550"/> when
received.</t>
</section>
<section anchor="sdes" title="SDES">
<t>The Source Description (SDES), as defined in <xref
target="RFC3550"/>, contains an SSRC/CSRC identifier followed by a
list of zero or more items, which carry information about the
SSRC/CSRC. End systems send one SDES packet containing their own
source identifier (the same as the SSRC in the fixed RTP header). A
mixer sends one SDES packet containing a chunk for each contributing
source from which it is receiving SDES information, or multiple
complete SDES packets if there are more than 31 such sources.</t>
<t>The ability to identify individual contributing sources is
important in the context of SIPREC. Metadata <xref
target="I-D.ietf-siprec-metadata"/> provides a mechanism to achieve
this at the signaling level. SDES provides a mechanism at the RTP
level.</t>
<section anchor="cname" title="CNAME">
<t>The Canonical End-Point Identifier (CNAME), as defined in <xref
target="RFC3550"/>, provides the binding from the SSRC identifier
to an identifier for the source (sender or receiver) that remains
constant. It is important that the SRC and Recording-aware UAs generate
CNAMEs appropriately and that the SRC and SRS interpret and use
them for this purpose. Guidelines for generating CNAME values are
provided in "Guidelines for Choosing RTP Control Protocol (RTCP)
Canonical Names (CNAMEs)" <xref target="RFC7022"/>.</t>
</section>
</section>
<section anchor="keepalive" title="Keepalive">
<t>It is anticipated that media streams in SIPREC may exist in an
inactive state for extended periods of time for any of a number of
valid reasons. In order for the bindings and any pinholes in
NATs/firewalls to remain active during such intervals, it is
RECOMMENDED that the SRC, SRS, and Recording-aware UAs follow the
keep-alive procedure recommended in "Application Mechanism for
Keeping Alive the NAT Mappings Associated to RTP/RTP Control
Protocol (RTCP) Flows" <xref target="RFC6263"/> for all RTP media
streams.</t>
</section>
<section anchor="feedback" title="RTCP Feedback Messages">
<t>"Codec Control Messages in the RTP Audio-Visual Profile with
Feedback (AVPF)" <xref target="RFC5104"/> specifies extensions to
the messages defined in AVPF <xref target="RFC4585"/>. Support for
and proper usage of these messages is important to SRC, SRS, and
Recording-aware UA implementations. Note that these messages are
applicable only when using the AVPF or SAVPF RTP profiles.</t>
<section anchor="fir" title="Full Intra Request">
<t>A Full Intra Request (FIR) Command, when received by the
designated media sender, requires that the media sender sends a
Decoder Refresh Point at the earliest opportunity. Using a decoder
refresh point implies refraining from using any picture sent prior
to that point as a reference for the encoding process of any
subsequent picture sent in the stream.</t>
<t>Decoder refresh points, especially Intra or IDR pictures for
H.264 video codecs, are in general several times larger in size
than predicted pictures. Thus, in scenarios in which the available
bit rate is small, the use of a decoder refresh point implies a
delay that is significantly longer than the typical picture
duration.</t>
<section anchor="sip_info" title="SIP INFO for FIR">
<t>"XML Schema for Media Control" <xref target="RFC5168"/>
defines an Extensible Markup Language (XML) Schema for video
fast update. Implementations are discouraged from using the
method described except for backward compatibility purposes.
Implementations SHOULD use FIR messages instead.</t>
<t>To make sure a common mechanism exists between the SRC and
SRS, the SRS MUST support both mechanisms (FIR and SIP INFO),
using FIR when negotiated successfully with the SRC, and using
SIP INFO otherwise.</t>
</section>
</section>
<section anchor="pli" title="Picture Loss Indicator">
<t>Picture Loss Indication (PLI), as defined in <xref
target="RFC4585"/>, informs the encoder of the loss of an
undefined amount of coded video data belonging to one or more
pictures. <xref target="RFC4585"/> recommends using PLI instead of
FIR to recover from errors. FIR is appropriate only in situations
where not sending a decoder refresh point would render the video
unusable for the users. Examples where sending FIR is appropriate
include a multipoint conference when a new user joins the
conference and no regular decoder refresh point interval is
established, and a video switching MCU that changes streams.</t>
<t>Appropriate use of PLI and FIR is important to ensure with
minimum overhead that the recorded video is usable (e.g., the
necessary reference frames exist for a player to render the
recorded video).</t>
</section>
<section anchor="tmmbr"
title="Temporary Maximum Media Stream Bit Rate Request">
<t>A receiver, translator, or mixer uses the Temporary Maximum
Media Stream Bit Rate Request (TMMBR) to request a sender to limit
the maximum bit rate for a media stream to the provided value.
Appropriate use of TMMBR facilitates rapid adaptation to changes
in available bandwidth.</t>
<section anchor="bandwidth"
title="Renegotiation of SDP bandwidth attribute">
<t>If it is likely that the new value indicated by TMMBR will be
valid for the remainder of the session, the TMMBR sender is
expected to perform a renegotiation of the session upper limit
using the session signaling protocol. Therefore, for SIPREC,
implementations are RECOMMENDED to use TMMBR for temporary
changes, and renegotiation of bandwidth via SDP offer/answer for
more permanent changes.</t>
</section>
</section>
</section>
<section anchor="symmetric_rtp"
title="Symmetric RTP/RTCP for Sending and Receiving">
<t>Within an SDP offer/answer exchange, RTP entities choose the RTP
and RTCP transport addresses (i.e., IP addresses and port numbers)
on which to receive packets. When sending packets, the RTP entities
may use either the same source port as, or a different source port from, those
signaled for receiving packets. When the transport address used to
send and receive RTP is the same, it is termed "symmetric RTP" <xref
target="RFC4961"/>. Likewise, when the transport address used to
send and receive RTCP is the same, it is termed "symmetric RTCP"
<xref target="RFC4961"/>.</t>
<t>When sending RTP, it is REQUIRED to use symmetric RTP. When
sending RTCP, it is REQUIRED to use symmetric RTCP. Although an SRS
will not normally send RTP, it will send RTCP as well as receive RTP
and RTCP. Likewise, although an SRC will not normally receive RTP
from the SRS, it will receive RTCP as well as send RTP and RTCP.</t>
<t><list style="empty">
<t>Note: Symmetric RTP and symmetric RTCP are different from
RTP/RTCP multiplexing <xref target="RFC5761"/>.</t>
</list></t>
</section>
</section>
<section anchor="roles" title="Roles">
<t>An SRC has the task of gathering media from the various UAs in one
or more Communication Sessions (CSs) and forwarding the information to
the SRS within the context of a corresponding Recording Session (RS).
There are numerous ways in which an SRC may do this, including but not
limited to appearing as a UA within a CS, or as a B2BUA between UAs
within a CS.</t>
<t><figure align="left" anchor="ua_src" title="UA as SRC">
<artwork align="center">
(Recording Session) +---------+
+------------SIP------->| |
| +------RTP/RTCP----->| SRS |
| | +-- Metadata -->| |
| | | +---------+
v v |
+---------+
| SRC |
|---------| (Communication Session) +---------+
| |<----------SIP---------->| |
| UA-A | | UA-B |
| |<-------RTP/RTCP-------->| |
+---------+ +---------+
</artwork>
</figure></t>
<t><figure align="left" anchor="b2bua_src" title="B2BUA as SRC">
<artwork align="center">
(Recording Session) +---------+
+------------SIP------->| |
| +------RTP/RTCP----->| SRS |
| | +-- Metadata -->| |
| | | +---------+
v v |
+---------+
| SRC |
+---------+ |---------| +---------+
| |<----SIP----->| |<----SIP----->| |
| UA-A | | B2BUA | | UA-B |
| |<--RTP/RTCP-->| |<--RTP/RTCP-->| |
+---------+ +---------+ +---------+
|_______________________________________________|
(Communication Session)
</artwork>
</figure></t>
<t>The following subsections define a set of roles an SRC may choose
to play based on its position with respect to a UA within a CS, and an
SRS within an RS. A CS and a corresponding RS are independent
sessions; therefore, an SRC may play a different role within a CS than
it does within the corresponding RS.</t>
<section title="SRC acting as an RTP Translator">
<t>The SRC may act as a translator, as defined in <xref
target="RFC3550"/>. A defining characteristic of a translator is
that it forwards RTP packets with their SSRC identifier intact.
There are two types of translators, one that simply forwards, and
another that performs transcoding (e.g., from one codec to another)
in addition to forwarding.</t>
<section title="Forwarding Translator">
<t>When acting as a forwarding translator, RTP received as
separate streams from different sources (e.g., from different UAs
with different SSRCs) cannot be mixed by the SRC and MUST be sent
separately to the SRS. All RTCP reports MUST be passed by the SRC
between the UAs and the SRS, such that the UAs and SRS are able to
detect any SSRC collisions.</t>
<t>RTCP Sender Reports generated by a UA sending a stream MUST be
forwarded to the SRS. RTCP Receiver Reports generated by the SRS
MUST be forwarded to the relevant UA.</t>
<t>UAs may receive multiple sets of RTCP Receiver Reports, one or
more from other UAs participating in the CS, and one from the SRS
participating in the RS. A UA SHOULD process the RTCP Receiver
Reports from the SRS if it is recording-aware.</t>
<t>If SRTP is used on both the CS and the RS, decryption and/or
re-encryption may occur. For example, if different keys are used,
it will occur. If the same keys are used, it need not occur. <xref
target="Security"/> provides additional information on SRTP and
keying mechanisms.</t>
<t>If packet loss occurs, either from the UA to the SRC or from
the SRC to the SRS, the SRS SHOULD detect and attempt to recover
from the loss. The SRC does not play a role in this other than
forwarding the associated RTP and RTCP packets.</t>
</section>
<section title="Transcoding Translator">
<t>When acting as a transcoding translator, an SRC MAY perform
transcoding (e.g., from one codec to another), and this may result
in a different rate of packets between what the SRC receives on
the CS and what the SRC sends on the RS. As when acting as a
forwarding translator, RTP received as separate streams from
different sources (e.g., from different UAs with different SSRCs)
cannot be mixed by the SRC and MUST be sent separately to the SRS.
All RTCP reports MUST be passed by the SRC between the UAs and the
SRS, such that the UAs and SRS are able to detect any SSRC
collisions.</t>
<t>RTCP Sender Reports generated by a UA sending a stream MUST be
forwarded to the SRS. RTCP Receiver Reports generated by the SRS
MUST be forwarded to the relevant UA. The SRC may need to
manipulate the RTCP Receiver Reports to take account of any
transcoding that has taken place.</t>
<t>UAs may receive multiple sets of RTCP Receiver Reports, one or
more from other UAs participating in the CS, and one from the SRS
participating in the RS. A Recording-aware UA SHOULD be prepared
to process the RTCP Receiver Reports from the SRS, whereas a
recording-unaware UA may discard such RTCP packets as not of
relevance.</t>
<t>If SRTP is used on both the CS and the RS, decryption and/or
re-encryption may occur. For example, if different keys are used,
it will occur. If the same keys are used, it need not occur. <xref
target="Security"/> provides additional information on SRTP and
keying mechanisms.</t>
<t>If packet loss occurs, either from the UA to the SRC or from
the SRC to the SRS, the SRS SHOULD detect and attempt to recover
from the loss. The SRC does not play a role in this other than
forwarding the associated RTP and RTCP packets.</t>
</section>
</section>
<section title="SRC acting as an RTP Mixer">
<t>In the case of the SRC acting as an RTP mixer, as defined in <xref
target="RFC3550"/>, the SRC combines RTP streams from different UAs
and sends them towards the SRS using its own SSRC. The SSRCs from
the contributing UAs SHOULD be conveyed as CSRC identifiers within
this stream. The SRC may make timing adjustments among the received
streams and generate its own timing on the stream sent to the SRS.
Optionally an SRC acting as a mixer can perform transcoding, and can
even cope with different codings received from different UAs. RTCP
Sender Reports and Receiver Reports are not forwarded by an SRC
acting as a mixer, but there are requirements for forwarding RTCP
Source Description (SDES) packets. The SRC generates its own RTCP
Sender and Receiver reports toward the associated UAs and SRS.</t>
<t>The use of SRTP between the SRC and the SRS for the RS is
independent of the use of SRTP between the UAs and SRC for the CS.
<xref target="Security"/> provides additional information on SRTP
and keying mechanisms.</t>
<t>If packet loss occurs from the UA to the SRC, the SRC SHOULD
detect and attempt to recover from the loss. If packet loss occurs
from the SRC to the SRS, the SRS SHOULD detect and attempt to
recover from the loss.</t>
</section>
<section title="SRC acting as an RTP Endpoint">
<t>The case of the SRC acting as an RTP endpoint, as defined in
<xref target="RFC3550"/>, is similar to the mixer case, except that
the RTP session between the SRC and the SRS is considered completely
independent from the RTP session that is part of the CS. The SRC
can, but need not, mix RTP streams from different participants prior
to sending to the SRS. RTCP between the SRC and the SRS is
completely independent of RTCP on the CS.</t>
<t>The use of SRTP between the SRC and the SRS for the RS is
independent of the use of SRTP between the UAs and SRC for the CS.
<xref target="Security"/> provides additional information on SRTP
and keying mechanisms.</t>
<t>If packet loss occurs from the UA to the SRC, the SRC SHOULD
detect and attempt to recover from the loss. If packet loss occurs
from the SRC to the SRS, the SRS SHOULD detect and attempt to
recover from the loss.</t>
</section>
</section>
<section title="RTP Session Usage by SRC">
<t>There are multiple ways that an SRC may choose to deliver recorded
media to an SRS. In some cases, it may use a single RTP session for
all media within the RS, whereas in others it may use multiple RTP
sessions. The following subsections provide examples of basic RTP
session usage by the SRC, including a discussion of how the RTP
constructs and mechanisms covered previously are used. An SRC may
choose to use one or more of the RTP session usages within a single
RS. For the purpose of base interoperability between SRC and SRS, an
SRC MUST support separate m-lines in SDP, one per CS media direction.
The set of RTP session usages described is not meant to be
exhaustive.</t>
<section title="SRC Using Multiple m-lines">
<t>When using multiple m-lines, an SRC includes each m-line in an
SDP offer to the SRS. The SDP answer from the SRS MUST include all
m-lines, with any rejected m-lines indicated with a zero port, per
<xref target="RFC3264"/>. Having received the answer, the SRC starts
sending media to the SRS as indicated in the answer. Alternatively,
if the SRC deems the level of support indicated in the answer to be
unacceptable, it may initiate another SDP offer/answer exchange in
which an alternative RTP session usage is negotiated.</t>
<t>In order to preserve the mapping of media to participant within
the CSs in the RS, the SRC SHOULD map each unique CNAME within the
CSs to a unique CNAME within the RS. Additionally, the SRC SHOULD
map each unique combination of CNAME/SSRC within the CSs to a unique
CNAME/SSRC within the RS. In doing so, the SRC may act as an RTP
translator or as an RTP endpoint.</t>
<t>The following figure illustrates a case in which each UA
represents a participant contributing two RTP sessions (e.g., one
for audio and one for video), each with a single SSRC. The SRC acts
as an RTP translator and delivers the media to the SRS using four
RTP sessions, each with a single SSRC. The CNAME and SSRC values
used by the UAs within their media streams are preserved in the
media streams from the SRC to the SRS.</t>
<t><figure align="left" anchor="m_lines"
title="SRC Using Multiple m-lines">
<artwork align="center">
+---------+
+------------SSRC Aa--->| |
| + --------SSRC Av--->| |
| | +------SSRC Ba--->| SRS |
| | | +---SSRC Bv--->| |
| | | | +---------+
| | | |
| | | |
+---------+ +----------+ +---------+
| |---SSRC Aa-->| SRC |<--SSRC Ba---| |
| UA-A | |(CNAME-A, | | UA-B |
|(CNAME-A)|---SSRC Av-->| CNAME-B) |<--SSRC Bv---|(CNAME-B)|
+---------+ +----------+ +---------+
</artwork>
</figure></t>
</section>
<section title="SRC Using Mixing">
<t>When using mixing, the SRC combines RTP streams from different
participants and sends them towards the SRS using its own SSRC. The
SSRCs from the contributing participants SHOULD be conveyed as CSRC
identifiers. The SRC includes one m-line for each RTP session in an
SDP offer to the SRS. The SDP answer from the SRS MUST include all
m-lines, with any rejected m-lines indicated with a zero port, per
<xref target="RFC3264"/>. Having received the answer, the SRC starts
sending media to the SRS as indicated in the answer.</t>
<t>In order to preserve the mapping of media to participant within
the CSs in the RS, the SRC SHOULD map each unique CNAME within the
CSs to a unique CNAME within the RS. Additionally, the SRC SHOULD
map each unique combination of CNAME/SSRC within the CSs to a unique
CNAME/SSRC within the RS. The SRC MUST avoid SSRC collisions,
rewriting SSRCs if necessary when used as CSRCs in the RS. In doing
so, the SRC acts as an RTP mixer.</t>
<t>In the event the SRS does not support this usage of CSRC values,
it relies entirely on the SIPREC metadata to determine the
participants included within each mixed stream.</t>
<t>The following figure illustrates a case in which each UA
represents a participant contributing two RTP sessions (e.g., one
for audio and one for video), each with a single SSRC. The SRC acts
as an RTP mixer and delivers the media to the SRS using two RTP
sessions, mixing media from each participant into a single RTP
session containing a single SSRC and two CSRCs.</t>
<t><figure align="left" anchor="mixing" title="SRC Using Mixing">
<artwork align="center">
SSRC Sa +---------+
+-------CSRC Aa,Ba--->| |
| | |
| SSRC Sv | SRS |
| +---CSRC Av,Bv--->| |
| | +---------+
| |
+----------+
+---------+ | SRC | +---------+
| |---SSRC Aa-->|(CNAME-S, |<--SSRC Ba---| |
| UA-A | | CNAME-A, | | UA-B |
|(CNAME-A)|---SSRC Av-->| CNAME-B) |<--SSRC Bv---|(CNAME-B)|
+---------+ +----------+ +---------+
</artwork>
</figure></t>
</section>
</section>
<section title="RTP Session Usage by SRS">
<t>An SRS that supports recording an audio CS MUST support SRC usage
of separate audio m-lines in SDP, one per CS media direction. An SRS
that supports recording a video CS MUST support SRC usage of separate
video m-lines in SDP, one per CS media direction. Therefore, for an
SRS supporting a typical audio call, the SRS has to support receiving
at least two audio m-lines. For an SRS supporting a typical audio and
video call, the SRS has to support receiving at least four total
m-lines in the SDP, two audio m-lines and two video m-lines.</t>
<t>These requirements allow an SRS to be implemented that supports
video only, without requiring support for audio recording. They also
allow an SRS to be implemented that supports recording only one
direction of one stream in a CS; for example, an SRS designed to
record security monitoring cameras that only send (not receive) video
without any audio. These requirements were not written to prevent
other modes being implemented and used, such as using a single m-line
and mixing the separate audio streams together. Rather, the
requirements were written to provide a common base mode to implement
for the sake of interoperability. It is important to note that an SRS
implementation supporting the common base may not record all media
streams in a CS if a participant supports more than one m-line in a
video call, such as one for camera and one for presentation. SRS
implementations may support other modes as well, but have to at least
support the ones above such that they interoperate in the common base
mode for basic interoperability.</t>
</section>
</section>
<section title="Metadata">
<t>Some metadata attributes are contained in SDP, and others are
contained in a new content type "application/rs-metadata". The format of
the metadata is described as part of the mechanism in <xref
target="I-D.ietf-siprec-metadata"/>. A new "disposition-type" of
Content-Disposition is defined for the purpose of carrying metadata. The
value is "recording-session", which indicates the
"application/rs-metadata" content contains metadata to be handled by the
SRS.</t>
<section title="Procedures at the SRC">
<t>The SRC MUST send metadata to the SRS in an RS. The SRC SHOULD send
metadata as soon as it becomes available and whenever it changes.
Cases in which an SRC may be justified in waiting temporarily before
sending metadata include:<list style="symbols">
<t>waiting for a previous metadata exchange to complete (i.e., the
SRC cannot send another SDP offer until the previous offer/answer
completes, and may prefer not to send an UPDATE during this time
either).</t>
<t>constraining the signaling rate on the RS.</t>
<t>sending metadata when key events occur rather than for every
event that has any impact on metadata.</t>
</list></t>
<t>The SRC may also be configured to suppress certain metadata out of
concern for privacy or perceived lack of need for it to be included in
the recording.</t>
<t>Metadata sent by the SRC is categorized as either a full metadata
snapshot or a partial update. A full metadata snapshot describes all
metadata associated with the RS. The SRC MAY send a full metadata
snapshot at any time. The SRC MAY send a partial update only if a full
metadata snapshot has been sent previously.</t>
<t>The SRC MAY send metadata (either a full metadata snapshot or a
partial update) in an INVITE request, an UPDATE request <xref
target="RFC3311"/>, or a 200 response to an offerless INVITE from the
SRS. If the metadata contains a reference to any SDP labels, the
request containing the metadata MUST also contain an SDP offer that
defines those labels.</t>
<t>When a SIP message contains both an SDP offer and metadata, the
request body MUST have content type "multipart/mixed", with one
subordinate body part containing the SDP offer and another containing
the metadata. When a SIP message contains only an SDP offer or
metadata, the "multipart/mixed" container is optional.</t>
<t>The SRC SHOULD include a full metadata snapshot in the initial
INVITE request establishing the RS. If metadata is not yet available
(e.g., an RS established in absence of a CS), the SRC SHOULD send a
full metadata snapshot as soon as metadata becomes available.</t>
<t>If the SRC receives a snapshot request from the SRS, it MUST
immediately send a full metadata snapshot.</t>
<t>The following is an example of a full metadata snapshot sent by the
SRC in the initial INVITE request:</t>
<figure align="left" anchor="figure_invite"
title="Sample INVITE request for the recording session">
<artwork align="left">
INVITE sip:recorder@example.com SIP/2.0
Via: SIP/2.0/TCP src.example.com;branch=z9hG4bKdf6b622b648d9
From: &lt;sip:2000@example.com&gt;;tag=35e195d2-947d-4585-946f-09839247
To: &lt;sip:recorder@example.com&gt;
Call-ID: d253c800-b0d1ea39-4a7dd-3f0e20a
CSeq: 101 INVITE
Max-Forwards: 70
Require: siprec
Accept: application/sdp, application/rs-metadata-request
Contact: &lt;sip:2000@src.example.com&gt;;+sip.src
Content-Type: multipart/mixed;boundary=foobar
Content-Length: [length]
--foobar
Content-Type: application/sdp
v=0
o=SRS 2890844526 2890844526 IN IP4 198.51.100.1
s=-
c=IN IP4 198.51.100.1
t=0 0
m=audio 12240 RTP/AVP 0 4 8
a=sendonly
a=label:1
--foobar
Content-Type: application/rs-metadata
Content-Disposition: recording-session
[metadata content]
</artwork>
</figure>
</section>
<section title="Procedures at the SRS">
<t>The SRS receives metadata updates from the SRC in INVITE and UPDATE
requests. Since the SRC can send partial updates based on the previous
update, the SRS needs to keep track of the sequence of updates from
the SRC.</t>
<t>In the case of an internal failure at the SRS, the SRS may fail to
recognize a partial update from the SRC. The SRS may be able to
recover from the internal failure by requesting a full metadata
snapshot from the SRC. Certain errors, such as syntax errors or
semantic errors in the metadata information, are likely caused by an
error on the SRC side, and it is likely the same error will occur
again even when a full metadata snapshot is requested. In order to
avoid repeating the same error, the SRS can simply terminate the
recording session when a syntax error or semantic error is detected in
the metadata.</t>
<t>The SRS MAY explicitly request a full metadata snapshot by sending
an UPDATE request. This request MUST contain a body with content
disposition type "recording-session", and MUST NOT contain an SDP
body. The SRS MUST NOT request a full metadata snapshot in an UPDATE
response or in any other SIP transaction. The format of the content is
"application/rs-metadata-request", and the body format is a simple
text-based format. The following shows an example:</t>
<figure align="left" anchor="figure_metadatareq"
title="Metadata Request">
<artwork align="left">
UPDATE sip:2000@src.example.com SIP/2.0
Via: SIP/2.0/UDP srs.example.com;branch=z9hG4bKdf6b622b648d9
To: &lt;sip:2000@example.com&gt;;tag=35e195d2-947d-4585-946f-09839247
From: &lt;sip:recorder@example.com&gt;;tag=1234567890
Call-ID: d253c800-b0d1ea39-4a7dd-3f0e20a
CSeq: 1 UPDATE
Max-Forwards: 70
Require: siprec
Contact: &lt;sip:recorder@srs.example.com&gt;;+sip.srs
Accept: application/sdp, application/rs-metadata
Content-Disposition: recording-session
Content-Type: application/rs-metadata-request
Content-Length: [length]
SRS internal error
</artwork>
</figure>
<t>Note that UPDATE was chosen for the SRS to request a metadata
snapshot because it can be sent regardless of the state of the dialog.
This was seen as better than requiring support for both UPDATE and
re-INVITE for this operation.</t>
<t>When the SRC receives a request for a metadata snapshot, it MUST
immediately provide a full metadata snapshot in a separate INVITE or
UPDATE transaction. Any subsequent partial updates will not be
dependent on any metadata sent prior to this full metadata
snapshot.</t>
<t>The metadata received by the SRS can contain ID elements used to
cross reference one element to another. An element containing the
definition of an ID, and an element containing a reference to that ID
will often be received from the same SRC. It is also valid for those
elements to be received from different SRCs, for example, when each
endpoint in the same CS acts as an SRC to record the call and a common
ID refers to the same CS. The SRS MUST NOT consider this an error.</t>
<section title="Formal Syntax">
<t>The formal syntax for the application/rs-metadata-request MIME type is
described below using the Augmented Backus-Naur Form (ABNF) as
described in <xref target="RFC5234"/>.</t>
<t><figure>
<artwork>
snapshot-request = srs-reason-line CRLF
srs-reason-line = [TEXT-UTF8-TRIM]
; TEXT-UTF8-TRIM defined in RFC 3261
</artwork>
</figure></t>
</section>
</section>
</section>
<section title="Persistent Recording">
<t>Persistent recording is a specific use case outlined in REQ-005 or
Use Case 4 in <xref target="RFC6341"/>, where a recording session can be
established in the absence of a communication session. The SRC
continuously records media in a recording session to the SRS even in the
absence of a CS for all user agents that are part of persistent
recording. By allocating recorded streams and continuously sending
recorded media to the SRS, the SRC does not have to prepare new recorded
streams with a new SDP offer when a new communication session is created
and also does not impact the timing of the CS. The SRC only needs to
update the metadata when new communication sessions are created.</t>
<t>When there is no communication session running on the devices with
persistent recording, there is no recorded media to stream from the SRC
to the SRS. In certain environments where a Network Address Translator
(NAT) is used, typically a minimum of flow activity is required to
maintain the NAT binding for each port opened. Agents that support
Interactive Connectivity Establishment (ICE) solve this problem. For
non-ICE agents, in order not to lose the NAT bindings for the RTP/RTCP
ports opened for the recorded streams, the SRC and SRS SHOULD follow the
recommendations provided in <xref target="RFC6263"/> to maintain the NAT
bindings.</t>
</section>
<section anchor="iana" title="IANA Considerations">
<section title="Registration of Option Tags">
<t>This specification registers two option tags. The required
information for this registration, as specified in <xref
target="RFC3261"/>, is as follows.</t>
<section title="siprec Option Tag">
<t><list>
<t>Name: siprec</t>
<t>Description: This option tag is for identifying that the SIP
session is for the purpose of a recording session. This is
typically not used in a Supported header. When present in a
Require header in a request, it indicates that the UA is either
an SRC or SRS capable of handling a recording session.</t>
</list></t>
</section>
<section title="record-aware Option Tag">
<t><list>
<t>Name: record-aware</t>
<t>Description: This option tag is to indicate the ability for
the user agent to receive recording indicators in media-level or
session-level SDP. When present in a Supported header, it
indicates that the UA can receive recording indicators in
media-level or session-level SDP.</t>
</list></t>
</section>
</section>
<section title="Registration of media feature tags">
<t>This document registers two new media feature tags in the SIP tree
per the process defined in <xref target="RFC2506"/> and <xref
target="RFC3840"/>.</t>
<section title="src feature tag">
<t><list>
<t>Media feature tag name: sip.src</t>
<t>ASN.1 Identifier: TBD at registration</t>
<t>Summary of the media feature indicated by this tag: This
feature tag indicates that the user agent is a Session Recording
Client for the purpose of a Recording Session.</t>
<t>Values appropriate for use with this feature tag: boolean</t>
<t>The feature tag is intended primarily for use in the
following applications, protocols, services, or negotiation
mechanisms: This feature tag is only useful for a Recording
Session.</t>
<t>Examples of typical use: Routing the request to a Session
Recording Server.</t>
<t>Security Considerations: Security considerations for this
media feature tag are discussed in Section 11.1 of RFC 3840.</t>
</list></t>
</section>
<section title="srs feature tag">
<t><list>
<t>Media feature tag name: sip.srs</t>
<t>ASN.1 Identifier: TBD at registration</t>
<t>Summary of the media feature indicated by this tag: This
feature tag indicates that the user agent is a Session Recording
Server for the purpose of a Recording Session.</t>
<t>Values appropriate for use with this feature tag: boolean</t>
<t>The feature tag is intended primarily for use in the
following applications, protocols, services, or negotiation
mechanisms: This feature tag is only useful for a Recording
Session.</t>
<t>Examples of typical use: Routing the request to a Session
Recording Client.</t>
<t>Security Considerations: Security considerations for this
media feature tag are discussed in Section 11.1 of RFC 3840.</t>
</list></t>
</section>
</section>
<section title="New Content-Disposition Parameter Registrations">
<t>This document registers a new "disposition-type" value in the
Content-Disposition header: recording-session.<list style="hanging">
<t hangText="recording-session:">The body describes either:<list
style="symbols">
<t>metadata about the recording session</t>
<t>reason for metadata snapshot request</t>
</list></t>
<t>as determined by the MIME value indicated in the
Content-Type.</t>
</list></t>
</section>
<section anchor="MIME-reg" title="Media Type Registration">
<section title="Registration of MIME Type application/rs-metadata-request">
<t>This document registers the application/rs-metadata-request MIME
media type in order to describe a recording session metadata
snapshot request. This media type is defined by the following
information:</t>
<t>Media type name: application</t>
<t>Media subtype name: rs-metadata-request</t>
<t>Required parameters: none</t>
<t>Optional parameters: none</t>
</section>
</section>
<section title="SDP Attributes">
<t>This document registers the following new SDP attributes.</t>
<section title="'record' SDP Attribute">
<t>Contact names: Leon Portman leon.portman@gmail.com, Henry Lum
henry.lum@genesyslab.com</t>
<t>Attribute name: record</t>
<t>Long form attribute name: Recording Indication</t>
<t>Type of attribute: session or media-level</t>
<t>Subject to charset: no</t>
<t>This attribute provides the recording indication for the session
or media stream.</t>
<t>Allowed attribute values: on, off, paused</t>
</section>
<section title="'recordpref' SDP Attribute">
<t>Contact names: Leon Portman leon.portman@nice.com, Henry Lum
henry.lum@genesyslab.com</t>
<t>Attribute name: recordpref</t>
<t>Long form attribute name: Recording Preference</t>
<t>Type of attribute: session or media-level</t>
<t>Subject to charset: no</t>
<t>This attribute provides the recording preference for the session
or media stream.</t>
<t>Allowed attribute values: on, off, pause, nopreference</t>
</section>
</section>
</section>
<section anchor="Security" title="Security Considerations">
<t>The recording session is fundamentally a standard SIP dialog <xref
target="RFC3261"/>; therefore, the recording session can reuse any of
the existing SIP security mechanisms available for securing the session
signaling, the recorded media, and the metadata. The use cases and
requirements document <xref target="RFC6341"/> outlines the general
security considerations, and this document describes specific security
recommendations.</t>
<t>The SRC and SRS MUST support SIP with TLS version 1.2, SHOULD follow
the best practices when using TLS as per <xref target="RFC7525"/>, and
MAY use SIPS with TLS as per <xref target="RFC5630"/>. The Recording
Session SHOULD be at least as secure as the Communication Session,
meaning using at least the same strength of cipher suite as the CS if
the CS is secured. For example, if the CS uses SIPS for signaling and
RTP/SAVP for media, then the RS SHOULD NOT downgrade the level of
security in the RS to SIP or plain RTP since doing so will mean an
effective security downgrade for the CS. In deployments where the SRC
and the SRS are in the same administrative domain and the same physical
switch that prevents outside user access, some SRCs may choose to lower
the level of security when establishing a recording session. While
physically securing the SRC and SRS may prevent an outside attacker from
accessing important call recordings, this still does not prevent an
inside attacker from accessing the internal network to gain access to
the call recordings.</t>
<section title="Authentication and Authorization">
<t>At the transport level, the recording session uses TLS
authentication to validate the authenticity of the SRC and SRS. The
SRC and SRS MUST implement TLS mutual authentication for establishing
the recording session. Whether the SRC/SRS chooses to use TLS mutual
authentication is a deployment decision. In deployments where a UA
acts as its own SRC, this requires the UA have its own certificate as
needed for TLS mutual authentication. In deployments where the SRC and
the SRS are in the same administrative domain and have some other
means of assuring authenticity, the SRC and SRS may choose not to
authenticate each other, or to have the SRC authenticate the SRS only.
In deployments where the SRS can be hosted on a different
administrative domain, it is important to perform mutual
authentication to ensure the authenticity of both the SRC and the SRS
before transmitting any recorded media. The risk of not authenticating
the SRS is that the recording may be sent to an entity other than the
intended SRS, allowing a sensitive call recording to be received by an
attacker. On the other hand, the risk of not authenticating the SRC is
that an SRS will accept calls from an unknown SRC and allow potential
forgery of call recordings.</t>
<t>There may be scenarios in which the signaling between the SRC and
SRS is not direct, e.g., a SIP proxy exists between the SRC and the
SRS. In such scenarios, each hop is subject to the TLS mutual
authentication constraint and transitive trust at each hop is
utilized. Additionally, an SRC or SRS may use other existing SIP
mechanisms available, including but not limited to, Digest
Authentication <xref target="RFC3261"/>, Asserted Identity <xref
target="RFC3325"/>, and Connected Identity <xref
target="RFC4916"/>.</t>
<t>The SRS may have its own set of recording policies to authorize
recording requests from the SRC. The use of recording policies is
outside the scope of the Session Recording Protocol.</t>
</section>
<section title="RTP handling">
<t>In many scenarios it will be critical for the media transported
between the SRC and the SRS to be protected. Media encryption is an
important element in the overall SIPREC solution; therefore the SRC
and the SRS MUST support RTP/SAVP <xref target="RFC3711"/> and
RTP/SAVPF <xref target="RFC5124"/>. RTP/SAVP and RTP/SAVPF provide
media encryption, integrity protection, replay protection, and a
limited form of source authentication. They do not contain or require
a specific keying mechanism. At a minimum, the SRC and SRS MUST
support the SDP Security Descriptions (SDES) key negotiation mechanism
<xref target="RFC4568"/>. For cases in which DTLS-SRTP is used to
encrypt a CS media stream, an SRC may use SRTP Encrypted Key Transport
(EKT) <xref target="I-D.ietf-avtcore-srtp-ekt"/> in order to use
SRTP-SDES in the RS without needing to re-encrypt the media.</t>
<t>When RTP/SAVP or RTP/SAVPF is used, an SRC can choose to use the
same or different keys in the RS than the ones used in the CS. Some
SRCs are designed to simply replicate RTP packets from a CS media
stream to the SRS, in which case the SRC will use the same key in the
RS as used in the CS. In this case, the SRC MUST secure the SDP
containing the keying material in the RS with at least the same level
of security as in the CS. The risk of lowering the level of security
in the RS is that it will effectively become a downgrade attack on the
CS since the same key is used for both CS and RS.</t>
<t>SRCs that decrypt an encrypted CS media stream and re-encrypt it
when sending it to the SRS MUST use a different key than what is used
for the CS media stream, to ensure that it is not possible for someone
who has the key for the CS media stream to access recorded data they
are not authorized to access. In order to maintain a comparable level
of security, the key used in the RS SHOULD be of equivalent or greater
strength than that used in the CS.</t>
</section>
<section title="Metadata">
<t>Metadata contains sensitive information such as the address of
record of the participants and other extension data placed by the SRC.
It is essential to protect the content of the metadata in the RS.
Since metadata is a content type transmitted in SIP signaling,
metadata SHOULD be protected at the transport level by SIPS/TLS.</t>
</section>
<section title="Storage and playback">
<t>While storage and playback of the call recording is beyond the
scope of this document, it is worthwhile to mention here that it is
also important for the recording storage and playback to provide a
level of security that is comparable to the communication session. It
would defeat the purpose of securing both the communication session
and the recording session mentioned in the previous sections if the
recording can be easily played back with a simple, unsecured HTTP
interface without any form of authentication or authorization.</t>
</section>
</section>
<section anchor="acknowledgement" title="Acknowledgements">
<t>We want to thank John Elwell, Paul Kyzivat, Parthasarathi R, Ram
Mohan R, Hadriel Kaplan, Adam Roach, Miguel Garcia, Thomas Stach, Muthu
Perumal, Dan Wing, and Magnus Westerlund for their valuable comments and
inputs to this document.</t>
</section>
</middle>
<!-- *****BACK MATTER ***** -->
<back>
<references title="Normative References">
&RFC2119;
&RFC2506;
&RFC3261;
&RFC3264;
&RFC3550;
&RFC3840;
&RFC4574;
&RFC5234;
&RFC7245;
&I-D.ietf-siprec-metadata;
</references>
<references title="Informative References">
&I-D.ietf-avtcore-srtp-ekt;
&RFC2804;
&RFC3311;
&RFC3325;
&RFC3551;
&RFC3711;
&RFC4568;
&RFC4585;
&RFC4916;
&RFC4961;
&RFC5104;
&RFC5124;
&RFC5168;
&RFC5630;
&RFC5761;
&RFC6263;
&RFC6341;
&RFC7022;
&RFC7525;
</references>
</back>
</rfc>
<!-- PAFTECH AB 2003-2026 | 2026-04-24 17:35:18 -->