<?xml version="1.0" encoding="US-ASCII"?>
<!-- This template is for creating an Internet Draft using xml2rfc,
which is available here: http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!-- One method to get references from the online citation libraries.
There has to be one entity for each item to be referenced.
An alternate method (rfc include) is described in the references. -->
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC4566 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4566.xml">
<!ENTITY RFC4288 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4288.xml">
<!ENTITY RFC4855 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4855.xml">
<!ENTITY RFC4856 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4856.xml">
<!ENTITY RFC3550 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3550.xml">
<!ENTITY RFC3551 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3551.xml">
<!ENTITY RFC2629 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2629.xml">
<!ENTITY RFC3711 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3711.xml">
<!ENTITY RFC4585 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4585.xml">
<!ENTITY RFC3264 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3264.xml">
]>
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<?rfc toc="yes"?>
<?rfc strict="yes" ?>
<?rfc symrefs="yes"?>
<!-- <?rfc iprnotified="yes" ?> -->
<?rfc iprnotified="no" ?>
<?rfc strict="yes" ?>
<?rfc compact="no"?>
<?rfc subcompact="no" ?>
<?rfc sortrefs="no" ?>
<?rfc colonspace='yes' ?>
<?rfc tocindent='yes' ?>
<?rfc tocdepth="4"?>
<!--
<?rfc sortrefs="yes" ?>
<?rfc compact="yes" ?>
-->
<rfc category="std" docName="draft-ietf-payload-g7110-01" ipr="trust200902">
<front>
<!-- The abbreviated title is used in the page header - it is only necessary if the
full title is longer than 39 characters -->
<title abbrev="G.711.0 Payload Format">RTP Payload Format for G.711.0</title>
<!-- add 'role="editor"' below for the editors if appropriate -->
<!-- Another author who claims to be an editor -->
<author fullname="Michael A. Ramalho" initials="M. A." role="editor"
surname="Ramalho">
<organization abbrev="Cisco Systems">Cisco Systems, Inc.</organization>
<address>
<postal>
<street>8000 Hawkins Road</street>
<!-- Reorder these if your country does things differently -->
<city>Sarasota</city>
<region>FL</region>
<code>34241</code>
<country>USA</country>
</postal>
<phone>+1 919 476 2038</phone>
<email>mramalho@cisco.com</email>
<!-- uri and facsimile elements may also be added -->
</address>
</author>
<author fullname="Paul E. Jones" initials="P. E." surname="Jones">
<organization abbrev="Cisco Systems">Cisco Systems, Inc.</organization>
<address>
<postal>
<street>7025 Kit Creek Rd.</street>
<city>Research Triangle Park</city>
<region>NC</region>
<code>27709</code>
<country>USA</country>
</postal>
<phone>+1 919 476 2048</phone>
<email>paulej@packetizer.com</email>
</address>
</author>
<author fullname="Noboru Harada" initials="N." surname="Harada">
<organization abbrev="NTT">NTT Communications Science Labs.</organization>
<address>
<postal>
<street>3-1 Morinosato-Wakamiya</street>
<city>Atsugi</city>
<region>Kanagawa</region>
<code>243-0198</code>
<country>JAPAN</country>
</postal>
<phone>+81 46 240 3676</phone>
<email>harada.noboru@lab.ntt.co.jp</email>
</address>
</author>
<!--
<author fullname="Muthu Arul Mozhi Perumal" initials="P." surname="Muthu Arul Mozhi">
-->
<author fullname="Muthu Arul Mozhi Perumal" initials="M." surname="Perumal">
<organization abbrev="Cisco Systems">Cisco Systems, Inc.</organization>
<address>
<postal>
<street>Cessna Business Park</street>
<street>Sarjapur-Marathahalli Outer Ring Road</street>
<city>Bangalore</city>
<region>Karnataka</region>
<code>560103</code>
<country>India</country>
</postal>
<phone>+91 9449288768</phone>
<email>mperumal@cisco.com</email>
</address>
</author>
<author fullname="Lei Miao" initials="L." surname="Miao">
<organization abbrev="Huawei Technologies">Huawei Technologies Co. Ltd</organization>
<address>
<postal>
<street>Q22-2-A15R, Environment Protection Park</street>
<street>No. 156 Beiqing Road</street>
<street>HaiDian District</street>
<city>Beijing</city>
<!--
<region>Beijing</region>
-->
<code>100095</code>
<country>China</country>
</postal>
<phone>+86 1059728300</phone>
<email>lei.miao@huawei.com</email>
</address>
</author>
<date day="11" month="December" year="2013" />
<!-- If the month and year are both specified and are the current ones, xml2rfc will fill
in the current day for you. If only the current year is specified, xml2rfc will fill
in the current day and month for you. If the year is not the current one, it is
necessary to specify at least a month (xml2rfc assumes day="1" if not specified for the
purpose of calculating the expiry date). With drafts it is normally sufficient to
specify just the year. -->
<!-- Meta-data Declarations -->
<area>RAI</area>
<!--
<workgroup>Payload</workgroup>
-->
<workgroup>Network Working Group</workgroup>
<!-- WG name at the upperleft corner of the doc,
IETF is fine for individual submissions.
If this element is not present, the default is "Network Working Group",
which is used by the RFC Editor as a nod to the history of the IETF. -->
<keyword>template</keyword>
<!-- Keywords will be incorporated into HTML output
files in a meta tag but they have no effect on text or nroff
output. If you submit your draft to the RFC Editor, the
keywords will be used for the search engine. -->
<abstract>
<t>This document specifies the Real-Time Transport Protocol (RTP) payload
format for ITU-T Recommendation G.711.0.
ITU-T Rec. G.711.0 defines a lossless
and stateless compression for G.711 packet payloads typically used in IP networks.
This document also defines a storage mode format for G.711.0 and a media
type registration for the G.711.0 RTP payload format.
</t>
</abstract>
</front>
<middle>
<section title="Introduction">
<t>The International Telecommunication Union (ITU-T) Recommendation
<xref target="G.711.0">G.711.0</xref> specifies
a stateless and lossless compression for G.711 packet payloads typically
used in Voice over IP (VoIP) networks.
This document specifies the Real-Time Transport Protocol (RTP)
<xref target="RFC3550">RFC 3550</xref> payload format and storage modes for this compression.
</t>
</section>
<section title="Requirements Language">
<t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
"SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
document are to be interpreted as described in <xref
target="RFC2119">RFC 2119</xref>.</t>
</section>
<?rfc needLines="8" ?>
<section anchor="Background" title="G.711.0 Codec Background">
<t>ITU-T Recommendation <xref target="G.711.0">G.711.0</xref> is a lossless and stateless compression
mechanism for ITU-T Recommendation <xref target="G.711">G.711</xref> and thus is not a
"codec" in the sense of "lossy" codecs typically carried by RTP.
When negotiated end-to-end, ITU-T Rec. G.711.0 is negotiated as if it were a codec, with
the understanding that ITU-T Rec. G.711.0 losslessly encodes the underlying (lossy)
G.711 pulse code modulation (PCM) sample representation of an audio signal.
For this reason ITU-T Rec. G.711.0 will be interchangeably referred to in this document as a
"lossless data compression algorithm" or a "codec", depending on context.
Within this document, individual G.711 PCM samples will be referred to
as "G.711 symbols" or just "symbols" for brevity.</t>
<t> This section describes the ITU-T Recommendation <xref target="G.711">G.711</xref> codec, its properties,
typical use cases and its key design properties.</t>
<section anchor="G.711.0_Codec" title="General Information and Use of the ITU-T G.711.0 Codec">
<t>ITU-T Recommendation G.711 is the benchmark standard for narrowband telephony.
It has been successful for many decades because of its proven voice quality, ubiquity and utility.
A new ITU-T recommendation, G.711.0, has been established for defining a stateless and lossless
compression for G.711 packet payloads typically used in VoIP networks.
ITU-T Rec. G.711.0 is also known as ITU-T Rec. <xref target="G.711-A1">G.711 Annex A</xref>, as
ITU-T Rec. G.711 Annex A is effectively a pointer to ITU-T Rec. G.711.0.
Henceforth in this document, ITU-T Rec. G.711.0 will simply be referred to as "G.711.0" and
ITU-T Rec. G.711 simply as "G.711". </t>
<t> G.711.0 may be employed end-to-end, in which case the RTP payload format specification and use is nearly
identical to the G.711 RTP specification found in <xref target="RFC3551">RFC 3551</xref>.
The only significant differences for G.711.0 are the use of a dynamic payload type (the static
PTs of 0 or 8 are virtually always used with G.711) and the recommendation not to
use Voice Activity Detection (see <xref target="RTP_Header"></xref>). </t>
<t> G.711.0, being both lossless and stateless, may also be employed as a lossless compression mechanism
anywhere in between end systems which have negotiated use of G.711.
Because the only significant difference between the G.711 RTP payload format header and the G.711.0
payload format header is the payload type, a G.711 RTP packet can be losslessly converted
to a G.711.0 RTP packet simply by compressing the G.711 payload (thus creating a G.711.0
payload), changing the payload type to the dynamic value desired and copying all the
remaining G.711 RTP header fields into the corresponding G.711.0 RTP header.
Conversely, the corresponding decompression of a G.711.0 RTP packet back to the original source
G.711 RTP packet can be accomplished by losslessly decompressing the G.711.0 payload
back to the original source G.711 payload, changing the payload type back to the
payload type of the original G.711 RTP packet and copying all the remaining G.711.0 RTP
header fields into the corresponding G.711 RTP header. </t>
<t> It is important to note that G.711.0, being both lossless and stateless, can be employed
multiple times (e.g., on multiple, individual hops or series of hops) of a given
flow with no degradation of quality relative to end-to-end G.711.
Stated another way, multiple "lossless transcodes" from/to G.711.0/G.711 do not
affect voice quality as typically occurs with lossy transcodes to/from dissimilar codecs. </t>
<t> Lastly, it is expected that G.711.0 will be used as an archival format for recorded G.711 streams.
Therefore, a G.711.0 Storage Mode Format is also included in this document. </t>
</section>
<section anchor="G.711.0_Design" title="Key Properties of G.711.0 Design">
<t>
The fundamental design of G.711.0 resulted from the desire to losslessly encode and compress frames of G.711 symbols
independent of what types of signals those G.711 frames contained.
The primary G.711.0 use case is for G.711 encoded, zero-mean, acoustic signals (such as speech and music).
</t>
<t>G.711.0 attributes are below: <list counter="G.711.0 Attributes" hangIndent="6" style="format A%d">
<t>Compression for zero-mean acoustic signals: The primary use case for which G.711.0 was designed is
the compression of G.711 payloads which contain "speech" or other zero-mean acoustic signals.
G.711.0 obtains greater than 50% average compression in service provider environments
<xref target="ICASSP"></xref>.</t>
<t>Lossless for any G.711 payload: G.711.0 was designed to be lossless for any valid G.711 payload - even if the payload consisted
of apparently random G.711 symbols (e.g., a modem or FAX payload).
G.711.0 could be used for "aggregate 64 kbps G.711 channels" carried over IP without explicit concern if
a subset of these channels happened to be carrying something other than voice or general audio.
To the extent that a particular channel carried something other than voice or general audio,
G.711.0 ensured that it was carried losslessly, if not significantly compressed.</t>
<t>Stateless: Compression of a frame of G.711 symbols was only to be dependent on that frame and not on any prior frame.
Although greater compression is usually available by observing a longer history of past G.711 symbols, it was decided
that the compression design would be stateless to completely eliminate error propagation common
in many lossy codec designs (e.g., ITU-T Rec. <xref target="G.729">G.729</xref>,
ITU-T Rec. <xref target="G.722">G.722</xref>).
That is, the decoding process need not be concerned about lost prior packets
because the decompression of a given G.711.0 frame is not dependent on potentially lost prior G.711.0 frames.
Owing to this stateless property, the frames input to the G.711.0 encoder may be changed "on-the-fly" (a
5 ms encoding could be followed by a 20 ms encoding).</t>
<t>Self-describing: This property is defined as the ability to determine how many source G.711 samples are contained within the
G.711.0 frame solely by information contained within the G.711.0 frame.
Generally, the number of source G.711 symbols can be determined by decoding the initial octets of the
compressed G.711.0 frame (these octets are called "prefix codes" in the standard) <xref target="ICASSP"></xref>.
A G.711.0 decoder need not know what ptime is, as it is able to decompress the
G.711.0 frame presented to it without signaling knowledge.</t>
<t>Accommodate G.711 payload sizes typically used in IP: G.711 input frames of length typically found in VoIP
applications represent SDP ptimes (see <xref target="RFC4566">RFC 4566</xref>) of 5 ms, 10 ms, 20 ms, 30 ms or 40 ms.
Since the dominant sampling frequency for G.711 is 8000 samples per second, G.711.0 was designed to compress
G.711 input frames of 40, 80, 160, 240 or 320 samples.</t>
<t>Bounded expansion: Since attribute A2 above requires G.711.0 to be lossless for any payload, by definition
there exists at least one potential G.711 payload which must be "uncompressible". Since the quantum of compression is an
octet, the minimum expansion of such an uncompressible payload was designed to be the minimum possible
of one octet. Thus G.711.0 "compressed" frames can be of length one octet to X+1 octets, where X is the size
of the input G.711 frame in octets.
G.711.0 can therefore be viewed as a Variable Bit Rate (VBR) encoding in which
the size of the G.711.0 output frame is a function of the G.711 symbols input to it.</t>
<t>Algorithmic delay: G.711.0 was designed to have the algorithmic delay equal to the time represented by the
number of samples in the G.711 input frame (i.e., no "look-ahead").</t>
<t>Low Complexity: Less than 1.0 WMOPS average and low memory footprint (~5k octets RAM, ~5.7k octets ROM and
~3.6 basic operations) <xref target="ICASSP"></xref> <xref target="G.711.0"></xref>.</t>
<t>Both A-law and Mu-law supported: G.711 has two operating laws, A-law and Mu-law. These two laws are also known
as PCMA and PCMU in RTP applications <xref target="RFC3551">RFC 3551</xref>.</t>
</list>
These attributes generally make it trivial to compress a G.711 input frame consisting of 40, 80, 160, 240 or 320 samples.
After the input frame is presented to a G.711.0 encoder, a G.711.0 "self-describing" output frame is produced.
The number of samples contained within this frame is easily determined at the G.711.0 decoder by virtue of attribute A4.
The G.711.0 decoder can decode the G.711.0 frame back to a G.711 frame by using only data within the G.711.0 frame.
</t>
<t>
Lastly, we note that losing a G.711.0 encoded packet is identical in effect to losing a G.711 packet (when using RTP); this is
because a G.711.0 payload, like the corresponding G.711 payload, is stateless.
Thus, it is anticipated that existing G.711 PLC mechanisms will be employed when a G.711.0 packet is lost and an
identical MOS degradation relative to G.711 loss will be achieved.
</t>
</section>
<section anchor="G.711_2_G.711.0" title="G.711 Input Frames to G.711.0 Output Frames">
<t>
G.711.0 is a lossless and stateless compression of G.711 frames.
The following figure depicts this where "A" is the process
of G.711.0 encoding and "B" is the process of G.711.0 decoding.
</t>
<figure align="center" anchor="g711.0_encoding">
<preamble>1:1 Mapping from G.711 Input Frame to G.711.0 Output Frame</preamble>
<artwork align="center"><![CDATA[
|--------------------------| A |------------------------------|
| G.711 Input Frame |----->| G.711.0 Output Frame |
| of X Octets | | containing 1 to X+1 Octets |
| (where X MUST be 40, 80, | | (precise value dependent on |
| 160, 240 or 320 octets) |<-----| G.711.0 ability to compress) |
|__________________________| B |______________________________|
]]></artwork>
</figure>
<t>
Note that the mapping is 1:1 (lossless) in both directions, subject to two constraints.
The first constraint is that the input frame provided to the G.711.0 encoder
(process "A") has a specific number of input G.711 symbols consistent with attribute
A5 (40, 80, 160, 240 or 320 octets). The second constraint is that the compression
law used to create the G.711 input frame (A-law or Mu-law) must be known,
consistent with attribute A9.</t>
<t> Subject to these two constraints, the input G.711 frame is processed by the G.711.0
encoder ("A") and produces a "self-describing" G.711.0 output frame, consistent with
attribute A4. Depending on the source G.711 symbols, the G.711.0 output frame
can contain anywhere from 1 to X+1 octets, where X is the number of input
G.711 symbols. Compression results for virtually every zero-mean acoustic
signal encoded by G.711.0.</t>
<t> Since the G.711.0 output frame is "self-describing", a G.711.0 decoder (process "B")
can losslessly reproduce the original G.711 input frame with only the knowledge of
which companding law was used (A-law or Mu-law). The G.711.0 frame, being "self-describing",
allows for the G.711.0 decoder ("B") to know precisely how many G.711 symbols to create.</t>
<t>Since G.711.0 was designed with typical G.711 payload lengths as a design constraint (attribute A5),
this lossless encoding can be performed only with knowledge of the companding
law being used. This information is anticipated to be signaled in SDP and
will be described later in this document.</t>
<t>If the original inputs were known to be from a zero-mean
acoustic signal coded by G.711, an intelligent G.711.0 encoder could
infer the G.711 companding law in use (via G.711 input signal amplitude histogram statistics).
Likewise, an intelligent G.711.0 decoder producing G.711 from the G.711.0 frames could
also infer which encoding law is in use.
Thus G.711.0 could be designed for use in applications that have limited stream
signaling between the G.711 endpoints (i.e., they only know "G.711 at 8k sampling is
being used", but nothing more).
Such usage is not further described in this document.
Additionally, if the original inputs were known to come from zero-mean acoustic signals,
an intelligent G.711.0 encoder could tell if the G.711 payload had been encrypted - as
the symbols would not have the distribution expected in either companding law and
would appear random. Such determination is also not further discussed in this document.</t>
<t>It is easily seen that this process is 1:1 and that G.711.0 based lossless compression
can be employed multiple times, as the original G.711 input symbols are always
reproduced with 100% fidelity.</t>
<t>G.711.0 frames containing more source G.711 symbols from a given channel will typically
result in higher compression as a general rule, but there are exceptions.
For example, an intelligent G.711.0 encoder may choose to encode 20 ms of G.711
as two individual 10 ms G.711.0 frames if a higher overall compression will
result (this might occur if the first 10 ms was "silence" and two, 10 ms G.711.0
frames contained fewer octets than one 20 ms G.711.0 frame).
For this reason, we will explicitly allow multiple G.711.0 encoded frames
in the G.711.0 RTP payload in
<xref target="RTP_Payload_Compound"></xref> below even though the
usual case is anticipated to be only one G.711.0 frame per RTP payload.</t>
</section>
</section>
<?rfc needLines="8" ?>
<section anchor="RTP" title="RTP Header and Payload">
<t>In this section we describe the precise format for G.711.0 frames carried via RTP. We begin with
an RTP header description relative to G.711, then provide two G.711.0 payload examples.</t>
<section anchor="RTP_Header" title="G.711.0 RTP Header">
<t>Relative to G.711 RTP headers, the utilization of G.711.0 does not create any
special requirements with respect to the contents of the RTP packet header.
The only significant difference is that the payload type (PT) RTP header
field will have a value corresponding to the dynamic payload type assigned
to the flow (whereas G.711 PCMU typically has a static PT = 0 and
G.711 PCMA typically has a static PT = 8 <xref target="RFC3551"></xref>).</t>
<t>Voice Activity Detection (VAD) SHOULD NOT be used when
G.711.0 is negotiated because G.711.0 obtains high compression
during "VAD silence intervals" and one of the advantages of G.711.0 over
G.711 with VAD is the lack of any VAD-inducing artifacts in the received
signal.
However, if VAD is employed, the Marker bit (M) MUST be set
in the first packet of a talkspurt (the first packet after a silence
period in which packets have not been transmitted contiguously
as per rules specified in <xref target="RFC3550"></xref> for G.711 payloads).
This definition, being consistent with the G.711 RTP VAD use, further
allows lossless transcoding between G.711 RTP packets and G.711.0 RTP
packets as described in <xref target="G.711.0_Codec"></xref>.</t>
<t> With this introduction, the RTP packet header fields are defined as follows:
<list counter="RTP Stuff" hangIndent="4">
<t> V - As per <xref target="RFC3550"></xref> </t>
<t> P - As per <xref target="RFC3550"></xref> </t>
<t> X - As per <xref target="RFC3550"></xref> </t>
<t> CC - As per <xref target="RFC3550"></xref> </t>
<t> M - As per <xref target="RFC3550"></xref> </t>
<t> PT - Dynamic PT assigned, consistent with MIME allocation for G.711.0 defined in Media Type Definition (<xref target="Registration"></xref>).</t>
<t> SN - As per <xref target="RFC3550"></xref> </t>
<t> timestamp - As per <xref target="RFC3550"></xref> </t>
<t> SSRC - As per <xref target="RFC3550"></xref> </t>
<t> CSRC - As per <xref target="RFC3550"></xref> </t>
</list>
Where V (version bits), P (padding bit), X (extension bit), CC (CSRC count), M (marker bit),
PT (payload type), SN (sequence number), timestamp, SSRC (synchronization source) and CSRC (contributing sources)
are as defined in <xref target="RFC3550"></xref> and as typically used with G.711. PT (payload type) is
as defined in <xref target="RFC3550"></xref>.
</t>
</section>
<section anchor="RTP_Payload" title="G.711.0 RTP Payload">
<t>In this section we provide two examples for carrying G.711.0 frames in RTP payloads.
The first example is used when it is desired to carry
only one G.711.0 frame in the RTP payload.
This example is a subset of the second and shown separately for clarity.</t>
<section anchor="RTP_Payload_Simple" title="Single G.711.0 Frame per RTP Payload Example">
<t>This example depicts a single G.711.0 frame in the RTP payload.
This is expected to be the dominant RTP payload case for G.711.0, as the G.711.0
encoding process supports the SDP packet times
(ptime and maxptime, see <xref target="RFC4566"></xref>) commonly
used when G.711 is transported in RTP.
Additionally, as mentioned previously, larger G.711.0 frames generally
compress more effectively than a multiplicity of smaller G.711.0 frames.</t>
<t> The following Figure illustrates the single G.711.0 frame per RTP payload case.</t>
<figure align="center" anchor="g711.0_simple">
<preamble>Single G.711.0 Frame in RTP Payload Case</preamble>
<artwork align="center"><![CDATA[
|-------------------|-------------------|
| One G.711.0 Frame | Zero or more 0x00 |
| | Padding Octets |
|___________________|___________________|
]]></artwork>
</figure>
<t>Encoding Process: A single G.711.0 frame is inserted into the RTP payload.
The amount of time represented by the G.711 symbols compressed in the G.711.0 frame
MUST correspond to the ptime signaled for applications using SDP.
Although generally not desired, padding in the RTP payload after the
G.711.0 frame MAY be created by placing one or more 0x00 octets after the G.711.0 frame.
Such padding may be desired based on security considerations (see <xref target="Security"></xref>).</t>
<t>Decoding Process: Passing the entire RTP payload to the G.711.0 decoder is sufficient
for the G.711.0 decoder to create the source G.711 symbols. Any padding inserted
after the G.711.0 frame (i.e., the 0x00 octets) present in the RTP payload is silently
ignored by the G.711.0 decoding process.
The decoding process is fully described in <xref target="RTP_Payload_Decoding"></xref> below.</t>
</section>
<section anchor="RTP_Payload_Compound" title="Multiple G.711.0 Frames per RTP Payload Example">
<t>This example depicts the case where multiple G.711.0 frames are desired in the RTP payload.</t>
<t>As described in <xref target="G.711_2_G.711.0"></xref>, an "intelligent G.711.0 encoder" can
decide to encode, let's say, 20 ms
of G.711 symbols as two, 10 ms G.711.0 frames because a greater compression is attained
for that particular 20 ms segment.
The "smart encoding" of such inputs is accommodated by the ability to have multiple G.711.0
frames in the RTP payload.</t>
<t>Note that since each G.711.0 frame is self-describing
(see Attribute A4 in <xref target="G.711.0_Design"></xref>), the
individual G.711.0 frames in the RTP payload need not represent the same duration of
time (i.e., a 5 ms G.711.0 frame could be followed by a 20 ms G.711.0 frame). Owing to
this, the amount of time represented in the RTP payload MAY be any integer multiple of
5 ms (as 5 ms is the smallest interval of time that can be represented in a G.711.0 frame).</t>
<t> The following Figure illustrates the multiple G.711.0 frames per RTP payload case where
the number of G.711.0 frames placed in the RTP payload is N.</t>
<figure align="center" anchor="g711.0_compound">
<preamble>Multiple G.711.0 Frames in RTP Payload Case</preamble>
<artwork align="center"><![CDATA[
|----------|---------|----------|---------|----------------|
| First | Second | | Nth | Zero or more |
| G.711.0 | G.711.0 | ... | G.711.0 | 0x00 |
| Frame | Frame | | Frame | Padding Octets |
|__________|_________|__________|_________|________________|
]]></artwork>
</figure>
<t>We note here that the individual G.711.0 frames can be, and generally are, of different lengths.
The decoding process in the following section is used to determine the frame boundaries.</t>
<t>Encoding Process: One or more G.711.0 frames are placed in the RTP payload simply by
concatenating the G.711.0 frames together.
The amount of time represented by the G.711 symbols compressed in all the G.711.0 frames
in the RTP payload MUST correspond to the ptime signaled for applications using SDP.
Although not generally desired, padding
in the RTP payload SHOULD be placed after the last G.711.0 frame in the payload
and MAY be created by placing one or more 0x00 octets after the last G.711.0 frame.
Such padding may be desired based on security
considerations (see <xref target="Security"></xref>).</t>
<t>Decoding Process: As G.711.0 frames can be of varying length, the payload decoding process described in the
following section is used to determine where the individual G.711.0 frame boundaries are.</t>
</section>
<section anchor="RTP_Payload_Decoding" title="G.711.0 RTP Payload Decoding Process">
<t>The G.711.0 decoding process is a standard part of G.711.0 bit stream decoding and is implemented in the
ITU-T Rec. G.711.0 reference code.
The decoding process heuristic described in this section
is a slight enhancement of the ITU-T reference code to explicitly accommodate RTP padding (as described
above).</t>
<t> Before describing the decoding, we note here that the largest possible G.711.0 frame
is created whenever the largest number of G.711 symbols is encoded
(320 from <xref target="G.711.0_Design"></xref>, property A5)
and these 320 symbols are "uncompressible" by the G.711.0 encoder.
In this case (via property A6 in <xref target="G.711.0_Design"></xref>) the
G.711.0 output frame will be 321 octets long.
We also note that the value 0x00 chosen for the optional padding
cannot be the first octet of a valid ITU-T Rec. G.711.0 frame (see <xref target="G.711.0"></xref>).
We also note that whenever more than one G.711.0 frame is contained in the RTP payload, the decoding of
the individual G.711.0 frames will occur multiple times. </t>
<t> For the decoding heuristic below, let N be the number of octets in the RTP payload (i.e., excluding any RTP padding, but including
any RTP payload padding), let P equal the number of RTP payload octets processed by the G.711.0 decoding process,
let K be the number of G.711 symbols presently in the output buffer,
let Q be the number of octets contained in the G.711.0 frame being processed and let "!=" represent
not equal to.
The keyword "STOP" is used below to indicate the end of the processing of G.711.0 frames in the RTP payload.
The heuristic below assumes an output buffer for the decoded G.711 source symbols of length sufficient
to accommodate the expected number of G.711 symbols and an input buffer of length 321 octets.</t>
<t>G.711.0 RTP Payload Decoding Heuristic: <list counter="G.711.0_Heuristic" hangIndent="6" style="format H%d">
<t>Initialization:
Initialize the number of processed octets to zero (P = 0).
Initialize the counter for how many G.711 symbols are in the output buffer to zero (K = 0).
Initialize N to the number of octets in the RTP payload.
Go to H2.</t>
<t>Read internal buffer:
Read min{320+1, (N-P)} octets into the internal buffer starting at octet (P+1) of the RTP payload.
We note at this point, N-P octets have yet to be processed and that 320+1 octets is the largest
possible G.711.0 frame.
Go to H3.</t>
<t>Analyze the first octet in the internal buffer:
If this octet is 0x00 (a padding octet) go to H4,
otherwise go to H5 (process a G.711.0 frame).</t>
<t>Process padding octet (no G.711 symbols generated):
Increment the processed octets counter by one (set P = P + 1).
If the result of this increment results in P &gt;= N then STOP (as all RTP Payload octets have been processed),
otherwise go to H2.</t>
<t>Process an individual G.711.0 frame (produce G.711 samples in the output frame): Pass the internal buffer to the
G.711.0 decoder.
The G.711.0 decoder will read the first octet (called the
"prefix code" octet in <xref target="G.711.0"> ITU-T Rec. G.711.0</xref>) to determine how many source G.711
samples, M, are contained in this G.711.0 frame.
The G.711.0 decoder will produce exactly M G.711 source symbols.
If K = 0, these M symbols will be the first in the output buffer and are placed at the beginning of the output buffer.
If K != 0, concatenate these M symbols with the prior symbols in the output buffer
(there are K prior symbols in the buffer).
Set K = K + M (as there are now this many G.711 source symbols in the output buffer).
The G.711.0 decoder will have consumed some number of octets, Q, in the internal buffer to
produce the M G.711 symbols.
Increment the processed payload octets counter by this quantity (set P = P + Q).
If the result of this increment results in P &gt;= N then STOP (as all RTP Payload octets have been processed),
otherwise go to H2.</t>
</list>
At this point, the output buffer will contain precisely K G.711 source symbols which should correspond to the ptime
signaled if SDP was used and the encoding process was without error.</t>
<t> We also note, as an aside, that the heuristic above (and the ITU-T G.711.0 reference code) accommodates
padding octets (0x00) placed anywhere in between G.711.0 frames in the RTP payload as well as prior to or after
any or all G.711.0 frames.
The ITU-T G.711.0 reference code does not have Step H3 and H4 as separate steps (i.e., Step H5
immediately follows H2) at the added computational cost of some additional buffer passing to/from
the G.711.0 frame decoder functions.
That is, the G.711.0 decoder in the reference code "silently ignores" 0x00 padding octets
at the beginning of what it believes to be a G.711.0 encoded frame boundary.
Thus Step H3 and Step H4 above are an optimization over the reference code shown for clarity.</t>
<t> If the decoder is at a playout endpoint location, this G.711 buffer SHOULD be used in the same manner as a received G.711
RTP payload would have been used (passed to a playout buffer, to a PLC implementation, etc.). </t>
</section>
<section anchor="Multiple_Channels" title="G.711.0 RTP Payload for Multiple Channels">
<t>In this section we describe the use of multiple "channels" of G.711 data encoded
by G.711.0 compression.</t>
<t>The dominant use of G.711 in RTP transport has been for single channel use cases. For this
case, the above G.711.0 encoding and decoding process is used.
However, the multiple channel case for G.711.0 (a frame-based compression) is
different from G.711 (a sample-based encoding) and is described separately here.</t>
<t><xref target="RFC3551">RFC 3551</xref> provides guidelines for encoding audio channels
(Section 4) and for the ordering of the channels within the RTP payload (Section 4.1).
The ordering guidelines in RFC 3551, Section 4.1 SHOULD be used unless an
application-specific channel ordering is more appropriate.</t>
<t>An implicit assumption in RFC 3551 is that all the channel data multiplexed
into a RTP payload MUST represent the same physical time span.
The case for G.711.0 is no different; the underlying G.711 data for all channels
in a G.711.0 RTP payload MUST span the same interval in time (e.g., the
same "ptime" for a SDP-specified codec negotiation).</t>
<t>RFC 3551 provides guidelines for sample-based encodings such as G.711 in
Section 4.2. This guidance is tantamount to interleaving the individual samples in
that they SHOULD be packed in consecutive octets.</t>
<t>RFC 3551 provides guidelines for frame-based encodings in which the
frames are interleaved.
However, this guidance stems from the assumption that
"the frame size for frame-oriented codecs is a given".
This assumption is not valid for G.711.0 in that
individual consecutive G.711.0 frames (as per
<xref target="RTP_Payload_Compound"></xref>) can:
<list counter="Channels" hangIndent="4">
<t>1) represent different time spans (e.g., two 5 ms G.711.0 frames in lieu
of one 10 ms G.711.0 frame), and</t>
<t>2) be of different lengths in octets (and typically are).</t>
</list>
Therefore a different, but also simple, concatenation-based approach is specified in this RFC.</t>
<t>
For the multiple channel G.711.0 case, each G.711 channel is independently
encoded into one or more G.711.0 frames defined here as a
"G.711.0 channel superframe". Each one of these superframes
is identical to the multiple G.711.0 frame case illustrated in
<xref target="g711.0_compound"></xref> of
<xref target="RTP_Payload_Compound"></xref> in which
each superframe can have one or more individual G.711.0 frames within it.
Then each G.711.0 channel superframe is concatenated - in channel order -
into a G.711.0 RTP payload. Then, if optional G.711.0
padding octets (0x00) are desired, it is RECOMMENDED that these octets
are placed after the last G.711.0 channel superframe.
As per above, such padding may be desired based on security
considerations (see <xref target="Security"></xref>).
This is depicted in <xref target="g711.0_channels"></xref> below.</t>
<figure align="center" anchor="g711.0_channels">
<preamble>Multiple G.711.0 Channel Superframes in RTP Payload</preamble>
<artwork align="center"><![CDATA[
|----------|---------|----------|---------|---------|
| First | Second | | Nth | Zero |
| G.711.0 | G.711.0 | ... | G.711.0 | or more |
| Channel | Channel | | Channel | 0x00 |
| Super- | Super- | | Super | Padding |
| Frame | Frame | | Frame | Octets |
|__________|_________|__________|_________|_________|
]]></artwork>
</figure>
<t>The G.711.0 decoder at the receiving end simply decodes the entire G.711.0 (multiple
channel) payload into individual G.711 symbols.
If M such G.711 symbols result and there were N channels, then the first M/N G.711
samples would be from the first channel, the second M/N G.711 samples would
be from the second channel, and so on until the Nth set of G.711 samples are found.
Similarly, if the number of channels was not known, but the payload "ptime"
was known, one could infer (knowing the sampling rate) how many G.711
symbols each channel contained; then with this knowledge determine how many
channels of data were contained in the payload.
When SDP is used, the number of channels is known because the optional
"channels" parameter is a MUST when there is more than one channel negotiated (see
<xref target="Registration"></xref>).
Additionally, when SDP is used the parameter ptime is a RECOMMENDED optional parameter.
We note that if both parameters channels and ptime are known that one could
provide a check for the other and the converse.</t>
<t>Lastly we note that although any padding for the multiple channel G.711.0
payload is RECOMMENDED to be placed at the end of the payload, the G.711.0
decoding heuristic described
in <xref target="RTP_Payload_Decoding"></xref> will successfully
decode the payload in <xref target="g711.0_channels"></xref>
if the 0x00 padding octet is placed anywhere before
or after any individual G.711.0 frame in the RTP payload. The number of
padding octets introduced at any G.711.0 frame boundary therefore does not
affect the number M of the source G.711 symbols produced.
Thus the decision for padding MAY be made on a per-superframe basis.</t>
</section>
</section>
</section>
<?rfc needLines="8" ?>
<section anchor="Parameters" title="Payload Format Parameters">
<t>This section defines the parameters that may be used to configure optional
features in the G.711.0 RTP transmission.</t>
<t>The parameters are defined here as a part of the media subtype registration for the G.711.0 codec.
Mapping of the parameters into Session Description Protocol (SDP) <xref target="RFC4566">RFC 4566</xref> is
also provided for those applications that use SDP.</t>
<section anchor="Registration" title="Media Type Registration">
<!-- CHECK THIS LATER FOR CONFORMITY*****
<t> This registration is to be done using the template defined in <xref target="RFC4288">RFC 4288</xref> and
following <xref target="RFC4855">RFC 4855</xref>.</t>
****ALSO ... there may be a better way to format these lists ... see http://xml.resource.org/xml2rfcFAQ.html#anchor20
-->
<!-- PEJ Modified -->
<t>Type name: audio</t>
<t>Subtype name: G7110</t>
<t>Required Parameters:
<list style="empty">
<t>rate: The RTP timestamp clock rate, which is equal to the
sampling rate. The typical rate used with G.711 encoding is 8000, but other rates
may be specified. The default rate is 8000.</t>
<t>complaw: Indicates the companding
law (A-law or mu-law) employed.
The case-insensitive values are "al" or "mu" for A-law and mu-law, respectively.</t>
</list>
</t>
<t>Optional parameters:
<list style="empty">
<t>channels: See <xref target="RFC4566">RFC 4566</xref> for definition.
Specifies how many audio streams are represented in the G.711.0 payload and MUST be present
if the number of channels is greater than one.
This parameter defaults to 1 if not present (as per RFC 4566) and is typically a non-zero
small-valued positive integer.
It is expected that implementations that specify multiple channels will also define a mechanism to
map the channels appropriately within their system design, otherwise the channel order specified
in <xref target="RFC3551">RFC 3551</xref> Section 4.1 will be assumed (e.g., left, right, center, ... ).</t>
<t>maxptime: See <xref target="RFC4566">RFC 4566</xref> for definition.</t>
<t>ptime: See <xref target="RFC4566">RFC 4566</xref> for definition.
The inclusion of "ptime" is RECOMMENDED and SHOULD be in the SDP unless there is
an application specific reason not to include it (e.g., an application that has a variable
ptime on a packet-by-packet basis).
For constant ptime applications, it is considered good form to include "ptime"
in the SDP for session diagnostic purposes.
For the constant ptime multiple channel case described in
<xref target="Multiple_Channels"></xref>, the inclusion of
"ptime" can provide a desirable payload check.</t>
</list>
</t>
<t>Encoding considerations:
<list style="empty">
<t>This media type is framed binary data (see Section 4.8 in
<xref target="RFC4288">RFC 4288</xref>) compressed as per
ITU-T Rec. G.711.0.</t>
</list>
</t>
<t>Security considerations:
<list style="empty">
<t>This media type does not carry active content. It does
transfer compressed data. See Section 4 of <xref
target="RFC4856">RFC 4856</xref>.</t>
</list>
</t>
<t>Interoperability considerations: none</t>
<t>Published specification:
<list style="empty">
<t>ITU-T Rec. G.711.0 and RFC QQQQ.</t>
<t>[ RFC Editor: please replace QQQQ with a reference to
this RFC ]</t>
</list>
</t>
<t>Applications that use this media type:
<list style="empty">
<t>Audio and video streaming and conferencing tools.</t>
</list>
</t>
<t>Additional information: none</t>
<t>Person &amp; email address to contact for further
information:
<list style="empty">
<t>Michael Ramalho &lt;mramalho@cisco.com&gt; or &lt;mar42@cornell.edu&gt;</t>
</list>
</t>
<t>Intended usage: COMMON</t>
<t>Restrictions on usage:
<list style="empty">
<t>This media type depends on RTP framing, and hence is only
defined for transfer via RTP [RFC3550]. Transport within
other framing protocols is not defined at this time.</t>
</list>
</t>
<t>Author: Michael Ramalho</t>
<t>Change controller:
<list style="empty">
<t>IETF Audio/Video Transport working group delegated from
the IESG.</t>
</list>
</t>
<!-- End PEJ Modified -->
</section>
<section anchor="SDP" title="Mapping to SDP Parameters">
<t>The information carried in the media type specification has a specific mapping to fields
in the Session Description Protocol (SDP), which is commonly used to describe RTP sessions.
When SDP is used to specify sessions employing G.711.0, the mapping is as follows:</t>
<t><list style="symbols">
<t>The media type ("audio") goes in SDP "m=" as the media name.</t>
<t>The media subtype ("G7110") goes in SDP "a=rtpmap" as the encoding name.</t>
<t>The required parameter "rate" also goes in "a=rtpmap" as the clock rate.</t>
<t>The parameters "ptime" and "maxptime" go in the
SDP "a=ptime" and "a=maxptime" attributes, respectively.</t>
<t>Remaining parameters go in the SDP "a=fmtp" attribute by copying them directly from the
media type string as a semicolon-separated list of parameter=value pairs.</t>
</list></t>
</section>
<section anchor="Offer_Answer" title="Offer/Answer Considerations">
<t>The following considerations apply when using the SDP
offer/answer <xref target="RFC3264">RFC 3264</xref> mechanism to negotiate
the "channels" attribute.</t>
<t><list style="symbols">
<t>If the offering endpoint specifies a value for the optional channels parameter greater than one
and the answering endpoint both understands the parameter and cannot support that value
requested, the answer MUST contain the optional channels parameter with the
highest value it can support.</t>
<t>If the offering endpoint specifies a value for the optional channels parameter
the answer MUST contain the optional channels parameter unless the only
value the answering endpoint can support is one, in which case the answer
MAY contain the optional channels parameter with value of 1.</t>
<t>If the offering endpoint specifies a value for the ptime parameter that the
answering endpoint cannot support, the answer MUST contain the optional ptime parameter.</t>
<t>If the offering endpoint specifies a value for the maxptime parameter that the
answering endpoint cannot support, the answer MUST contain the optional maxptime parameter.</t>
</list></t>
</section>
<section anchor="SDP_Example" title="SDP Examples">
<t>The following examples illustrate how to signal G.711.0 via SDP.</t>
<section anchor="SDP_Example_1" title="SDP Example 1">
<t><list style="hanging" hangIndent="6">
<t>m=audio RTP/AVP 98<vspace />
a=rtpmap:98 G7110/8000<vspace />
a=fmtp:98 complaw=mu</t>
</list></t>
<t>In the above example the dynamic payload type 98 is mapped to G.711.0 via the "a=rtpmap" parameter.
The mandatory "complaw" is on the "a=fmtp" parameter line.
Note that the optional parameters "ptime" and "channels" are both absent, although it is generally good form
to include "ptime" in the SDP for session diagnostic purposes.</t>
</section>
<section anchor="SDP_Example_2" title="SDP Example 2">
<t>The following example illustrates an offering endpoint requesting 2 channels, but the
answering endpoint can only support (or render) one channel.</t>
<t>Offer:</t>
<t><list style="hanging" hangIndent="6">
<t>m=audio RTP/AVP 98<vspace />
a=rtpmap:98 G7110/8000/2<vspace />
a=ptime:20<vspace />
a=fmtp:98 complaw=al</t>
</list></t>
<t>Answer:</t>
<t><list style="hanging" hangIndent="6">
<t>m=audio RTP/AVP 98<vspace />
a=rtpmap:98 G7110/8000/1<vspace />
a=ptime:20<vspace />
a=fmtp:98 complaw=al</t>
</list></t>
<t>In this example the offer had an optional channels parameter.
The answer must have the optional channels parameter also unless the value in the answer is one.
Shown here is when the answer explicitly contains the channels parameter (it need not have and it
would be interpreted as one channel).
As mentioned previously, it is considered good form to include "ptime" in the SDP for
session diagnostic purposes if the session is a constant ptime session.</t>
</section>
</section>
</section>
<?rfc needLines="8" ?>
<section anchor="Storage" title="G.711.0 Storage Mode Conventions and Definition">
<t>The G.711.0 storage mode definition in this section is similar to many other IETF codecs
(e.g., iLBC, EVRC-NW) and is essentially a concatenation of individual G.711.0 frames.</t>
<t>We note that something must be stored for any G.711.0 frames that are not received at the
receiving endpoint, no matter what the cause. In this section we describe two
mechanisms, a "G.711.0 PLC Frame" and a "G.711.0 Erasure Frame".
These G.711.0 PLC and G.711.0 Erasure Frames are described prior to the G.711.0
storage mode definition for clarity. </t>
<section anchor="PLC" title="G.711.0 PLC Frame">
<t>When G.711 RTP payloads are not received by a rendering endpoint, a Packet Loss Concealment (PLC)
mechanism is typically employed to "fill in" the
missing G.711 symbols with something that is auditorially pleasing and thus the loss
may be not noticed by a listener.
Such a PLC mechanism for G.711 is specified in
<xref target="G.711-AP1"> ITU-T Rec. G.711 - Appendix 1</xref>.</t>
<t>A natural extension when creating G.711.0 frames for storage environments is to employ such a
PLC mechanism to create G.711 symbols for the span of time in which G.711.0 payloads
were not received - and then to compress the resulting "G.711 PLC symbols"
via G.711.0 compression.
The G.711.0 frame(s) created by such a process are called "G.711.0 PLC Frames".</t>
<t>Since PLC mechanisms are designed to render missing audio data with the best fidelity and
intelligibility, G.711.0 frames created via such processing are likely best for most
recording situations (such as voicemail storage) unless there is a requirement not
to fabricate (audio) data not actually received.</t>
<t>After such PLC G.711 symbols have been generated and then encoded by a G.711.0 encoder, the
resulting frames may be stored in G.711.0 frame format.
As a result, there is nothing to specify here - the G.711.0 PLC Frames are stored
as if they were received by the receiving endpoint.
In other words, PLC-generated G.711.0 frames appear as "normal" or "ordinary"
G.711.0 frames in the storage mode file.</t>
</section>
<section anchor="Erasure" title="G.711.0 Erasure Frame">
<t>"Erasure Frames", or equivalently "Null Frames", have been designed
for many frame-based codecs since G.711 was standardized.
These null/erasure frames explicitly represent data from incoming audio that were either not received
by the receiving system or represent data that a transmitting system decided not to send.
Transmitting systems may choose not to send data for a variety of reasons
(e.g., not enough wireless link capacity in radio-based systems) and can choose
to send a "null frame" in lieu of the actual audio.
It is also envisioned that erasure frames would be used in storage mode applications for specific
archival purposes where there is a requirement not to fabricate audio data that was not
actually received.</t>
<t>Thus, a G.711.0 erasure frame is a representation of the amount of time in G.711.0 frames that were not
received or not encoded by the transmitting system.</t>
<t>Prior to defining a G.711.0 erasure frame it is beneficial to note what many G.711 RTP systems
send when the endpoint is "muted".
When muted, many of these systems will send an entire
G.711 payload of either 0+ or 0- (i.e., one of the two levels closest to "analog zero" in
either G.711 companding law).
Next we note that a desirable property for a G.711.0 erasure
frame is for "non G.711.0 Erasure Frame aware" endpoints to be able to playback a
G.711.0 erasure frame with the existing G.711.0 ITU-T reference code.</t>
<t>A G.711.0 Erasure Frame is defined as any G.711.0 frame for which the corresponding
G.711 sample values are either the value 0++ or the value 0-- for the entirety
of the G.711.0 frame.
The levels of 0++ and 0-- are defined as two levels above or below analog zero, respectively.
An entire frame of value 0++ or 0-- is expected to be extraordinarily rare when the
frame was in fact generated by a natural signal
(on the order of one in 2^{ptime in samples, minus one}), as analog inputs such as
speech and music are zero-mean and are typically acoustically coupled to digital
sampling systems.
Note that the playback of a G.711.0 frame characterized as an erasure frame is
auditorially equivalent to a muted signal (a very low value constant).</t>
<t>These G.711.0 erasure frames can be reasonably characterized as null or erasure frames while meeting
the desired playback goal of being decoded by the G.711.0 ITU-T reference code.
Thus, similarly to G.711.0 PLC frames, the G.711.0 erasure frames appear
as "normal" or "ordinary" G.711.0 frames in the storage mode format.</t>
</section>
<section anchor="Storage_Mode" title="G.711.0 Storage Mode Definition">
<t>The storage format is used for storing G.711.0 encoded frames.
The format for the G.711.0 storage mode file defined by this RFC is shown below.</t>
<figure align="center" anchor="g711.0_storage">
<preamble>G.711.0 Storage Mode Format</preamble>
<artwork align="center"><![CDATA[
|---------------------------|----------|--------------|
| Magic Number | | |
| | Version | Concatenated |
| "#!G7110A\n" (for A-law) | Octet | G.711.0 |
| or | | Frames |
| "#!G7110M\n" (for Mu-law) | "0x00" | |
|___________________________|__________|______________|
]]></artwork>
</figure>
<t> The storage mode file consists of a magic number and a version octet
followed by the individual G.711.0 frames concatenated together.</t>
<t>The magic number for G.711.0 A-law corresponds to the ASCII
character string "#!G7110A\n", i.e., "0x23 0x21 0x47 0x37 0x31 0x31
0x30 0x41 0x0A".
Likewise, the magic number for G.711.0 MU-law corresponds to the ASCII
character string "#!G7110M\n", i.e., "0x23 0x21 0x47 0x37 0x31 0x31
0x30 0x4D 0x0A".
</t>
<t>The version number octet allows for the future specification of other
G.711.0 storage mode formats.
The specification of other storage mode formats may be desirable
as G.711.0 frames are of variable length and a future format may
include an indexing methodology that would enable playout far into
a long G.711.0 recording without the necessity of decoding all the
G.711.0 frames since the beginning of the recording.
Other future format specifications may include support for
multiple channels, metadata and the like.
For these reasons it was determined that a versioning strategy was
desirable for the G.711.0 storage mode definition specified by this RFC.
This RFC only specifies Version 0 and thus the value of "0x00" must
be used for the storage mode defined by this RFC.</t>
<t>The G.711.0 codec data frames, including any necessary erasure or PLC frames, are stored in
consecutive order concatenated together
as shown in <xref target="RTP_Payload_Compound"></xref>.</t>
<t>To decode the individual G.711.0 frames, the heuristic presented
in <xref target="RTP_Payload_Decoding"></xref> may be used.
If the version octet is determined not to be zero, the remainder
of the payload MUST NOT be passed to the G.711.0 decoder, as
the ITU-T G.711.0 reference decoder can only decode concatenated
G.711.0 frames and has not been designed to decode elements in
yet to be specified future storage mode formats.</t>
</section>
</section>
<section anchor="Acknowledgements" title="Acknowledgements">
<t>There have been many people contributing to G.711.0 in the course of its
development. The people listed here deserve special mention:
Takehiro Moriya,
Claude Lamblin,
Herve Taddei,
Simao Campos,
Yusuke Hiwasaki,
Jacek Stachurski,
Lorin Netsch,
Paul Coverdale,
Patrick Luthi,
Paul Barrett,
Jari Hagqvist,
Pengjun (Jeff) Huang,
John Gibbs,
Yutaka Kamamoto, and
Csaba Kos.</t>
</section>
<section anchor="Contributors" title="Contributors">
<t>The authors thank everyone who has contributed to this document.
The people listed here deserve special mention:
Ali Begen, Roni Even, and Hadriel Kaplan.</t>
</section>
<?rfc needLines="8" ?>
<section anchor="IANA" title="IANA Considerations">
<t>One media type (audio/G7110) has been defined and requires IANA registration
in the media types registry. See <xref target="Registration"></xref>
for details.</t>
</section>
<?rfc needLines="8" ?>
<section anchor="Security" title="Security Considerations">
<t>RTP packets using the payload format defined in this specification
are subject to the security considerations discussed in the RTP
specification <xref target="RFC3550"></xref>, and in
any appropriate RTP profile (for example <xref target="RFC3551">RFC 3551</xref>
or <xref target="RFC4585"></xref>).
This implies that confidentiality of the media streams is achieved by encryption;
for example, through the application of SRTP <xref target="RFC3711"></xref>.
Because the data compression used with this payload format is applied end-to-end,
any encryption needs to be performed after compression.</t>
<t>Note that the appropriate mechanism to ensure confidentiality and
integrity of RTP packets and their payloads is very dependent on the
application and on the transport and signaling protocols employed.
Thus, although SRTP is given as an example above, other possible
choices exist. </t>
<t> Note that end-to-end security with either authentication, integrity or
confidentiality protection will prevent a network element not
within the security context from performing media-aware operations
other than discarding complete packets. To allow any (media-aware)
intermediate network element to perform its operations, it is
required to be a trusted entity which is included in the security
context establishment.</t>
<t>G.711.0 has no known denial-of-service attacks due to decoding, as data posing
as a desired G.711.0 payload will be decoded into something (as per the
decoding algorithm) with a finite amount of computation.
This is due to the decompression algorithm having a finite worst-case processing path (no
infinite computational loops are possible).</t>
<t>G.711.0 is a variable bit rate (VBR) audio codec.
There have been recent concerns with VBR speech codecs where a passive observer
can identify phrases from a standard speech corpus by means of the lengths
produced by the encoder even when the payload is encrypted <xref target="IEEE"></xref>.
In this paper, it was determined that some code excited linear prediction (CELP)
codecs would produce discrete packet lengths for some phonemes.
Furthermore, it was determined that, with the use of appropriately designed Hidden Markov Models (HMMs),
such a system could predict phrases with unexpected accuracy.
One CELP codec studied, SPEEX, had the property that
it produced 21 different packet lengths in its wideband mode and that these
packet lengths probabilistically mapped to phonemes that an HMM system
could be trained on.
In this paper it was determined that a mitigation technique would be to
pad the output of the encoder with random padding lengths to the effect:
1) that more discrete payload sizes would result, and 2) that the
probabilistic mapping to phonemes would become less clear.
As G.711 is not a speech model based codec, neither is G.711.0.
A G.711.0 encoding, during talking periods, produces frames of varying
frame lengths which are not likely to have a strong mapping to phonemes.
Thus G.711.0 is not expected to have this same vulnerability.
It should be noted that "silence" (only one value of G.711 in the entire G.711
input frame) or "near silence" (only a few G.711 values)
is easily detectable as G.711.0 frame lengths of one or a few octets.
If one desires to mitigate for silence/non-silence detection, statistically variable
padding should be added to G.711.0 frames that resulted in very small G.711.0 frames
(less than about 20% of the symbols of the corresponding G.711 input frame).
Methods of introducing padding in the G.711.0 payloads have been provided in the
G.711.0 RTP payload definitions in <xref target="RTP_Payload_Simple"></xref> and
<xref target="RTP_Payload_Compound"></xref>.</t>
</section>
</middle>
<!-- *****BACK MATTER ***** -->
<back>
<!-- References split into informative and normative -->
<!-- There are 2 ways to insert reference entries from the citation libraries:
1. define an ENTITY at the top, and use "ampersand character"RFC2629; here (as shown)
2. simply use a PI "less than character"?rfc include="reference.RFC.2119.xml"?> here
(for I-Ds: include="reference.I-D.narten-iana-considerations-rfc2434bis.xml")
Both are cited textually in the same manner: by using xref elements.
If you use the PI option, xml2rfc will, by default, try to find included files in the same
directory as the including file. You can also define the XML_LIBRARY environment variable
with a value containing a set of directories to search. These can be either in the local
filing system or remote ones accessed by http (http://domain/dir/... ).-->
<references title="Normative References">
<!--?rfc include="http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml"?-->
&RFC2119;
&RFC4566;
&RFC4288;
&RFC4855;
&RFC4856;
&RFC3550;
&RFC3551;
&RFC4585;
&RFC3711;
&RFC3264;
<!-- for when draft exists, put the following lines in document
&I-D.ramalho-segments-g7110;
<! XXXX ENTITY I-D.ramalho-segments-g7110 SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ramalho-segments-g7110.xml">
-->
<reference anchor="G.711.0">
<front>
<title>Recommendation ITU-T G.711.0 - Lossless Compression of G.711 Pulse Code Modulation</title>
<author fullname="" surname="ITU-T G.711.0">
<organization></organization>
</author>
<date month="September" year="2009" />
</front>
</reference>
<reference anchor="G.711">
<front>
<title>Recommendation ITU-T G.711: Pulse Code Modulation (PCM) of Voice Frequencies</title>
<author fullname="" surname="ITU-T G.711">
<organization></organization>
</author>
<date month="November" year="1988" />
</front>
</reference>
<reference anchor="G.711-AP1">
<front>
<title>Recommendation G.711 Appendix 1: A high quality low-complexity algorithm for packet loss concealment with G.711</title>
<author fullname="" surname="ITU-T G.711 Appendix 1">
<organization></organization>
</author>
<date month="September" year="1999" />
</front>
</reference>
<reference anchor="G.711-A1">
<front>
<title>Recommendation ITU-T G.711 Amendment 1 - Amendment 1: New Annex A on Lossless Encoding of PCM Frames</title>
<author fullname="" surname="ITU-T G.711 Amendment 1">
<organization></organization>
</author>
<date month="September" year="2009" />
</front>
</reference>
<!--
<reference anchor="I-D.ramalho-segments-g7110">
<front>
<title>"G.711.0 Compression Segments", draft-ramalho-g7110-segments (work in progress)., March, 2012.</title>
<author surname="M. Ramalho"></author>
<author surname="D. Wing"></author>
<author surname="M. Permual"></author>
<author surname="N. Harada"></author>
<author surname="H. kaplan"></author>
<date month="March" year="2012" />
</front>
</reference>
-->
</references>
<references title="Informative References">
<!-- Here we use entities that we defined at the beginning. -->
&RFC2629;
<!-- Reference entry for ITU-T Recommendation G.729 (January 2007).
     The source is named through the author surname attribute; fullname and
     organization are left empty. The title text is kept exactly as written,
     including its line break. -->
<reference anchor="G.729">
  <front>
    <title>Recommendation ITU-T G.729 - Coding of speech at 8 kbit/s using conjugate-structure
algebraic-code-excited linear prediction (CS-ACELP)</title>
    <author surname="ITU-T G.729" fullname="">
      <organization/>
    </author>
    <date year="2007" month="January"/>
  </front>
</reference>
<!-- Reference entry for ITU-T Recommendation G.722 (November 1988).
     The source is named through the author surname attribute; fullname and
     organization are left empty. -->
<reference anchor="G.722">
  <front>
    <title>Recommendation ITU-T G.722 - 7 kHz audio-coding within 64 kbit/s</title>
    <author surname="ITU-T G.722" fullname="">
      <organization/>
    </author>
    <date year="1988" month="November"/>
  </front>
</reference>
<!-- Reference entry for the ICASSP 2010 conference paper on G.711.0
     (March 2010). Each author is a separate author element with the name as
     written placed in the surname attribute. The ISBN in the title is a
     13-digit ISBN; an earlier revision repeated the "4244" group. -->
<reference anchor="ICASSP">
  <front>
    <title>Emerging ITU-T Standard G.711.0 - Lossless Compression of G.711 Pulse Code Modulation, International Conference
on Acoustics Speech and Signal Processing (ICASSP), 2010, ISBN 978-1-4244-4295-9</title>
    <!-- ID upload barfs on Author 1 if I keep the authors like I have it below - Moving all authors to one line above
    <author surname="N. Harada, M. A. Ramalho, L. Netsch, Y. Stachurski, Miao Lei, H. Taddei, and Q. Fengyan"></author>
    -->
    <author surname="N. Harada"></author>
    <author surname="Y. Yamamoto"></author>
    <author surname="T. Moriya"></author>
    <author surname="Y. Hiwasaki"></author>
    <author surname="M. A. Ramalho"></author>
    <author surname="L. Netsch"></author>
    <author surname="Y. Stachurski"></author>
    <author surname="Miao Lei"></author>
    <author surname="H. Taddei"></author>
    <author surname="Q. Fengyan"></author>
    <date month="March" year="2010" />
  </front>
</reference>
<!-- Reference entry for the IEEE Symposium on Security and Privacy paper
     "Spot Me if You Can" (May 2008). Each author is a separate author element
     with the name as written placed in the surname attribute. The title text
     is kept exactly as written, including its line break. -->
<reference anchor="IEEE">
  <front>
    <title>Spot Me if You Can: Uncovering Spoken Phrases in Encrypted VoIP Conversations,
IEEE Symposium on Security and Privacy, 2008, ISBN: 978-0-7695-3168-7</title>
    <author surname="C.V. Wright"/>
    <author surname="L. Ballard"/>
    <author surname="S.E. Coull"/>
    <author surname="F. Monrose"/>
    <author surname="G.M. Masson"/>
    <date year="2008" month="May"/>
  </front>
</reference>
</references>
<!-- Remove this comment wrapper to turn the section below into an Appendix.
<section anchor="app-additional" title="Additional Stuff">
<t>This becomes an Appendix.</t>
</section>
-->
<!-- Change Log
v00 2011-06-01 MAR Initial version
-->
</back>
</rfc>
<!-- PAFTECH AB 2003-2026 | 2026-04-24 02:55:12 -->