<?xml version="1.0" encoding="us-ascii"?>
<!DOCTYPE rfc SYSTEM "http://xml.resource.org/authoring/rfc2629.dtd" [
  <!ENTITY RFC4566 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4566.xml">
  <!ENTITY RFC3264 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3264.xml">
  <!ENTITY RFC3890 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3890.xml">
  <!ENTITY RFC3108 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3108.xml">
  <!ENTITY RFC3485 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3485.xml">
  <!ENTITY RFC4504 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4504.xml">

  <!ENTITY RFC2326 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2326.xml">
  <!ENTITY RFC3435 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3435.xml">
  <!ENTITY RFC3441 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3441.xml">
  <!ENTITY RFC3525 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3525.xml">
  <!ENTITY RFC4102 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4102.xml">

  <!ENTITY RFC3551 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3551.xml">
  <!ENTITY RFC3016 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3016.xml">
  <!ENTITY RFC3047 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3047.xml">
  <!ENTITY RFC3267 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3267.xml">
  <!ENTITY RFC3557 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3557.xml">
  <!ENTITY RFC3558 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3558.xml">
  <!ENTITY RFC3952 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3952.xml">
  <!ENTITY RFC4040 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4040.xml">
  <!ENTITY RFC4060 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4060.xml">
  <!ENTITY RFC4184 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4184.xml">
  <!ENTITY RFC4298 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4298.xml">
  <!ENTITY RFC4348 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4348.xml">
  <!ENTITY RFC4352 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4352.xml">
  <!ENTITY RFC4856 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4856.xml">
]>
<?xml-stylesheet type="text/xsl" href="http://xml.resource.org/authoring/rfc2629.xslt" ?>

<?rfc strict="yes" ?>
<?rfc toc="yes" ?>
<?rfc tocdepth="4" ?>
<?rfc symrefs="no" ?>
<?rfc sortrefs="no" ?>
<?rfc compact="yes" ?>
<?rfc subcompact="yes" ?>

<rfc category="info" docName="draft-garcia-mmusic-multiple-ptimes-problem-01.txt" ipr="full3978">
  <front>
    <title abbrev="Multiple ptime in SDP">
      Multiple Packetization Times in the Session Description Protocol (SDP):
      Problem Statement &amp; Requirements
    </title>
    <author initials="M." surname="Garcia-Martin" fullname="Miguel A. Garcia-Martin">
      <organization>Nokia Siemens Networks</organization>
      <address>
        <postal>
          <street>P.O.Box 6</street>
          <city>Nokia Siemens Networks</city>
          <region>FIN</region>
          <code>02022</code>
          <country>Finland</country>
        </postal>
        <email>miguel.garcia@nsn.com</email>
      </address>
    </author>
    <author initials="M." surname="Willekens" fullname="Marc Willekens">
      <organization>Nokia Siemens Networks</organization>
      <address>
        <postal>
          <street>Atealaan 34</street>
          <city>Herentals</city>
          <region>BE</region>
          <code>2200</code>
          <country>Belgium</country>
        </postal>
        <email>marc.willekens@nsn.com</email>
      </address>
    </author>
    <author initials="P." surname="Xu" fullname="Peili Xu">
      <organization>Huawei Technologies</organization>
      <address>
        <postal>
          <street>Bantian</street>
          <city>Longgang</city>
          <region>Shenzhen</region>
          <code>518129</code>
          <country>China</country>
        </postal>
        <email>xupeili@huawei.com</email>
      </address>
    </author>
    <date day="16" month="November" year="2007" />
    <area>RAI</area>
    <workgroup>MMUSIC Working Group</workgroup>
    <keyword>SDP</keyword>
    <keyword>ptime</keyword>
    <keyword>multiple</keyword>
    <abstract>
      <t>
        This document provides a problem statement and requirements with respect to the
        presence of a single packetization time (ptime/maxptime) attribute in SDP media
        descriptions that contain several media formats (audio codecs).
      </t>
    </abstract>
  </front>
  <middle>
    <section title="Introduction">
      <t>
        The <xref target="RFC4566">Session Description Protocol (SDP)</xref>
        provides a protocol to describe multimedia sessions
        for the purposes of session announcement, session invitation,
        and other forms of multimedia session initiation. A session
        description in SDP includes the session name and purpose, the
        media comprising the session, information needed to receive the
        media (addresses, ports, formats, etc.) and some other
        information.
      </t>
      <t>
        In the SDP media description part, the m-line contains the
        media type (e.g. audio), a transport port, a transport
        protocol (e.g. RTP/AVP) and a media format description which
        depends on the transport protocol.
      </t>
      <t>
        For the transport protocol RTP/AVP or RTP/SAVP, the media
        format sub-field can contain a list of RTP payload type
        numbers.
        See <xref target="RFC3551">
          RTP Profile for Audio and
          Video Conferences with Minimal Control
        </xref>, Table 4. For
        example: "m=audio 49232 RTP/AVP 3 15 18" indicates the audio
        encoders GSM, G728 and G729.
      </t>
      <t>
        Further, the media description part can contain additional
        attribute lines that complement or modify the media
        description line. Of interest for this memo are the 'ptime'
        and 'maxptime' attributes.
        According to <xref target="RFC4566">RFC 4566</xref>, the
        'ptime' attribute gives
        the length of time in milliseconds represented by the media in
        a packet, and the 'maxptime' gives the maximum amount of media
        that can be encapsulated in each packet, expressed as time in
        milliseconds. These attributes modify the whole media
        description line, which can contain an extensive list of
        payload types. In other words, these attributes are not
        specific to a given codec.
      </t>
      <t>
        The <xref target="RFC4566">RFC 4566</xref> also indicates that it
        should not be necessary to know ptime to decode RTP or vat
        audio since the 'ptime' attribute is intended as a
        recommendation for the encoding/packetization of
        audio. However, once more, the existing 'ptime' attribute
        defines the desired packetization time for all the payload
        types defined in the corresponding media description line.
      </t>
      <t>
        End-devices can sometimes be configured with different codecs and for
        each codec a different packetization time can be
        indicated. However, there is no clear way to exchange this
        type of information between different user agents and this can
        result in lower voice quality, network problems or performance
        problems in the end-devices.
      </t>
    </section>
    <section title="Some Definitions">
      <t>
        The
        <xref target="RFC4566">Session Description Protocol (SDP)</xref>
        defines the ptime and maxptime as:
      </t>
      <t>a=ptime:[packet time]</t>
      <t>
        This gives the length of time in milliseconds represented by
        the media in a packet. This is probably only meaningful for
        audio data, but may be used with other media types if it makes
        sense. It should not be necessary to know ptime to decode RTP
        or vat audio, and it is intended as a recommendation for the
        encoding/packetization of audio. It is a media-level
        attribute, and it is not dependent on charset.
      </t>
      <t>a=maxptime:[maximum packet time]</t>
      <t>
        This gives the maximum amount of media that can be encapsulated
        in each packet, expressed as time in milliseconds. The time
        SHALL be calculated as the sum of the time the media present in
        the packet represents. For frame-based codecs, the time SHOULD
        be an integer multiple of the frame size. This attribute is
        probably only meaningful for audio data, but may be used with
        other media types if it makes sense. It is a media-level
        attribute, and it is not dependent on charset. Note that this
        attribute was introduced after RFC 2327, and non-updated
        implementations will ignore this attribute.
      </t>
      <t>
        Additional encoding parameters MAY be defined in the future,
        but codec-specific parameters SHOULD NOT be added. Parameters
        added to an "a=rtpmap:" attribute SHOULD only be those required
        for a session directory to make the choice of appropriate media
        to participate in a session. Codec-specific parameters should
        be added in other attributes (for example, "a=fmtp:").
      </t>
      <t>
        Note: RTP audio formats typically do not include information
        about the number of samples per packet. If a non-default (as
        defined in the RTP Audio/Video Profile) packetization is
        required, the "ptime" attribute is used as given above.
      </t>
    </section>
    <section title="Some references">
      <t>
        Many RFCs make references to the "ptime/maxptime" attribute to 
        give some definitions, recommendations, requirements, default values.
      </t>
      <t>
        <xref target="RFC4566">SDP</xref>
        gives definitions for ptime/maxptime.
      </t>
      <t>
        <xref target="RFC3264">SDP Offer/answer model</xref>
        gives some requirements for the ptime for the offerer and answerer.

        If the ptime attribute is present for a stream, it indicates the
        desired packetization interval that the offerer would like to
        receive. The ptime attribute MUST be greater than zero.

        The answerer MAY include a non-zero ptime attribute for any media
        stream; this indicates the packetization interval that the answerer
        would like to receive.
        There is no requirement that the packetization interval be the same
        in each direction for a particular stream.
      </t>
      <t>
        <xref target="RFC3890">SDP Transport independent bandwidth modifier</xref>
        indicates that the ptime may be a possible candidate for the bandwidth but 
        it should be avoided to be used for that purpose. The use of another
        parameter is proposed.
      </t>
      <t>
        <xref target="RFC3108">SDP Conversions for ATM bearer</xref>
        It is not recommended that the ptime be
        used in ATM applications since packet period information is
        provided with other parameters (e.g., the profile type and
        number in the 'm' line, and the 'vsel', 'dsel' and 'fsel'
        attributes).  Also, for AAL1 applications, 'ptime' is not
        applicable and should be flagged as an error.  If used in AAL2
        and AAL5 applications, 'ptime' should be consistent with the
        rest of the SDP description.

        The 'vsel', 'dsel' and 'fsel' attributes refer generically
        to codecs.  These can be used for service-specific codec negotiation and
        assignment in non-ATM as well as ATM applications.

        The 'vsel' attribute indicates a prioritized list of one or more 3-
        tuples for voice service.  Each 3-tuple indicates a codec, an
        optional packet length and an optional packetization period.  This
        complements the 'm' line information and should be consistent with
        it.

        The 'vsel' attribute refers to all directions of a connection.  For a
        bidirectional connection, these are the forward and backward
        directions.  For a unidirectional connection, this can be either the
        backward or forward direction.

        The 'vsel' attribute is not meant to be used with bidirectional
        connections that have asymmetric codec configurations described in a
        single SDP descriptor.  For these, the 'onewaySel' attribute 
        should be used.
        
        The 'vsel' line is structured with an encodingName, a packetLength and a
        packetTime.
        
        The packetLength is a decimal integer
        representation of the packet length in octets.  The packetTime is a
        decimal integer representation of the packetization interval in
        microseconds.  The parameters packetLength and packetTime can be
        set to "-" when not needed.  Also, the entire 'vsel' media attribute
        line can be omitted when not needed.
      </t>
      <t>
        <xref target="RFC3485">SIP/SDP static dictionary for SigComp</xref>
      </t>
      <t>
        <xref target="RFC4504">SIP device requirements and configuration</xref>
        In some cases, operators want to control which codecs may be used in
        their network.  The desired subset of codecs supported by the device
        SHOULD be configurable along with the order of preference.  Service
        providers SHOULD have the possibility of plugging in their own codecs
        of choice.  The codec settings MAY include the packet length and
        other parameters like silence suppression or comfort noise
        generation.

        The set of available codecs will be used in the codec negotiation
        according to RFC3264.
        Example: Codecs="speex/8000;ptime=20;cng=on,gsm;ptime=30"
      </t>
      <t>
        <xref target="RFC2326">RTSP</xref>
        Format-specific parameters are conveyed using the "fmtp" media
        attribute. The syntax of the "fmtp" attribute is specific to the
        encoding(s) that the attribute refers to. Note that the packetization
        interval is conveyed using the "ptime" attribute.
      </t>
      <t>
        <xref target="RFC3435">MGCP</xref>
        The packetization period in milliseconds, encoded as the keyword
        "p", followed by a colon and a decimal number.  If the Call Agent
        specifies a range of values, the range will be specified as two
        decimal numbers separated by a hyphen (as specified for the "ptime"
        parameter for SDP).
      </t>
      <t>
        <xref target="RFC3441">MGCP ATM package</xref>
        Packet time changed ("ptime(#)"):

        If armed via an R:atm/ptime, a media gateway signals a packetization
        period change through an O:atm/ptime.  The decimal number in
        parentheses is optional.  It is the new packetization period in
        milliseconds.  In AAL2 applications, the pftrans event can be used to
        cover packetization period changes (and codec changes).

        Voice codec selection (vsel): This is a prioritized list of one or
        more 3-tuples describing voice service.  Each vsel 3-tuple indicates
        a codec, an optional packet length and an optional packetization
        period.
      </t>
      <t>
        <xref target="RFC3525">Gateway control protocol</xref>
      </t>
      <t>
        <xref target="RFC4102">Registration MIME text/red sub-type</xref>
      </t>
      <t>
        <xref target="RFC3551">RTP/AVP</xref>
      </t>
      <t>
        <xref target="RFC3016">RTP payload for MPEG4 A/V</xref>
      </t>
      <t>
        <xref target="RFC3047">RTP payload for G.722.1</xref>
      </t>
      <t>
        <xref target="RFC3267">RTP payload for AMR, AMR-WB</xref>
        The maxptime SHOULD be a multiple of the frame size. If this parameter is not
        present, the sender MAY encapsulate any number of speech
        frames into one RTP packet.
      </t>
      <t>
        <xref target="RFC3557">RTP payload for distributed speech recognition</xref>
        The maxptime SHOULD be a multiple of the frame pair size (20 ms).
        If this parameter is not present, maxptime is assumed to be 80 ms.

        Note, since the performance of most speech recognizers are
        extremely sensitive to consecutive FP losses, if the user of the
        payload format expects a high packet loss ratio for the session,
        it MAY consider to explicitly choose a maxptime value for the
        session that is shorter than the default value.
        </t>
      <t>
        <xref target="RFC3558">RTP payload for EVRC and SMV</xref>
        The parameters maxptime and maxinterleave are exchanged at the
        initial setup of the session.  In one-to-one sessions, the sender
        MUST respect these values set by the receiver, and MUST NOT
        interleave/bundle more packets than what the receiver signals that it
        can handle.  This ensures that the receiver can allocate a known
        amount of buffer space that will be sufficient for all
        interleaving/bundling used in that session.  During the session, the
        sender may decrease the bundling value or interleaving length (so
        that less buffer space is required at the receiver), but never exceed
        the maximum value set by the receiver.  This prevents the situation
        where a receiver needs to allocate more buffer space in the middle of
        a session but is unable to do so.

        Additionally, senders have the following restrictions:

        MUST NOT bundle more codec data frames in a single RTP packet than
        indicated by maxptime (see Section 12) if it is signaled.

        SHOULD NOT bundle more codec data frames in a single RTP packet
        than will fit in the MTU of the underlying network.

        If maxptime is not signaled, the default maxptime value SHALL be 200 milliseconds.
      </t>
      <t>
        <xref target="RFC3952">RTP payload for iLBC</xref>
        The maxptime SHOULD be a multiple of
        the frame size.  This attribute is probably only meaningful
        for audio data, but may be used with other media types if it
        makes sense.  It is a media attribute, and is not dependent
        on charset.  Note that this attribute was introduced after
        RFC 2327, and non updated implementations will ignore this
        attribute.

        Parameter ptime can not be used for the purpose of specifying iLBC
        operating mode, due to fact that for the certain values it will be
        impossible to distinguish which mode is about to be used (e.g., when
        ptime=60, it would be impossible to distinguish if packet is carrying
        2 frames of 30 ms or 3 frames of 20 ms, etc.).
      </t>
      <t>
        <xref target="RFC4040">RTP payload for 64 kbps transparent call</xref>
      </t>
      <t>
        <xref target="RFC4060">RTP payload for distributed speech recognition</xref>
        If maxptime is not present, maxptime is assumed to be 80ms.

        Note, since the performance of most speech recognizers are
        extremely sensitive to consecutive FP losses, if the user of the
        payload format expects a high packet loss ratio for the session,
        it MAY consider to explicitly choose a maxptime value for the
        session that is shorter than the default value.
      </t>
      <t>
        <xref target="RFC4184">RTP payload for AC-3</xref>
      </t>
      <t>
        <xref target="RFC4298">RTP payload for BroadVoice speech</xref>
        The maxptime
        SHOULD be a multiple of the duration of a single codec data
        frame (5 ms).
      </t>
      <t>
        <xref target="RFC4348">RTP payload for VMR-WB</xref>
        The parameters "maxptime" and "ptime" should in most cases not
        affect the interoperability; however, the setting of the parameters
        can affect the performance of the application.
      </t>
      <t>
        <xref target="RFC4352">RTP payload for AMR-WB+</xref>
      </t>
      <t>
        <xref target="RFC4856">RTP payload MIME type registration</xref>
      </t>

    </section>
    <section title="Problem Statement">
      <t>
        The packetization time is an important parameter which helps
        in reducing the packet overhead. Many voice codecs use a
        certain frame length to determine the coded voice filter
        parameters and try to find a certain optimum between the
        perceived voice quality (measured by the Mean Opinion Score
        (MOS) factor), and the required bitrate. When a packet
        oriented network is used for the transfer, the packet header
        induces an additional overhead.  As such, it makes sense to
        try to combine different voice frame data in one packet (up to
        a Maximum Transmission Unit (MTU)) to find a good balance
        between the required network resources, end-device resources
        and the perceived voice quality influenced by packet loss,
        packet delay, jitter.  When the packet size decreases, the
        bandwidth efficiency is reduced. When the packet size
        increases, the packetization delay can have a negative impact
        on the perceived voice quality.
      </t>
      <t>
        The <xref target="RFC3551">
          RTP Profile for Audio and Video
          Conferences with Minimal Control
        </xref>, Table 1, indicates
        the frame size and default packetization time for different
        codecs. The G728 codec has a frame size of 2.5 ms/frame and
        a default packetization time of 20 ms/packet.  For G729
        codec, the frame size is 10 ms/frame and a default
        packetization time of 20 ms/packet.
      </t>
      <t>
        When more and more telephony traffic is carried over
        IP-networks, the quality as perceived by the end-user should
        be no worse than the classical telephony services. For VoIP
        service providers, it is very important that endpoints receive
        audio with the best possible codec and packetization time. In
        particular, the packetization time depends on the selected
        codec for the audio communication and other factors, such as
        the Maximum Transmission Unit (MTU) of the network and the
        type of access network technology.
      </t>
      <t>
        As such, the packetization time is clearly a function of the
        codec and the network access technology. During the
        establishment of a new session or a modification of an existing
        session, an endpoint should be able to express its preference
        with respect to the packetization time for each codec. This would
        mean that the creator of the SDP prefers the remote endpoint to
        use certain packetization time when sending media with that
        codec.
      </t>
      <t>
        The <xref target="RFC4566">RFC 4566</xref> provides the means for
        expressing a packetization time that affects all the payload
        types declared in the media description line. So, there are no
        means to indicate the desired packetization time on a per
        payload type basis. Implementations have been using
        proprietary mechanisms for indicating the packetization time
        per payload type, leading to lack of interoperability in this
        area. One of these mechanisms is the 'maxmptime' attribute,
        defined in the
        <xref target="ITU.V152">ITU-T Recommendation V.152</xref>,
        which "indicates the supported packetization
        period for all codec payload types". Another one is the
        'mptime' attribute, defined in the
        <xref target="PKT.PKT-SP-EC-MGCP">
          PacketCable Network-Based Call
          Signaling Protocol Specification
        </xref>,
        which indicates "a
        list of packetization period values the endpoint is capable of
        using (sending and receiving) for this connection". While all
        have similar semantics, there is obviously no interoperability
        between them, creating a nightmare for the implementer who
        happens to be defining a common SDP stack for different
        applications.
      </t>
      <t>
        A few RTP payload format descriptions, such as
        <xref target="RFC3267">RFC 3267</xref>,
        <xref target="RFC3016">RFC 3016</xref>, and
        <xref target="RFC3952">RFC 3952</xref>
        indicate that the packetization time for such payload should
        be indicated in the 'ptime' attribute in SDP. However, since
        the 'ptime' attribute affects all the payload formats included
        in the media description line, it would not be possible to
        create a media description line that contains all the
        mentioned payload formats and different packetization
        times. The solutions range from considering a single
        packetization time for all the payload types, or creating a
        media description line that contains a single payload type.
      </t>
      <t>
        The issue of a given packetization for a specific codec has
        been captured in past RFCs. For example,
        <xref target="RFC4504">RFC 4504</xref>
        contains a set of
        requirements for SIP telephony devices. Section 3.8 in that
        RFC also provides background information for the need of
        packetization time, which could be set by either the user or
        the administrator of the device, on a per codec
        basis. However, once more, if several payload formats are
        offered in the same media description line in SDP, there is no
        way to indicate different packetizations per payload format.
      </t>
      <t>
        Below is an example which indicates how the ptime can cause
        interworking problems between different implementations.
      </t>
      <figure align="center" title="Example1">
        <artwork>
          <![CDATA[
m=audio 1234 RTP/AVP 0 4 8
a=ptime:30 ]]>
        </artwork>
      </figure>
      <t>
        The media formats 0 and 8 are PCM U and A-law which are sample based codecs
        with a default packetization time of 20 ms. However, a packetization time of
        30 ms can also be used. The media format 4 is a G723 frame based codec with a
        frame size of 30 ms. As such, the most common ptime for all these different
        codecs is 30 ms.
        When the receiver uses this ptime to initialize its buffer for its voice
        samples based on this 30 ms value and when the sender however is sending the
        media with the PCMU codec with its default packetization time of 20 ms, then
        the receiver has to wait for another voice packet before its buffer can be
        filled-up for a total duration of 30 ms. And this can cause disruptions in the
        synchronous playback of the digitized voice.
      </t>
    </section>
    <section title="Requirements">
      <t>
        The main requirement is coming from the implementation and media gateway
        community making use of hardware based solutions, e.g. DSP or FPGA
        implementations with silicon constraints for the amount of buffer space.
      </t>
      <t>
        Some are making use of the ptime/codec information to make certain QoS budget
        calculations.
        When the packetization time is known for a codec with a certain
        frame size and frame datarate, the efficiency of the throughput
        can be calculated.
      </t>
      <t>
        Currently, the ptime and maxptime are "indication" attributes and are optional.
        When these parameters are used for resource reservation and for hardware
        initializations, a negotiated value between the "offerer" and "answerer" becomes
        a requirement.
      </t>
      <t>
        There could be different sources for the ptime/maxptime, i.e. from RTP/AVP
        profile, from end-user device configuration, from network operator, from
        intermediaries, from receiver.
      </t>
      <t>
        The codec and ptime/maxptime in uplink and downlink can be different.
      </t>
    </section>
    <section title="Solutions already proposed">
      <t>
        In recent years, different solutions have been proposed and
        implemented with the goal of making the ptime a function of the codec
        instead of the media line containing a list of codecs.
        The purpose of this list is only to indicate what kind of logical
        proposals were already made to find a solution for the SDP interworking
        issues due to implementation and RFC interpretations.
        It's just a list and does not impose any preference for a certain solution.
      </t>
      <t>
        In all these proposals, a semantic grouping of the codec specific
        information is made by giving a new interpretation of the sequence
          of the parameters or by providing new additional attributes.
      </t>
      <t>
        All these methods are against the basic rule indicated in the RFCs which
        state that a ptime and maxptime are media specific and NOT codec specific.
        It does not solve the interworking issues. Instead, it makes it worse due
        to many new interpretations and implementations.
      </t>
      <t>
        To avoid a further divergence, the implementation community is strongly
        asking for a standardized solution.
      </t>
      <section title="Method 1">
        <t>
          Write the rtpmap first, followed by the ptime when it is related to the
          codec.
        </t>
        <figure align="center" title="Method1">
          <artwork>
            <![CDATA[
m=audio 1234 RTP/AVP 4 0
a=rtpmap:4 G723/8000
a=rtpmap:0 PCMU/8000
a=ptime:20
a=fmtp:4 bitrate=6400 ]]>
          </artwork>
        </figure>
        <t>
          Some SDP encoders first write the media line, followed by the rtpmaps and
          then the value attributes.
        </t>
      </section>
      <section title="Method 2">
        <t>
          Grouping of all codec specific information together.
        </t>
        <figure align="center" title="Method2">
          <artwork>
            <![CDATA[
m=audio 1234 RTP/AVP 4 0
a=rtpmap:4 G723/8000
a=fmtp:4 bitrate=6400
a=rtpmap:0 PCMU/8000
a=ptime:20 ]]>
          </artwork>
        </figure>
        <t>
          Most implementers are in favor of this proposal, i.e. writing the value
          attributes associated with an rtpmap listed immediately after it.
        </t>
      </section>
      <section title="Method 3">
        <t>
          Use the ptime for every codec after its rtpmap definition.
        </t>
        <figure align="center" title="Method3">
          <artwork>
            <![CDATA[
m=audio 1234 RTP/AVP 0 18  4
a=rtpmap:18 G729/8000
a=ptime:30

a=rtpmap:0 PCMU/8000
a=ptime:40

a=rtpmap:4 G723/8000
a=ptime:60 ]]>
          </artwork>
        </figure>
      </section>
      <section title="Method 4">
        <t>
          Create a new "mptime" (multiple ptime) attribute with a construct
          similar to the m-line.
        </t>
        <figure align="center" title="Method4">
          <artwork>
            <![CDATA[
m=audio 1234 RTP/AVP 0 18  4
a=mptime:40 30 60 ]]>
          </artwork>
        </figure>
      </section>
      <section title="Method 5">
        <t>
          Use of a new "x-ptime" attribute
        </t>
      </section>
      <section title="Method 6">
        <t>
          Use of different m-lines with one codec per m-line
        </t>
        <figure align="center" title="Method6">
          <artwork>
            <![CDATA[
m=audio 1234 RTP/AVP 0
a=rtpmap:0 PCMU/8000
a=ptime:40

m=audio 1234 RTP/AVP 18
a=rtpmap:18 G729/8000
a=ptime:30

m=audio 1234 RTP/AVP 4
a=rtpmap:4 G723/8000
a=ptime:60 ]]>
          </artwork>
        </figure>
      </section>
      <section title="Method 7">
        <t>
          Use of the ptime in the fmtp attribute
        </t>
        <figure align="center" title="Method7">
          <artwork>
            <![CDATA[
m=audio 1234 RTP/AVP 4 18
a=rtpmap:18 G729/8000
a=fmtp:18 annexb=yes;ptime=20
a=maxptime:40

a=rtpmap:4 G723/8000
a=fmtp:4 bitrate=6.3;annexa=yes;ptime=30
a=maxptime:60 ]]>
        </artwork>
        </figure>
      </section>
      <section title="Method 8">
        <t>
          Use of the vsel parameter as done for ATM bearer connections.
          The following example indicates first preference of G.729 or G.729a (both are
          interoperable) as the voice encoding scheme.  A packet length of 10
          octets and a packetization interval of 10 ms are associated with this
          codec.  G726-32 is the second preference stated in this line, with an
          associated packet length of 40 octets and a packetization interval of
          10 ms.  If the packet length and packetization interval are intended
          to be omitted, then this media attribute line contains '-'.
        </t>
        <figure align="center" title="Method8">
            <artwork>
              <![CDATA[
a=vsel:G729 10 10000 G726-32 40 10000
a=vsel:G729 - - G726-32 - -]]>
            </artwork>
          </figure>
      </section>
      <section title="Method 9">
        <t>
          Use of the V.152 "maxmptime" attribute
        </t>
      </section>
      <section title="Method 10">
        <t>
          Use of the PacketCable "mptime" attribute
        </t>
      </section>
    </section>
    <section title="Conclusion and next steps">
      <t>
        This memo advocates the need for a standardized mechanism to
        indicate the packetization time on a per codec basis, allowing
        the creator of SDP to include several payload formats in the
        same media description line with different packetization
        times.
      </t>
      <t>
        This memo encourages discussion in the MMUSIC WG mailing list
        in the IETF. The ultimate goal is to define a standard
        mechanism that fulfils the requirements highlighted in this
        memo.
      </t>
      <t>
        The goal is finding a solution which does not require changes in
        implementations which have followed the existing RFC guidelines and
        which are able to receive any packetization time.
      </t>
      <t>
        A clear solution has to be described for the resource-constraint problem
        in hardware-based solutions: either an extension/modification of
        the current SDP, or a clarification of how certain issues can be solved
        with the existing RFCs.
      </t>
    </section>
    <section title="Security Considerations" anchor="sec-security">
      <t>
        This memo discusses a problem statement and requirements. As
        such, no protocol that can suffer attacks is defined.
      </t>
    </section>
    <section title="IANA Considerations" anchor="sec-iana">
      <t>
        This document does not request IANA to take any action.
      </t>
    </section>
  </middle>
  <back>
    <references title="Normative References">
      &RFC4566;
      &RFC3264;
    </references>
    <references title="Informative References">
      <reference anchor="ITU.V152">
        <front>
          <title>Procedures for supporting voice-band data over IP networks</title>
          <author fullname="ITU-T">
            <organization>ITU-T</organization>
          </author>
          <date year="2005" month="January" />
        </front>
        <seriesInfo name="ITU-T Recommendation" value="V.152"/>
        <format type="pdf" target="http://www.itu.int/rec/dologin_pub.asp?lang=e&amp;id=T-REC-V.152-200501-I!!PDF-E&amp;type=items"/>
      </reference>
      <reference anchor="PKT.PKT-SP-EC-MGCP">
        <front>
          <title>PacketCable Network-Based Call Signaling Protocol Specification</title>
          <author fullname="PacketCable">
            <organization>PacketCable</organization>
          </author>
          <date year="2005" month="August" day="12" />
        </front>
        <seriesInfo name="PacketCable" value="PKT-SP-EC-MGCP-I11-050812"/>
        <format type="pdf" target="http://www.packetcable.com/downloads/specs/PKT-SP-MGCP-I11-050812.pdf" />
      </reference>

      &RFC3890;
      &RFC3108;
      &RFC3485;
      &RFC4504;

      &RFC2326;
      &RFC3435;
      &RFC3441;
      &RFC3525;
      &RFC4102;

      &RFC3551;
      &RFC3016;
      &RFC3047;
      &RFC3267;
      &RFC3557;
      &RFC3558;
      &RFC3952;
      &RFC4040;
      &RFC4060;
      &RFC4184;
      &RFC4298;
      &RFC4348;
      &RFC4352;
      &RFC4856;
    </references>
  </back>
</rfc>

<!-- PAFTECH AB 2003-2026 | 2026-04-23 08:26:56 -->