vdr-plugin-softhddevice-drm-gles 1.6.7
pes.cpp
Go to the documentation of this file.
1// SPDX-License-Identifier: AGPL-3.0-or-later
2
10#include <functional>
11#include <map>
12#include <stdexcept>
13
14#include "pes.h"
15#include "logger.h"
16#include "misc.h"
17
18#include <vdr/remux.h>
19
20extern "C" {
21#include <libavutil/avutil.h>
22}
23
35{
37 std::function<bool(const uint8_t*)> MatchSyncWord;
38 std::function<int(const uint8_t*)> GetFrameSize;
39};
40
47static const std::map<AVCodecID, CodecInfo> AudioCodecMap = {
49 .minSize = 3,
50 .MatchSyncWord = [](const uint8_t* data) -> bool {
51 constexpr uint32_t MPEG_AUDIO_SYNC_WORD = 0xFF'E000;
52 constexpr uint32_t MPEG_AUDIO_VERSION_FORBIDDEN_VALUE = 0x00'0800;
55
56 uint32_t syncWord = ReadBytes(data, 3);
57 return (syncWord & 0b1111'1111'1110'0000'0000'0000) == MPEG_AUDIO_SYNC_WORD &&
58 (syncWord & 0b0000'0000'0001'1000'0000'0000) != MPEG_AUDIO_VERSION_FORBIDDEN_VALUE &&
59 (syncWord & 0b0000'0000'0000'0110'0000'0000) != MPEG_AUDIO_LAYER_DESCRIPTION_FORBIDDEN_VALUE &&
60 (syncWord & 0b0000'0000'0000'0000'1111'0000) != MPEG_AUDIO_BITRATE_INDEX_FORBIDDEN_VALUE;
61 },
62 .GetFrameSize = [](const uint8_t* data) -> int {
63 constexpr uint16_t BitRateTable[2][4][16] = {
64 // MPEG Version 1
65 {{},
66 {0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0},
67 {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 0},
68 {0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0}},
69 // MPEG Version 2 & 2.5
70 {{},
71 {0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0},
72 {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0},
73 {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0}
74 }
75 };
76 constexpr uint16_t SampleRateTable[4] = {44100, 48000, 32000, 0};
77
78 int mpeg2 = !(data[1] & 0x08) && (data[1] & 0x10);
79 int mpeg25 = !(data[1] & 0x08) && !(data[1] & 0x10);
80 int layer = 4 - ((data[1] >> 1) & 0x03);
81 int bitRateIndex = (data[2] >> 4) & 0x0F;
82 int sampleRateIndex = (data[2] >> 2) & 0x03;
83 int padding = (data[2] >> 1) & 0x01;
84
86 if (!sampleRate)
87 throw std::invalid_argument("MPEG: invalid sample rate");
88
89 sampleRate >>= mpeg2;
91
93 if (!bitRate)
94 throw std::invalid_argument("MPEG: invalid bit rate");
95
96 bitRate *= 1000;
97 int frameSize;
98 switch (layer) {
99 case 1:
100 frameSize = (12 * bitRate) / sampleRate;
101 frameSize = (frameSize + padding) * 4;
102 break;
103 case 2:
104 frameSize = (144 * bitRate) / sampleRate;
106 break;
107 case 3:
108 default:
109 frameSize = (((mpeg2 || mpeg25) ? 72 : 144) * bitRate) / sampleRate;
111 break;
112 }
113 return frameSize;
114 }
115 }},
117 .minSize = 3,
118 .MatchSyncWord = [](const uint8_t* data) -> bool {
119 constexpr uint32_t LOAS_SYNC_WORD_MASK = 0xFFE000;
120 constexpr uint32_t LOAS_SYNC_WORD = 0x2B7 << (24-11);
121
122 uint32_t syncWord = ReadBytes(data, 3);
124 },
125 .GetFrameSize = [](const uint8_t* data) -> int {
126 return ((data[1] & 0x1F) << 8) + data[2] + 3;
127 }
128 }},
130 .minSize = 6,
131 .MatchSyncWord = [](const uint8_t* data) -> bool {
132 constexpr uint32_t AC3_SYNC_WORD_MASK = 0xFFFF00;
133 constexpr uint32_t AC3_SYNC_WORD = 0x0B77 << (24-16);
134
135 uint32_t syncWord = ReadBytes(data, 3);
137 data[5] <= (10 << 3);
138 },
139 .GetFrameSize = [](const uint8_t* data) -> int {
140 constexpr uint16_t Ac3FrameSizeTable[38][3] = {
141 {64, 69, 96}, {64, 70, 96}, {80, 87, 120}, {80, 88, 120},
142 {96, 104, 144}, {96, 105, 144}, {112, 121, 168}, {112, 122, 168},
143 {128, 139, 192}, {128, 140, 192}, {160, 174, 240}, {160, 175, 240},
144 {192, 208, 288}, {192, 209, 288}, {224, 243, 336}, {224, 244, 336},
145 {256, 278, 384}, {256, 279, 384}, {320, 348, 480}, {320, 349, 480},
146 {384, 417, 576}, {384, 418, 576}, {448, 487, 672}, {448, 488, 672},
147 {512, 557, 768}, {512, 558, 768}, {640, 696, 960}, {640, 697, 960},
148 {768, 835, 1152}, {768, 836, 1152}, {896, 975, 1344}, {896, 976, 1344},
149 {1024, 1114, 1536}, {1024, 1115, 1536}, {1152, 1253, 1728},
150 {1152, 1254, 1728}, {1280, 1393, 1920}, {1280, 1394, 1920},
151 };
152
153 int fscod = data[4] >> 6;
154 if (fscod == 0x03)
155 throw std::invalid_argument("AC3: invalid sample rate");
156
157 int frmsizcod = data[4] & 0x3F;
158 if (frmsizcod > 37)
159 throw std::invalid_argument("AC3: invalid frame size");
160
161 return Ac3FrameSizeTable[frmsizcod][fscod] * 2;
162 }
163 }},
165 .minSize = 6,
166 .MatchSyncWord = [](const uint8_t* data) -> bool {
167 constexpr uint32_t AC3_SYNC_WORD = 0x0B77 << (24-16);
168
169 uint32_t syncWord = ReadBytes(data, 3);
170 return (syncWord & 0xFFFF00) == AC3_SYNC_WORD && data[5] > (10 << 3);
171 },
172 .GetFrameSize = [](const uint8_t* data) -> int {
173 if ((data[4] & 0xF0) == 0xF0)
174 throw std::invalid_argument("E-AC3: invalid fscod fscod2");
175
176 return (((data[2] & 0x07) << 8) + data[3] + 1) * 2;
177 }
178 }},
180 .minSize = 7,
181 .MatchSyncWord = [](const uint8_t* data) -> bool {
182 constexpr uint32_t ADTS_SYNC_WORD = 0xFFF000;
183 constexpr uint32_t ADTS_LAYER = 0x000000;
185
186 uint32_t syncWord = ReadBytes(data, 3);
187 return (syncWord & 0b1111'1111'1111'0000'0000'0000) == ADTS_SYNC_WORD &&
188 (syncWord & 0b0000'0000'0000'0110'0000'0000) == ADTS_LAYER &&
189 (syncWord & 0b0000'0000'0000'0011'1100'0000) != ADTS_SAMPLING_FREQUENCY_FORBIDDEN_VALUE;
190 },
191 .GetFrameSize = [](const uint8_t* data) -> int {
192 return ((data[3] & 0x03) << 11) | ((data[4] & 0xFF) << 3) | ((data[5] & 0xE0) >> 5);
193 }
194 }},
196 .minSize = 8,
197 .MatchSyncWord = [](const uint8_t* data) -> bool {
198 constexpr uint32_t DTS_SYNC_WORD = 0x7FFE8001;
199
200 uint32_t syncWord = ReadBytes(data, 4);
201 return syncWord == DTS_SYNC_WORD;
202 },
203 .GetFrameSize = [](const uint8_t* data) -> int {
204 int frameSize = ((data[5] & 0x03) << 12) | ((data[6] & 0xFF) << 4) | ((data[7] & 0xF0) >> 4);
205 frameSize += 1;
207 throw std::invalid_argument("DTS: invalid frame size");
208
209 return frameSize;
210 }
211 }}
212};
213
214
/**
 * Create a PES packet parser.
 *
 * The constructor only stores its arguments; it performs no validation of the
 * packet. Validation is done separately by Init().
 *
 * @param data    pointer to the raw PES packet data; the pointer itself is stored
 *                in m_data, the bytes are not copied here
 * @param size    size of the PES packet, stored in m_size
 * @param isAudio selects the identifier string used in log messages:
 *                "audio" if true, "video" otherwise
 */
226cPes::cPes(const uint8_t *data, int size, bool isAudio)
227 : m_data(data),
228 m_size(size),
229 m_identifier(isAudio ? "audio" : "video")
230{
231}
232
/**
 * Initialize and validate the PES packet.
 *
 * Sets m_valid to true only if all of the following hold:
 *  - IsHeaderValid() and IsStreamIdValid() both succeed,
 *  - the packet is larger than 8 bytes,
 *  - the payload offset reported by PesPayloadOffset() does not exceed m_size.
 *
 * In every other case a log message is emitted and m_valid is left untouched
 * by this function.
 */
244void cPes::Init(void)
245{
246 if (IsHeaderValid() && IsStreamIdValid()) {
247 if (m_size <= 8 || PesPayloadOffset(m_data) > m_size) // header length field is at position 8 when the PES extension is present
248 LOGWARNING("pes: %s: %s packet too short: %d %02X", __FUNCTION__, m_identifier, m_size, GetStreamId());
249 else
250 m_valid = true;
251 } else if (PesLongEnough(m_size)) {
// Header or stream id check failed: dump the first three bytes and the stream id for diagnosis.
// NOTE(review): presumably PesLongEnough() guarantees these bytes are readable - confirm against vdr/remux.h.
252 LOGDEBUG("pes: %s: invalid %s packet: %d %02X%02X%02X | %02X", __FUNCTION__, m_identifier, m_size, m_data[0], m_data[1], m_data[2], GetStreamId());
253 } else {
// Too short to inspect any header bytes: only the size is logged.
254 LOGDEBUG("pes: %s: %s packet too short: %d", __FUNCTION__, m_identifier, m_size);
255 }
256}
257
274{
275 return m_valid;
276}
277
291
/**
 * Check if the PES packet contains a Presentation Time Stamp (PTS).
 *
 * Thin wrapper that forwards the raw packet pointer to PesHasPts().
 * This function does not itself consult m_valid or m_size.
 * NOTE(review): presumably callers are expected to check IsValid() first - confirm.
 *
 * @return the result of PesHasPts() for m_data
 */
300bool cPes::HasPts(void)
301{
302 return PesHasPts(m_data);
303}
304
314{
315 if (!HasPts())
316 return AV_NOPTS_VALUE;
317
318 return PesGetPts(m_data);
319}
320
330{
332}
333
343{
345}
346
365{
366 if (!PesHasLength(m_data))
367 return m_size; // Length field is 0, meaning unbounded/unspecified. Return raw data size.
368
369 return PesLength(m_data);
370}
371
372/********************************************************************************
373 * Reassembly buffer
374 *******************************************************************************/
375
386{
387 if (size == 0)
388 return nullptr;
389
391
392 if (!avpkt)
393 LOGFATAL("pes: %s: out of memory while allocating AVPacket", __FUNCTION__);
394
395 if (av_new_packet(avpkt, size)) // allocates size + AV_INPUT_BUFFER_PADDING_SIZE
396 LOGFATAL("pes: %s: out of memory while allocating AVPacket payload", __FUNCTION__);
397
398 memcpy(avpkt->data, m_buffer.Peek(), size);
399 memset(&avpkt->data[size], 0, AV_INPUT_BUFFER_PADDING_SIZE);
400
401 // Only audio:
402 // If a PES packet contains multiple frames, only the AVPacket with the first frame of that PES packet shall have a PTS value, when sending it to the decoder.
403 // The following AVPackets created from this PES packet shall have no PTS value.
404 // When retrieving PTS values from the same PES packet, they will be identical.
406 avpkt->pts = m_buffer.GetPts();
407 else
408 avpkt->pts = AV_NOPTS_VALUE;
409
411
412 m_buffer.Erase(size);
413
414 return avpkt;
415}
416
417/********************************************************************************
418 * Video specific implementation
419 *******************************************************************************/
420
433{
435 return false;
436
439
440 // Looking for the MPEG2 start code and stream type in the PES payload
443 else if (HasLeadingZero(fragment, size)) // Looking for a leading zero byte in front of the start code. Can be present in H.264/HEVC streams.
444 codecPayload++;
446 return false; // No start code: PES packet carries fragmented payload, or unknown codec.
447
448 if (size > &codecPayload[7] - fragment) {
449 if ( codecPayload[0] == H264_STREAM_TYPE && (codecPayload[1] == 0x10 || codecPayload[1] == 0xF0 || codecPayload[7] == 0x64))
451 else if (codecPayload[0] == HEVC_STREAM_TYPE && (codecPayload[1] == 0x10 || codecPayload[1] == 0x50 || codecPayload[7] == 0x40))
453 }
454
455 return m_codec != AV_CODEC_ID_NONE;
456}
457
470{
471 return size > VIDEO_FRAME_START_CODE_LEN + 1 && data[0] == 0 && ReadBytes(&data[1], VIDEO_FRAME_START_CODE_LEN) == VIDEO_FRAME_START_CODE;
472}
473
474/********************************************************************************
475 * Audio specific implementation
476 *******************************************************************************/
477
487{
489
491 return nullptr; // No sync word found in the buffer. Wait for more data.
493 LOGERROR("pes: %s: audio codec changed unexpectedly from %s to %s", __FUNCTION__, avcodec_get_name(m_codec), avcodec_get_name(detectedCodec));
494
496
497 try {
499
500 if (!packet)
501 return nullptr;
502
503 if (m_ptsInvalid) { // the PTS is invalid for this packet because the buffer was truncated before
504 packet->pts = AV_NOPTS_VALUE;
505
506 m_ptsInvalid = false;
507 }
508
509 return packet;
510 } catch (const std::invalid_argument &e) {
511 LOGWARNING("pes: %s: garbage in audio stream received: %s", __FUNCTION__, e.what());
512 // the garbage will be removed in the next call to TruncateBufferUntilFirstValidData()
513 }
514
515 return nullptr;
516}
517
528{
530
532
534
536 LOGDEBUG("pes: %s: truncated %d of %d bytes while searching for sync word", __FUNCTION__, sizeBeforeTruncation - m_buffer.GetSize(), sizeBeforeTruncation);
537 m_ptsInvalid = true;
538 }
539
540 return firstFrame.codecId;
541}
542
556{
557 while (true) {
559
560 if (firstFrame.codecId == AV_CODEC_ID_NONE) // No sync word found in the entire buffer. Keep only the last few bytes that could contain a partial sync word.
561 return SyncWordInfo{AV_CODEC_ID_NONE, std::max(0, (int)m_buffer.GetSize() - MAX_HEADER_SIZE)};
562
563 try {
564 // determine the length of the first found potential frame by reading the frame's header
565 int sizeOfFirstFrame = AudioCodecMap.at(firstFrame.codecId).GetFrameSize(&m_buffer.Peek()[firstFrame.pos]);
567
568 // check if another sync word follows immediately after the first frame to validate the header of the first frame is a real header and no random data
570 // Could not find the second sync word, because there might not be enough data in the buffer to contain a complete second sync word. Wait for more data.
571 // In case we have a false positive and the header's frame size field is invalid, we buffer the following amount of data in worst-case:
572 // - MP2: 6913 bytes (Layer 2/3: 384kbps @ 8kHz + padding)
573 // - AAC LATM: 8194 bytes (13-bit length field max: 0x1FFF + 3)
574 // - AC3: 2788 bytes (frmsizcod=37, fscod=1: 1394 * 2)
575 // - EAC3: 4096 bytes (11-bit field max: 2048 * 2)
576 // - AAC ADTS: 8191 bytes (13-bit length field max: 0x1FFF)
577 // - DTS: 16384 bytes (14-bit length field max: 0x3FFF + 1)
580 // two consecutive frames with the same sync word found, and the first frame's header length field is valid
581 return SyncWordInfo{firstFrame.codecId, firstFrame.pos};
582 } catch (const std::invalid_argument &e) {
583 // Failed to read the frame size from the first frame's header. The found sync word is a false positive.
584 }
585
586 // If we found one sync word, but did not find a second one at the expected position, the first one was a false positive in the middle of random data.
587 // In this case, continue the search one position after the start of the first found sync word.
588 m_buffer.Erase(firstFrame.pos + 1);
589 }
590}
591
604{
605 for (int i = 0; i < size; i++) {
609 }
610
611 return SyncWordInfo{AV_CODEC_ID_NONE, -1};
612}
613
626{
627 for (const auto& [codecId, codecInfo] : AudioCodecMap) {
628 if (size >= codecInfo.minSize && codecInfo.MatchSyncWord(syncWord)) {
629 return codecId;
630 }
631 }
632
633 return AV_CODEC_ID_NONE;
634}
635
650{
651 return AudioCodecMap.at(codec).GetFrameSize(data);
652}
653
665
666/********************************************************************************
667 * PTS tracking buffer
668 *******************************************************************************/
669
/**
 * Push data into the PTS tracking buffer.
 *
 * Appends the given bytes to the end of m_data. If a PTS is supplied, it is
 * recorded in m_pts under the buffer offset at which the new data begins,
 * i.e. the size of m_data before the append. An entry already stored for that
 * offset is overwritten.
 *
 * @param data pointer to the bytes to append
 * @param size number of bytes to append
 *             NOTE(review): no check is made here; assumes size >= 0 and that
 *             data is readable for size bytes - confirm against callers
 * @param pts  PTS belonging to the pushed data, or AV_NOPTS_VALUE if there is none
 */
680void cPtsTrackingBuffer::Push(const uint8_t *data, int size, int64_t pts)
681{
682 if (pts != AV_NOPTS_VALUE) // PES packets not starting with a new frame (fragmented data) have no PTS
// Must happen before the insert below, so that the key is the start offset of the new data.
683 m_pts[m_data.size()] = pts;
684
685 m_data.insert(m_data.end(), data, data + size);
686}
687
702{
703 if (m_data.empty() || amount == 0)
704 return;
705
706 if (amount > m_data.size()) {
707 LOGERROR("pes: %s: %s: erase amount %zu exceeds buffer size %zu!", __FUNCTION__, m_identifier, amount, m_data.size());
708 amount = m_data.size();
709 }
710
711 // Only PES packets have PTS values, but not the (fragmented) frames inside.
712 // The reassembled frame's PTS value will become the PTS value of the PES packet where the frame starts.
713 // Therefore, always keep the PTS value for position 0 in the buffer, which is the PTS value of the PES packet where the frame starts.
714 // This is normally the largest PTS value to be removed, or, if future position 0 already has a PTS value, that value will be used.
716 auto it = m_pts.upper_bound(amount);
717 if (it == m_pts.begin())
718 LOGFATAL("pes: %s: %s: no PTS value found for position 0 after erasing %zu bytes", __FUNCTION__, m_identifier, amount);
719 else {
720 --it; // Move to the last entry before 'amount'
721 smallestPts = it->second;
722 }
723
724 std::map<size_t, int64_t> adjusted_pts;
725 for (const auto& [pos, pts] : m_pts) {
726 if (pos >= amount) // erase all PTS entries for data that will be removed
727 adjusted_pts[pos - amount] = pts; // adjust remaining PTS entries to the new data indices
728 }
729 m_pts = std::move(adjusted_pts);
730
731 m_pts[0] = smallestPts;
732
733 m_data.erase(m_data.begin(), m_data.begin() + amount);
734}
735
745{
746 if (m_pts.empty())
747 return AV_NOPTS_VALUE;
748
749 return m_pts.begin()->second;
750}
static constexpr uint32_t PES_PACKET_START_CODE_PREFIX
Definition pes.h:53
bool m_valid
flag indicating if the PES packet is valid
Definition pes.h:46
virtual bool IsStreamIdValid(void)=0
int m_size
size of the PES packet
Definition pes.h:48
const uint8_t * GetPayload(void)
Get a pointer to the PES payload data.
Definition pes.cpp:329
const uint8_t * m_data
pointer to the raw PES packet data
Definition pes.h:47
uint8_t GetStreamId(void)
Definition pes.h:39
cPes(const uint8_t *, int, bool)
Create a PES packet parser.
Definition pes.cpp:226
int GetPayloadSize(void)
Get the size of the PES payload.
Definition pes.cpp:342
bool HasPts(void)
Check if the PES packet contains a Presentation Time Stamp (PTS)
Definition pes.cpp:300
const char * m_identifier
identifier string for logging
Definition pes.h:49
bool IsHeaderValid(void)
Check if the PES header is valid.
Definition pes.cpp:287
int GetPacketLength(void)
Get the total length of the PES packet.
Definition pes.cpp:364
bool IsValid(void)
Check if the PES packet is valid.
Definition pes.cpp:273
int64_t GetPts(void)
Get the Presentation Time Stamp (PTS) from the PES header.
Definition pes.cpp:313
void Init(void)
Initialize and validate the PES packet.
Definition pes.cpp:244
int64_t GetPts(void)
Get the PTS value for the current buffer position.
Definition pes.cpp:744
int GetSize(void)
Definition pes.h:105
void Push(const uint8_t *, int, int64_t)
Push data into the PTS tracking buffer.
Definition pes.cpp:680
std::vector< uint8_t > m_data
Byte buffer.
Definition pes.h:110
std::map< size_t, int64_t > m_pts
Map of buffer positions to PTS values.
Definition pes.h:109
const uint8_t * Peek(void)
Definition pes.h:103
void Reset(void)
Definition pes.h:104
void Erase(size_t)
Erase data from the beginning of the buffer.
Definition pes.cpp:701
const char * m_identifier
Definition pes.h:108
AVCodecID TruncateBufferUntilFirstValidData(void)
Truncate buffer until the first valid audio frame.
Definition pes.cpp:527
static constexpr int MAX_HEADER_SIZE
Definition pes.h:188
AVCodecID DetectCodecFromSyncWord(const uint8_t *, int)
Detect audio codec from sync word pattern.
Definition pes.cpp:625
SyncWordInfo FindSyncWord(const uint8_t *, int)
Find the first audio sync word in data.
Definition pes.cpp:603
SyncWordInfo FindTwoConsecutiveFramesWithSameSyncWord()
Find two consecutive audio frames with the same sync word.
Definition pes.cpp:555
AVPacket * PopAvPacket(void) override
Pop an audio AVPacket from the reassembly buffer.
Definition pes.cpp:486
bool m_ptsInvalid
flag indicating if PTS is invalid for current buffer, because it was truncated
Definition pes.h:189
int GetFrameSizeForCodec(AVCodecID, const uint8_t *)
Get the frame size for a given codec and frame header.
Definition pes.cpp:649
static constexpr uint32_t VIDEO_FRAME_START_CODE
Definition pes.h:152
static constexpr int VIDEO_FRAME_START_CODE_LEN
Definition pes.h:153
static constexpr uint8_t H264_STREAM_TYPE
Definition pes.h:156
static constexpr uint8_t MPEG2_STREAM_TYPE
Definition pes.h:155
bool HasLeadingZero(const uint8_t *, int)
Check if video data has a leading zero byte before the start code.
Definition pes.cpp:469
static constexpr uint8_t HEVC_STREAM_TYPE
Definition pes.h:157
bool ParseCodecHeader(const uint8_t *, int)
Parse video codec header to detect codec type.
Definition pes.cpp:432
AVCodecID m_codec
detected codec ID
Definition pes.h:132
void Reset(void)
Reset the reassembly buffer.
Definition pes.cpp:659
int64_t m_lastPoppedPts
PTS of the last popped AVPacket.
Definition pes.h:134
virtual AVPacket * PopAvPacket(void)=0
cPtsTrackingBuffer m_buffer
fragmentation buffer
Definition pes.h:133
#define LOGDEBUG
log to LOG_DEBUG
Definition logger.h:45
#define LOGERROR
log to LOG_ERR
Definition logger.h:39
#define AV_NOPTS_VALUE
Definition misc.h:74
#define LOGWARNING
log to LOG_WARN
Definition logger.h:41
#define LOGFATAL
log to LOG_ERR and abort
Definition logger.h:37
static uint32_t ReadBytes(const uint8_t *data, int count)
Read count bytes from data and return them as one value
Definition misc.h:149
static const std::map< AVCodecID, CodecInfo > AudioCodecMap
Map of Audio Codec Information.
Definition pes.cpp:47
Logger Header File.
Misc Functions.
PES Packet Parser Header File.
Codec Information Structure.
Definition pes.cpp:35
std::function< int(const uint8_t *)> GetFrameSize
Definition pes.cpp:38
int minSize
Definition pes.cpp:36
std::function< bool(const uint8_t *)> MatchSyncWord
Definition pes.cpp:37
Information about a detected audio sync word.
Definition pes.h:165