Source: lib/cea/mp4_cea_parser.js

  1. /*! @license
  2. * Shaka Player
  3. * Copyright 2016 Google LLC
  4. * SPDX-License-Identifier: Apache-2.0
  5. */
  6. goog.provide('shaka.cea.Mp4CeaParser');
  7. goog.require('goog.asserts');
  8. goog.require('shaka.cea.CeaUtils');
  9. goog.require('shaka.cea.SeiProcessor');
  10. goog.require('shaka.log');
  11. goog.require('shaka.media.ClosedCaptionParser');
  12. goog.require('shaka.util.DataViewReader');
  13. goog.require('shaka.util.Error');
  14. goog.require('shaka.util.Mp4Parser');
  15. goog.require('shaka.util.Mp4BoxParsers');
  16. /**
  17. * MPEG4 stream parser used for extracting 708 closed captions data.
  18. * @implements {shaka.extern.ICeaParser}
  19. * @export
  20. */
  21. shaka.cea.Mp4CeaParser = class {
  22. /** */
  23. constructor() {
  24. /**
  25. * SEI data processor.
  26. * @private
  27. * @const {!shaka.cea.SeiProcessor}
  28. */
  29. this.seiProcessor_ = new shaka.cea.SeiProcessor();
  30. /**
  31. * Map of track id to corresponding timescale.
  32. * @private {!Map<number, number>}
  33. */
  34. this.trackIdToTimescale_ = new Map();
  35. /**
  36. * Default sample duration, as specified by the TREX box.
  37. * @private {number}
  38. */
  39. this.defaultSampleDuration_ = 0;
  40. /**
  41. * Default sample size, as specified by the TREX box.
  42. * @private {number}
  43. */
  44. this.defaultSampleSize_ = 0;
  45. /**
  46. * @private {shaka.cea.Mp4CeaParser.BitstreamFormat}
  47. */
  48. this.bitstreamFormat_ = shaka.cea.Mp4CeaParser.BitstreamFormat.UNKNOWN;
  49. }
  50. /**
  51. * Parses the init segment. Gets Default Sample Duration and Size from the
  52. * TREX box, and constructs a map of Track IDs to timescales. Each TRAK box
  53. * contains a track header (TKHD) containing track ID, and a media header box
  54. * (MDHD) containing the timescale for the track
  55. * @override
  56. */
  57. init(initSegment) {
  58. const Mp4Parser = shaka.util.Mp4Parser;
  59. const BitstreamFormat = shaka.cea.Mp4CeaParser.BitstreamFormat;
  60. const trackIds = [];
  61. const timescales = [];
  62. const codecBoxParser = (box) => this.setBitstreamFormat_(box.name);
  63. new Mp4Parser()
  64. .box('moov', Mp4Parser.children)
  65. .box('mvex', Mp4Parser.children)
  66. .fullBox('trex', (box) => {
  67. const parsedTREXBox = shaka.util.Mp4BoxParsers.parseTREX(
  68. box.reader);
  69. this.defaultSampleDuration_ = parsedTREXBox.defaultSampleDuration;
  70. this.defaultSampleSize_ = parsedTREXBox.defaultSampleSize;
  71. })
  72. .box('trak', Mp4Parser.children)
  73. .fullBox('tkhd', (box) => {
  74. goog.asserts.assert(
  75. box.version != null,
  76. 'TKHD is a full box and should have a valid version.');
  77. const parsedTKHDBox = shaka.util.Mp4BoxParsers.parseTKHD(
  78. box.reader, box.version);
  79. trackIds.push(parsedTKHDBox.trackId);
  80. })
  81. .box('mdia', Mp4Parser.children)
  82. .fullBox('mdhd', (box) => {
  83. goog.asserts.assert(
  84. box.version != null,
  85. 'MDHD is a full box and should have a valid version.');
  86. const parsedMDHDBox = shaka.util.Mp4BoxParsers.parseMDHD(
  87. box.reader, box.version);
  88. timescales.push(parsedMDHDBox.timescale);
  89. })
  90. .box('minf', Mp4Parser.children)
  91. .box('stbl', Mp4Parser.children)
  92. .fullBox('stsd', Mp4Parser.sampleDescription)
  93. // These are the various boxes that signal a codec.
  94. .box('avc1', codecBoxParser)
  95. .box('avc3', codecBoxParser)
  96. .box('hev1', codecBoxParser)
  97. .box('hvc1', codecBoxParser)
  98. .box('dvav', codecBoxParser)
  99. .box('dva1', codecBoxParser)
  100. .box('dvh1', codecBoxParser)
  101. .box('dvhe', codecBoxParser)
  102. // This signals an encrypted sample, which we can go inside of to find
  103. // the codec used.
  104. .box('encv', Mp4Parser.visualSampleEntry)
  105. .box('sinf', Mp4Parser.children)
  106. .box('frma', (box) => {
  107. const {codec} = shaka.util.Mp4BoxParsers.parseFRMA(box.reader);
  108. this.setBitstreamFormat_(codec);
  109. })
  110. .parse(initSegment, /* partialOkay= */ true);
  111. // At least one track should exist, and each track should have a
  112. // corresponding Id in TKHD box, and timescale in its MDHD box
  113. if (!trackIds.length|| !timescales.length ||
  114. trackIds.length != timescales.length) {
  115. throw new shaka.util.Error(
  116. shaka.util.Error.Severity.CRITICAL,
  117. shaka.util.Error.Category.TEXT,
  118. shaka.util.Error.Code.INVALID_MP4_CEA);
  119. }
  120. if (this.bitstreamFormat_ == BitstreamFormat.UNKNOWN) {
  121. shaka.log.alwaysWarn(
  122. 'Unable to determine bitstream format for CEA parsing!');
  123. }
  124. // Populate the map from track Id to timescale
  125. trackIds.forEach((trackId, idx) => {
  126. this.trackIdToTimescale_.set(trackId, timescales[idx]);
  127. });
  128. }
  129. /**
  130. * Parses each video segment. In fragmented MP4s, MOOF and MDAT come in
  131. * pairs. The following logic gets the necessary info from MOOFs to parse
  132. * MDATs (base media decode time, sample sizes/offsets/durations, etc),
  133. * and then parses the MDAT boxes for CEA-708 packets using this information.
  134. * CEA-708 packets are returned in the callback.
  135. * @override
  136. */
  137. parse(mediaSegment) {
  138. const Mp4Parser = shaka.util.Mp4Parser;
  139. const BitstreamFormat = shaka.cea.Mp4CeaParser.BitstreamFormat;
  140. if (this.bitstreamFormat_ == BitstreamFormat.UNKNOWN) {
  141. // We don't know how to extract SEI from this.
  142. return [];
  143. }
  144. /** @type {!Array<!shaka.extern.ICeaParser.CaptionPacket>} **/
  145. const captionPackets = [];
  146. let moofOffset = 0;
  147. /** @type {!Array<!shaka.cea.Mp4CeaParser.ParsedTRAF>} */
  148. let parsedTRAFs = [];
  149. new Mp4Parser()
  150. .box('moof', (box) => {
  151. moofOffset = box.start;
  152. // traf box parsing is reset on each moof.
  153. parsedTRAFs = [];
  154. Mp4Parser.children(box);
  155. })
  156. .box('traf', (box) => {
  157. parsedTRAFs.push({
  158. baseMediaDecodeTime: null,
  159. defaultSampleDuration: this.defaultSampleDuration_,
  160. defaultSampleSize: this.defaultSampleSize_,
  161. parsedTRUNs: [],
  162. timescale: shaka.cea.CeaUtils.DEFAULT_TIMESCALE_VALUE,
  163. });
  164. Mp4Parser.children(box);
  165. })
  166. .fullBox('trun', (box) => {
  167. goog.asserts.assert(
  168. box.version != null && box.flags != null,
  169. 'TRUN is a full box and should have a valid version & flags.');
  170. const lastTRAF = parsedTRAFs[parsedTRAFs.length - 1];
  171. const parsedTRUN = shaka.util.Mp4BoxParsers.parseTRUN(
  172. box.reader, box.version, box.flags);
  173. lastTRAF.parsedTRUNs.push(parsedTRUN);
  174. })
  175. .fullBox('tfhd', (box) => {
  176. goog.asserts.assert(
  177. box.flags != null,
  178. 'TFHD is a full box and should have valid flags.');
  179. const lastTRAF = parsedTRAFs[parsedTRAFs.length - 1];
  180. const parsedTFHD = shaka.util.Mp4BoxParsers.parseTFHD(
  181. box.reader, box.flags);
  182. // If specified, defaultSampleDuration and defaultSampleSize
  183. // override the ones specified in the TREX box
  184. lastTRAF.defaultSampleDuration = parsedTFHD.defaultSampleDuration ||
  185. this.defaultSampleDuration_;
  186. lastTRAF.defaultSampleSize = parsedTFHD.defaultSampleSize ||
  187. this.defaultSampleSize_;
  188. const trackId = parsedTFHD.trackId;
  189. // Get the timescale from the track Id
  190. if (this.trackIdToTimescale_.has(trackId)) {
  191. lastTRAF.timescale = this.trackIdToTimescale_.get(trackId);
  192. }
  193. })
  194. .fullBox('tfdt', (box) => {
  195. goog.asserts.assert(
  196. box.version != null,
  197. 'TFDT is a full box and should have a valid version.');
  198. const lastTRAF = parsedTRAFs[parsedTRAFs.length - 1];
  199. const parsedTFDT = shaka.util.Mp4BoxParsers.parseTFDTInaccurate(
  200. box.reader, box.version);
  201. lastTRAF.baseMediaDecodeTime = parsedTFDT.baseMediaDecodeTime;
  202. })
  203. .box('mdat', (box) => {
  204. const offset = moofOffset - box.start - 8;
  205. const initialPosition = box.reader.getPosition();
  206. for (const parsedTRAF of parsedTRAFs) {
  207. if (parsedTRAF.baseMediaDecodeTime === null) {
  208. // This field should have been populated by the Base Media Decode
  209. // Time in the tfdt box.
  210. shaka.log.alwaysWarn(
  211. 'Unable to find base media decode time for CEA captions!');
  212. throw new shaka.util.Error(
  213. shaka.util.Error.Severity.CRITICAL,
  214. shaka.util.Error.Category.TEXT,
  215. shaka.util.Error.Code.INVALID_MP4_CEA);
  216. }
  217. box.reader.seek(initialPosition);
  218. this.parseMdat_(box.reader,
  219. parsedTRAF.baseMediaDecodeTime,
  220. parsedTRAF.timescale,
  221. parsedTRAF.defaultSampleDuration,
  222. parsedTRAF.defaultSampleSize,
  223. offset,
  224. parsedTRAF.parsedTRUNs,
  225. captionPackets);
  226. }
  227. })
  228. .parse(mediaSegment, /* partialOkay= */ false);
  229. return captionPackets;
  230. }
  231. /**
  232. * Parse MDAT box.
  233. * @param {!shaka.util.DataViewReader} reader
  234. * @param {number} time
  235. * @param {number} timescale
  236. * @param {number} defaultSampleDuration
  237. * @param {number} defaultSampleSize
  238. * @param {number} offset
  239. * @param {!Array<shaka.util.ParsedTRUNBox>} parsedTRUNs
  240. * @param {!Array<!shaka.extern.ICeaParser.CaptionPacket>} captionPackets
  241. * @private
  242. */
  243. parseMdat_(reader, time, timescale, defaultSampleDuration,
  244. defaultSampleSize, offset, parsedTRUNs, captionPackets) {
  245. const BitstreamFormat = shaka.cea.Mp4CeaParser.BitstreamFormat;
  246. const CeaUtils = shaka.cea.CeaUtils;
  247. let sampleIndex = 0;
  248. // The fields in each ParsedTRUNSample contained in the sampleData
  249. // array are nullable. In the case of sample data and sample duration,
  250. // we use the defaults provided by the TREX/TFHD boxes. For sample
  251. // composition time offset, we default to 0.
  252. let sampleSize = defaultSampleSize;
  253. // Combine all sample data. This assumes that the samples described across
  254. // multiple trun boxes are still continuous in the mdat box.
  255. const sampleDatas = parsedTRUNs.map((t) => t.sampleData);
  256. const sampleData = [].concat(...sampleDatas);
  257. if (sampleData.length) {
  258. sampleSize = sampleData[0].sampleSize || defaultSampleSize;
  259. }
  260. reader.skip(offset + parsedTRUNs[0].dataOffset);
  261. while (reader.hasMoreData()) {
  262. const naluSize = reader.readUint32();
  263. const naluHeader = reader.readUint8();
  264. let naluType = null;
  265. let isSeiMessage = false;
  266. let naluHeaderSize = 1;
  267. goog.asserts.assert(this.bitstreamFormat_ != BitstreamFormat.UNKNOWN,
  268. 'Bitstream format should have been checked before now!');
  269. switch (this.bitstreamFormat_) {
  270. case BitstreamFormat.H264:
  271. naluType = naluHeader & 0x1f;
  272. isSeiMessage = naluType == CeaUtils.H264_NALU_TYPE_SEI;
  273. break;
  274. case BitstreamFormat.H265:
  275. naluHeaderSize = 2;
  276. reader.skip(1);
  277. naluType = (naluHeader >> 1) & 0x3f;
  278. isSeiMessage =
  279. naluType == CeaUtils.H265_PREFIX_NALU_TYPE_SEI ||
  280. naluType == CeaUtils.H265_SUFFIX_NALU_TYPE_SEI;
  281. break;
  282. default:
  283. return;
  284. }
  285. if (isSeiMessage) {
  286. let timeOffset = 0;
  287. if (sampleIndex < sampleData.length) {
  288. timeOffset = sampleData[sampleIndex].sampleCompositionTimeOffset || 0;
  289. }
  290. const pts = (time + timeOffset) / timescale;
  291. for (const packet of this.seiProcessor_
  292. .process(reader.readBytes(naluSize - naluHeaderSize))) {
  293. captionPackets.push({
  294. packet,
  295. pts,
  296. });
  297. }
  298. } else {
  299. try {
  300. reader.skip(naluSize - naluHeaderSize);
  301. } catch (e) {
  302. // It is necessary to ignore this error because it can break the start
  303. // of playback even if the user does not want to see the subtitles.
  304. break;
  305. }
  306. }
  307. sampleSize -= (naluSize + 4);
  308. if (sampleSize == 0) {
  309. if (sampleIndex < sampleData.length) {
  310. time += sampleData[sampleIndex].sampleDuration ||
  311. defaultSampleDuration;
  312. } else {
  313. time += defaultSampleDuration;
  314. }
  315. sampleIndex++;
  316. if (sampleIndex < sampleData.length) {
  317. sampleSize = sampleData[sampleIndex].sampleSize || defaultSampleSize;
  318. } else {
  319. sampleSize = defaultSampleSize;
  320. }
  321. }
  322. }
  323. }
  324. /**
  325. * @param {string} codec A fourcc for a codec.
  326. * @private
  327. */
  328. setBitstreamFormat_(codec) {
  329. if (codec in shaka.cea.Mp4CeaParser.CodecBitstreamMap_) {
  330. this.bitstreamFormat_ = shaka.cea.Mp4CeaParser.CodecBitstreamMap_[codec];
  331. }
  332. }
  333. };
  334. /** @enum {number} */
  335. shaka.cea.Mp4CeaParser.BitstreamFormat = {
  336. UNKNOWN: 0,
  337. H264: 1,
  338. H265: 2,
  339. };
  340. /** @private {Object.<string, shaka.cea.Mp4CeaParser.BitstreamFormat>} */
  341. shaka.cea.Mp4CeaParser.CodecBitstreamMap_ = {
  342. 'avc1': shaka.cea.Mp4CeaParser.BitstreamFormat.H264,
  343. 'avc3': shaka.cea.Mp4CeaParser.BitstreamFormat.H264,
  344. 'hev1': shaka.cea.Mp4CeaParser.BitstreamFormat.H265,
  345. 'hvc1': shaka.cea.Mp4CeaParser.BitstreamFormat.H265,
  346. // Dolby Vision based in AVC
  347. 'dvav': shaka.cea.Mp4CeaParser.BitstreamFormat.H264,
  348. 'dva1': shaka.cea.Mp4CeaParser.BitstreamFormat.H264,
  349. // Dolby Vision based in HEVC
  350. 'dvh1': shaka.cea.Mp4CeaParser.BitstreamFormat.H265,
  351. 'dvhe': shaka.cea.Mp4CeaParser.BitstreamFormat.H265,
  352. };
  353. /**
  354. * @typedef {{
  355. * baseMediaDecodeTime: ?number,
  356. * defaultSampleDuration: number,
  357. * defaultSampleSize: number,
  358. * parsedTRUNs: !Array<shaka.util.ParsedTRUNBox>,
  359. * timescale: number
  360. * }}
  361. *
  362. * @property {?number} baseMediaDecodeTime
  363. * @property {number} defaultSampleDuration
  364. * @property {number} defaultSampleSize
  365. * @property {!Array<shaka.util.ParsedTRUNBox>} parsedTRUNs
  366. * @property {?number} timescale
  367. */
  368. shaka.cea.Mp4CeaParser.ParsedTRAF;
  369. shaka.media.ClosedCaptionParser.registerParser('video/mp4',
  370. () => new shaka.cea.Mp4CeaParser());