From d6d0ed854a25473bcaa4d9811c862b72ededefd0 Mon Sep 17 00:00:00 2001 From: Troy Benson Date: Sat, 11 Jan 2025 23:09:46 +0000 Subject: [PATCH] small improve --- crates/flv/src/audio.rs | 80 +++++++++++++++++++++--- crates/flv/src/file.rs | 14 +++-- crates/flv/src/header.rs | 14 ++++- crates/flv/src/lib.rs | 62 +++++++++++++------ crates/flv/src/macros.rs | 114 ++++++----------------------------- crates/flv/src/tag.rs | 59 +++++++++++++----- crates/flv/src/video.rs | 112 ++++++++++++++++++++++++++++------ crates/transmuxer/src/lib.rs | 6 +- 8 files changed, 292 insertions(+), 169 deletions(-) diff --git a/crates/flv/src/audio.rs b/crates/flv/src/audio.rs index 82c4ff2c8..71c96c09b 100644 --- a/crates/flv/src/audio.rs +++ b/crates/flv/src/audio.rs @@ -7,21 +7,38 @@ use scuffle_bytes_util::BytesCursorExt; use super::aac::{AacPacket, AacPacketType}; use crate::macros::nutype_enum; +/// FLV Tag Audio Data +/// +/// This is the container for the audio data. +/// +/// Defined by: +/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags) +/// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA) #[derive(Debug, Clone, PartialEq)] pub struct AudioData { + /// The sound rate of the audio data. (2 bits) pub sound_rate: SoundRate, + /// The sound size of the audio data. (1 bit) pub sound_size: SoundSize, + /// The sound type of the audio data. (1 bit) pub sound_type: SoundType, + /// The body of the audio data. 
pub body: AudioDataBody, } impl AudioData { pub fn demux(reader: &mut io::Cursor) -> io::Result { let byte = reader.read_u8()?; + // SoundFormat is the first 4 bits of the byte let sound_format = SoundFormat::from(byte >> 4); + // SoundRate is the next 2 bits of the byte let sound_rate = SoundRate::from((byte >> 2) & 0b11); + // SoundSize is the next bit of the byte let sound_size = SoundSize::from((byte >> 1) & 0b1); + // SoundType is the last bit of the byte let sound_type = SoundType::from(byte & 0b1); + + // Now we can demux the body of the audio data let body = AudioDataBody::demux(sound_format, reader)?; Ok(AudioData { @@ -35,28 +52,49 @@ impl AudioData { nutype_enum! { /// FLV Sound Format - /// Defined in the FLV specification. Chapter 1 - AudioTags - /// The SoundFormat indicates the codec used to encode the sound. + /// + /// Denotes the type of the underlying data packet + /// + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA) pub enum SoundFormat(u8) { + /// Linear PCM, platform endian LinearPcmPlatformEndian = 0, + /// ADPCM Adpcm = 1, + /// MP3 Mp3 = 2, + /// Linear PCM, little endian LinearPcmLittleEndian = 3, + /// Nellymoser 16Khz Mono Nellymoser16KhzMono = 4, + /// Nellymoser 8Khz Mono Nellymoser8KhzMono = 5, + /// Nellymoser Nellymoser = 6, + /// G.711 A-Law logarithmic PCM G711ALaw = 7, + /// G.711 Mu-Law logarithmic PCM G711MuLaw = 8, - Reserved = 9, + /// AAC Aac = 10, + /// Speex Speex = 11, + /// Mp3 8Khz Mp38Khz = 14, + /// Device specific sound DeviceSpecificSound = 15, } } /// FLV Tag Audio Data Body -/// Defined by [video_file_format_spec_v10_1 - Annex E.4.2.2 AUDIODATA] +/// +/// This is the container for the audio data body. 
+/// +/// Defined by: +/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags) +/// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA) #[derive(Debug, Clone, PartialEq)] pub enum AudioDataBody { /// AAC Audio Packet @@ -69,6 +107,8 @@ impl AudioDataBody { pub fn demux(sound_format: SoundFormat, reader: &mut io::Cursor) -> io::Result { match sound_format { SoundFormat::Aac => { + // For some reason the spec adds a specific byte before the AAC data. + // This byte is the AAC packet type. let aac_packet_type = AacPacketType::from(reader.read_u8()?); Ok(Self::Aac(AacPacket::demux(aac_packet_type, reader)?)) } @@ -82,32 +122,52 @@ impl AudioDataBody { nutype_enum! { /// FLV Sound Rate - /// Defined in the FLV specification. Chapter 1 - AudioTags - /// The SoundRate indicates the sampling rate of the audio data. + /// + /// Denotes the sampling rate of the audio data. + /// + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA) pub enum SoundRate(u8) { + /// 5.5 KHz Hz5500 = 0, + /// 11 KHz Hz11000 = 1, + /// 22 KHz Hz22000 = 2, + /// 44 KHz Hz44000 = 3, } } nutype_enum! { /// FLV Sound Size - /// Defined in the FLV specification. Chapter 1 - AudioTags - /// The SoundSize indicates the size of each sample in the audio data. + /// + /// Denotes the size of each sample in the audio data. + /// + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA) pub enum SoundSize(u8) { + /// 8 bit Bit8 = 0, + /// 16 bit Bit16 = 1, } } nutype_enum! { /// FLV Sound Type - /// Defined in the FLV specification. Chapter 1 - AudioTags - /// The SoundType indicates the number of channels in the audio data. + /// + /// Denotes the number of channels in the audio data. 
+ /// + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA) pub enum SoundType(u8) { + /// Mono Mono = 0, + /// Stereo Stereo = 1, } } diff --git a/crates/flv/src/file.rs b/crates/flv/src/file.rs index edcafe5ec..7ed7f9015 100644 --- a/crates/flv/src/file.rs +++ b/crates/flv/src/file.rs @@ -4,10 +4,11 @@ use bytes::{Buf, Bytes}; use super::header::FlvHeader; use super::tag::FlvTag; -/// An FLV file -/// Defined by [video_file_format_spec_v10_1 - Annex E. The FLV File Format] -/// Specifically this is a combonation of a [`FlvHeader`] followed by the -/// `FLVFileBody` (which is just a series of [`FlvTag`]s) +/// An FLV file is a combination of a [`FlvHeader`] followed by the `FLVFileBody` (which is just a series of [`FlvTag`]s) +/// +/// The `FLVFileBody` is defined by: +/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Page 8) +/// - video_file_format_spec_v10_1.pdf (Annex E.3 - The FLV File Body) #[derive(Debug, Clone, PartialEq)] pub struct FlvFile { pub header: FlvHeader, @@ -20,12 +21,15 @@ impl FlvFile { let mut tags = Vec::new(); while reader.has_remaining() { - reader.read_u32::()?; // previous tag size + // We don't care about the previous tag size, it's only really used for seeking backwards. + reader.read_u32::()?; + // If there is no more data, we can stop reading. if !reader.has_remaining() { break; } + // Demux the tag from the reader. let tag = FlvTag::demux(reader)?; tags.push(tag); } diff --git a/crates/flv/src/header.rs b/crates/flv/src/header.rs index e98bb793f..70d2ffd64 100644 --- a/crates/flv/src/header.rs +++ b/crates/flv/src/header.rs @@ -5,23 +5,33 @@ use bytes::Bytes; use scuffle_bytes_util::BytesCursorExt; /// The FLV Header -/// Defined by [video_file_format_spec_v10_1 - Annex E.2 The FLV Header] /// Whenever a FLV file is read these are the first 9 bytes of the file. 
+/// +/// Defined by: +/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV Header - Page 8) +/// - video_file_format_spec_v10_1.pdf (Annex E.2 - The FLV Header) #[derive(Debug, Clone, PartialEq)] pub struct FlvHeader { + /// The version of the FLV file. pub version: u8, + /// Whether the FLV file has audio. pub has_audio: bool, + /// Whether the FLV file has video. pub has_video: bool, + /// The extra data in the FLV file. + /// Since the header provides a data offset, this is the bytes between the end of the header and the start of the data. pub extra: Bytes, } impl FlvHeader { + /// Demux the FLV header from the given reader. + /// The reader will be returned in the position of the start of the data offset. pub fn demux(reader: &mut io::Cursor) -> io::Result { let start = reader.position() as usize; let signature = reader.read_u24::()?; - // 0 byte at the end because we are only reading 3 bytes not 4. + // 0 byte at the beginning because we are only reading 3 bytes not 4. if signature != u32::from_be_bytes([0, b'F', b'L', b'V']) { return Err(io::Error::new(io::ErrorKind::InvalidData, "invalid signature")); } diff --git a/crates/flv/src/lib.rs b/crates/flv/src/lib.rs index 7554a7322..056fb0d80 100644 --- a/crates/flv/src/lib.rs +++ b/crates/flv/src/lib.rs @@ -1,3 +1,27 @@ +//! # scuffle-flv +//! +//! +//! [![crates.io](https://img.shields.io/crates/v/scuffle-flv.svg)](https://crates.io/crates/scuffle-flv) [![docs.rs](https://img.shields.io/docsrs/scuffle-flv)](https://docs.rs/scuffle-flv) +//! +//! --- +//! +//! A pure Rust implementation of the FLV format, allowing for demuxing of FLV files or streams. +//! +//! This does not support all FLV features (mainly those from FLV 10.1), however it does support some newer features, from the enhanced FLV specification. +//! +//! ## Specifications +//! +//! - +//! - +//! - +//! - +//! +//! ## License +//! +//! This project is licensed under the [MIT](./LICENSE.MIT) or [Apache-2.0](./LICENSE.Apache-2.0) license. +//! 
You can choose between one of them if you use this work. +//! +//! `SPDX-License-Identifier: MIT OR Apache-2.0` #![cfg_attr(all(coverage_nightly, test), feature(coverage_attribute))] pub mod aac; @@ -7,11 +31,12 @@ pub mod avc; pub mod file; pub mod header; pub mod hevc; -mod macros; pub mod script; pub mod tag; pub mod video; +mod macros; + pub use crate::file::FlvFile; pub use crate::header::FlvHeader; pub use crate::tag::{FlvTag, FlvTagData, FlvTagType}; @@ -54,12 +79,13 @@ mod tests { assert!(flv.header.has_video); assert_eq!(flv.header.extra.len(), 0); + let mut tags = flv.tags.into_iter(); // Metadata tag { let tag = tags.next().expect("expected tag"); - assert_eq!(tag.timestamp, 0); + assert_eq!(tag.timestamp_ms, 0); assert_eq!(tag.stream_id, 0); // This is a metadata tag @@ -191,7 +217,7 @@ mod tests { // Video Sequence Header Tag { let tag = tags.next().expect("expected tag"); - assert_eq!(tag.timestamp, 0); + assert_eq!(tag.timestamp_ms, 0); assert_eq!(tag.stream_id, 0); // This is a video tag @@ -241,7 +267,7 @@ mod tests { // Audio Sequence Header Tag { let tag = tags.next().expect("expected tag"); - assert_eq!(tag.timestamp, 0); + assert_eq!(tag.timestamp_ms, 0); assert_eq!(tag.stream_id, 0); let (data, sound_rate, sound_size, sound_type) = match tag.data { @@ -281,10 +307,10 @@ mod tests { let mut last_timestamp = 0; let mut read_seq_end = false; for tag in tags { - assert!(tag.timestamp >= last_timestamp); + assert!(tag.timestamp_ms >= last_timestamp); assert_eq!(tag.stream_id, 0); - last_timestamp = tag.timestamp; + last_timestamp = tag.timestamp_ms; match tag.data { FlvTagData::Audio(AudioData { @@ -343,7 +369,7 @@ mod tests { // Metadata tag { let tag = tags.next().expect("expected tag"); - assert_eq!(tag.timestamp, 0); + assert_eq!(tag.timestamp_ms, 0); assert_eq!(tag.stream_id, 0); // This is a metadata tag @@ -451,7 +477,7 @@ mod tests { // Audio Sequence Header Tag { let tag = tags.next().expect("expected tag"); - assert_eq!(tag.timestamp, 0); 
+ assert_eq!(tag.timestamp_ms, 0); assert_eq!(tag.stream_id, 0); let (body, sound_rate, sound_size, sound_type) = match tag.data { @@ -490,7 +516,7 @@ mod tests { // Video Sequence Header Tag { let tag = tags.next().expect("expected tag"); - assert_eq!(tag.timestamp, 0); + assert_eq!(tag.timestamp_ms, 0); assert_eq!(tag.stream_id, 0); // This is a video tag @@ -527,11 +553,11 @@ mod tests { let mut last_timestamp = 0; let mut read_seq_end = false; for tag in tags { - assert!(tag.timestamp >= last_timestamp || tag.timestamp == 0); // Timestamps should be monotonically increasing or 0 + assert!(tag.timestamp_ms >= last_timestamp || tag.timestamp_ms == 0); // Timestamps should be monotonically increasing or 0 assert_eq!(tag.stream_id, 0); - if tag.timestamp != 0 { - last_timestamp = tag.timestamp; + if tag.timestamp_ms != 0 { + last_timestamp = tag.timestamp_ms; } match tag.data { @@ -593,7 +619,7 @@ mod tests { // Metadata tag { let tag = tags.next().expect("expected tag"); - assert_eq!(tag.timestamp, 0); + assert_eq!(tag.timestamp_ms, 0); assert_eq!(tag.stream_id, 0); // This is a metadata tag @@ -701,7 +727,7 @@ mod tests { // Audio Sequence Header Tag { let tag = tags.next().expect("expected tag"); - assert_eq!(tag.timestamp, 0); + assert_eq!(tag.timestamp_ms, 0); assert_eq!(tag.stream_id, 0); let (body, sound_rate, sound_size, sound_type) = match tag.data { @@ -740,7 +766,7 @@ mod tests { // Video Sequence Header Tag { let tag = tags.next().expect("expected tag"); - assert_eq!(tag.timestamp, 0); + assert_eq!(tag.timestamp_ms, 0); assert_eq!(tag.stream_id, 0); // This is a video tag @@ -803,11 +829,11 @@ mod tests { let mut last_timestamp = 0; let mut read_seq_end = false; for tag in tags { - assert!(tag.timestamp >= last_timestamp || tag.timestamp == 0); // Timestamps should be monotonically increasing or 0 + assert!(tag.timestamp_ms >= last_timestamp || tag.timestamp_ms == 0); // Timestamps should be monotonically increasing or 0 assert_eq!(tag.stream_id, 0); - 
if tag.timestamp != 0 { - last_timestamp = tag.timestamp; + if tag.timestamp_ms != 0 { + last_timestamp = tag.timestamp_ms; } match tag.data { diff --git a/crates/flv/src/macros.rs b/crates/flv/src/macros.rs index d97b7a6cf..49aa5e5cf 100644 --- a/crates/flv/src/macros.rs +++ b/crates/flv/src/macros.rs @@ -25,122 +25,42 @@ macro_rules! nutype_enum { ),*$(,)? } ) => { - #[derive(Clone, Debug, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] + #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] $(#[$attr])* - #[repr($type)] - $vis enum $name { - $( - $(#[$variant_attr])* - $variant = $value, - )* - Unknown($type), - } + #[repr(transparent)] + $vis struct $name(pub $type); - impl $name { - pub const fn from_primitive(value: $type) -> Self { - match value { - $( - $value => $name::$variant, - )* - _ => $name::Unknown(value), - } - } - - pub const fn to_primitive(self) -> $type { + impl ::std::fmt::Debug for $name { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { $( - $name::$variant => $value, + &$name::$variant => write!(f, "{}::{}", stringify!($name), stringify!($variant)), )* - $name::Unknown(value) => value, + _ => write!(f, "{}({:?})", stringify!($name), self.0), } } } + impl $name { + $( + $(#[$variant_attr])* + #[allow(non_upper_case_globals)] + pub const $variant: Self = Self($value); + )* + } + impl From<$type> for $name { fn from(value: $type) -> Self { - Self::from_primitive(value) + Self(value) } } impl From<$name> for $type { fn from(value: $name) -> Self { - value.to_primitive() + value.0 } } }; } pub(crate) use nutype_enum; - -/// Helper macro to create a new enum type with a fourcc value. -/// -/// This macro is used to create a new enum type with a fourcc value. -/// The enum type is derived with the `Clone`, `Copy`, `PartialEq`, `Eq`, -/// `PartialOrd`, `Ord`, and `Hash` traits. The enum type is also derived with -/// the `Debug` trait to provide a human-readable representation of the enum. 
-/// -/// # Examples -/// -/// ```rust,ignore -/// nutype_four_cc! { -/// pub enum VideoFourCC { -/// Av1 = b"av01", -/// Vp9 = b"vp09", -/// Hevc = b"hvc1", -/// } -/// } -/// ``` -macro_rules! nutype_four_cc { - ( - $(#[$attr:meta])* - $vis:vis - enum $name:ident { - $( - $variant:ident = $fourcc:expr - ),*$(,)? - } - ) => { - #[derive(Clone, Debug, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] - $(#[$attr])* - $vis enum $name { - $( - $variant, - )* - Unknown([u8; 4]), - } - - impl $name { - pub const fn from_primitive(fourcc: [u8; 4]) -> Self { - match &fourcc { - $( - $fourcc => $name::$variant, - )* - _ => $name::Unknown(fourcc), - } - } - - pub const fn to_primitive(self) -> [u8; 4] { - match self { - $( - $name::$variant => *$fourcc, - )* - $name::Unknown(fourcc) => fourcc, - } - } - } - - impl From<[u8; 4]> for $name { - fn from(fourcc: [u8; 4]) -> Self { - Self::from_primitive(fourcc) - } - } - - impl From<$name> for [u8; 4] { - fn from(fourcc: $name) -> Self { - fourcc.to_primitive() - } - } - }; -} - -pub(crate) use nutype_four_cc; diff --git a/crates/flv/src/tag.rs b/crates/flv/src/tag.rs index 503633f41..3dec11f65 100644 --- a/crates/flv/src/tag.rs +++ b/crates/flv/src/tag.rs @@ -8,33 +8,49 @@ use super::video::VideoData; use crate::macros::nutype_enum; /// An FLV Tag -/// Defined by [video_file_format_spec_v10_1 - Annex E.4.1 FLV Tag] -/// This is where actual media data is stored within the FLV binary format. +/// /// Tags have different types and thus different data structures. To accommodate /// this the [`FlvTagData`] enum is used. +/// +/// Defined by: +/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - FLV tags) +/// - video_file_format_spec_v10_1.pdf (Annex E.4.1 - FLV Tag) +/// +/// The v10.1 spec adds some additional fields to the tag to accommodate encryption. +/// We don't support this because it is not needed for our use case. (and I suspect it is not used anywhere anymore.) 
+/// +/// However if the Tag is encrypted the tag_type will be a larger number (one we don't support), and therefore the +/// [`FlvTagData::Unknown`] variant will be used. #[derive(Debug, Clone, PartialEq)] pub struct FlvTag { /// A timestamp in milliseconds - pub timestamp: u32, + pub timestamp_ms: u32, /// A stream id pub stream_id: u32, pub data: FlvTagData, } impl FlvTag { + /// Demux a FLV tag from the given reader. + /// The cursor will be advanced to the end of the tag. pub fn demux(reader: &mut std::io::Cursor) -> std::io::Result { let tag_type = FlvTagType::from(reader.read_u8()?); let data_size = reader.read_u24::()?; - let timestamp = reader.read_u24::()? | ((reader.read_u8()? as u32) << 24); + // The timestamp encoding is unusual: it is 24 bits, followed by an extended 8-bit value that supplies the upper byte of the full 32-bit timestamp. + let timestamp_ms = reader.read_u24::()? | ((reader.read_u8()? as u32) << 24); + + // The stream id according to the spec is ALWAYS 0. (likely not true) let stream_id = reader.read_u24::()?; + // We then extract the data from the reader. (advancing the cursor to the end of the tag) let data = reader.extract_bytes(data_size as usize)?; + // Finally we demux the data. let data = FlvTagData::demux(tag_type, &mut std::io::Cursor::new(data))?; Ok(FlvTag { - timestamp, + timestamp_ms, stream_id, data, }) @@ -43,8 +59,13 @@ impl FlvTag { nutype_enum! { /// FLV Tag Type - /// Defined by [video_file_format_spec_v10_1 - Annex E.4.1 FLV Tag] + /// /// This is the type of the tag. + /// + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - FLV tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.1 - FLV Tag) + /// pub enum FlvTagType(u8) { Audio = 8, Video = 9, @@ -53,21 +74,31 @@ nutype_enum! { } /// FLV Tag Data -/// Defined by [video_file_format_spec_v10_1 - Annex E.4.1 FLV Tag] +/// /// This is a container for the actual media data. /// This enum contains the data for the different types of tags. 
+/// +/// Defined by: +/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - FLV tags) +/// - video_file_format_spec_v10_1.pdf (Annex E.4.1 - FLV Tag) #[derive(Debug, Clone, PartialEq)] pub enum FlvTagData { - /// AudioData defined by [video_file_format_spec_v10_1 - Annex E.4.2.1 - /// AUDIODATA] + /// AudioData when the FlvTagType is Audio(8) + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA) Audio(AudioData), - /// VideoData defined by [video_file_format_spec_v10_1 - Annex E.4.3.1 - /// VIDEODATA] + /// VideoData when the FlvTagType is Video(9) + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Video tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.3.1 - VIDEODATA) Video(VideoData), - /// ScriptData defined by [video_file_format_spec_v10_1 - Annex E.4.4.1 - /// SCRIPTDATA] + /// ScriptData when the FlvTagType is ScriptData(18) + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Data tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.4.1 - SCRIPTDATA) ScriptData(ScriptData), - /// Unknown tag type, with the data being the raw bytes of the tag + /// Any tag type that we don't know how to parse, with the corresponding data being the raw bytes of the tag Unknown { tag_type: FlvTagType, data: Bytes }, } diff --git a/crates/flv/src/video.rs b/crates/flv/src/video.rs index 9bac0b6c4..bd09da6b6 100644 --- a/crates/flv/src/video.rs +++ b/crates/flv/src/video.rs @@ -9,25 +9,40 @@ use scuffle_bytes_util::BytesCursorExt; use super::av1::Av1Packet; use super::avc::{AvcPacket, AvcPacketType}; use super::hevc::HevcPacket; -use crate::macros::{nutype_enum, nutype_four_cc}; +use crate::macros::nutype_enum; nutype_enum! { /// FLV Frame Type - /// Defined in the FLV specification. 
Chapter 1 - VideoTags - /// The frame type is used to determine if the video frame is a keyframe, an - /// interframe or disposable interframe. + /// This enum represents the different types of frames in a FLV file. + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Video tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.3.1 - VIDEODATA) pub enum FrameType(u8) { + /// A keyframe is a frame that is a complete representation of the video content. Keyframe = 1, + /// An interframe is a frame that is a partial representation of the video content. Interframe = 2, + /// A disposable interframe is a frame that is a partial representation of the video content, but is not required to be displayed. (h263 only) DisposableInterframe = 3, + /// A generated keyframe is a frame that is a complete representation of the video content, but is not a keyframe. (reserved for server use only) GeneratedKeyframe = 4, + /// A video info or command frame is a frame that contains video information or commands. + /// If the frame is this type, the body will be a CommandPacket VideoInfoOrCommandFrame = 5, } } +/// FLV Tag Video Data +/// This is a container for video data. +/// This enum contains the data for the different types of video tags. +/// Defined by: +/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Video tags) +/// - video_file_format_spec_v10_1.pdf (Annex E.4.3.1 - VIDEODATA) #[derive(Debug, Clone, PartialEq)] pub struct VideoData { + /// The frame type of the video data. (4 bits) pub frame_type: FrameType, + /// The body of the video data. 
pub body: VideoDataBody, } @@ -37,45 +52,92 @@ impl VideoData { let enhanced = (byte & 0b1000_0000) != 0; let frame_type_byte = (byte >> 4) & 0b0111; let packet_type_byte = byte & 0b0000_1111; - Ok(VideoData { - frame_type: FrameType::from(frame_type_byte), - body: VideoDataBody::demux(VideoPacketType::new(packet_type_byte, enhanced), reader)?, - }) + let frame_type = FrameType::from(frame_type_byte); + let body = if frame_type == FrameType::VideoInfoOrCommandFrame { + let command_packet = CommandPacket::from(reader.read_u8()?); + VideoDataBody::Command(command_packet) + } else { + VideoDataBody::demux(VideoPacketType::new(packet_type_byte, enhanced), reader)? + }; + + Ok(VideoData { frame_type, body }) } } nutype_enum! { /// FLV Video Codec ID - /// Defined in the FLV specification. Chapter 1 - VideoTags - /// The codec ID indicates which codec is used to encode the video data. + /// + /// Denotes the different types of video codecs that can be used in a FLV file. + /// This is a legacy enum for older codecs; for modern codecs, the [`EnhancedPacketType`] is used which uses a [`VideoFourCC`] identifier. + /// + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Video tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.3.1 - VIDEODATA) pub enum VideoCodecId(u8) { + /// Sorenson H.263 SorensonH263 = 2, + /// Screen Video ScreenVideo = 3, + /// On2 VP6 On2VP6 = 4, + /// On2 VP6 with alpha channel On2VP6WithAlphaChannel = 5, + /// Screen Video Version 2 ScreenVideoVersion2 = 6, + /// AVC (H.264) Avc = 7, } } -/// FLV Tag Video Data +/// FLV Tag Video Data Body +/// /// This is a container for video data. /// This enum contains the data for the different types of video tags. -/// Defined in the FLV specification. 
Chapter 1 - FLV Video Tags +/// +/// Defined by: +/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Video tags) +/// - video_file_format_spec_v10_1.pdf (Annex E.4.3.1 - VIDEODATA) #[derive(Debug, Clone, PartialEq)] pub enum VideoDataBody { - /// AVC Video Packet defined in the FLV specification. Chapter 1 - - /// AVCVIDEOPACKET + /// AVC Video Packet (H.264) + /// When [`VideoPacketType::CodecId`] is [`VideoCodecId::Avc`] Avc(AvcPacket), - /// Enhanced Packet + /// Enhanced Packet (AV1, H.265, etc.) + /// When [`VideoPacketType::Enhanced`] is used Enhanced(EnhancedPacket), + /// Command Frame (VideoInfo or Command) + /// When [`FrameType::VideoInfoOrCommandFrame`] is used + Command(CommandPacket), /// Data we don't know how to parse Unknown { codec_id: VideoCodecId, data: Bytes }, } +nutype_enum! { + /// FLV Command Packet + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Video tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.3.1 - VIDEODATA) + pub enum CommandPacket(u8) { + /// Start of client seeking, when FrameType is 5 + StartOfClientSeeking = 1, + /// End of client seeking, when FrameType is 5 + EndOfClientSeeking = 2, + } +} + +/// A wrapper enum for the different types of video packets that can be used in a FLV file. +/// +/// Used to construct a [`VideoDataBody`]. +/// +/// See: +/// - [`VideoCodecId`] +/// - [`EnhancedPacketType`] +/// - [`VideoDataBody`] #[derive(Debug, Clone, PartialEq, Copy, Eq, PartialOrd, Ord, Hash)] pub enum VideoPacketType { + /// Codec ID (legacy) CodecId(VideoCodecId), + /// Enhanced (modern) Enhanced(EnhancedPacketType), } @@ -90,6 +152,8 @@ impl VideoPacketType { } impl VideoDataBody { + /// Demux a video packet from the given reader. + /// The reader will consume all the data from the reader. 
pub fn demux(packet_type: VideoPacketType, reader: &mut io::Cursor) -> io::Result { match packet_type { VideoPacketType::CodecId(codec_id) => match codec_id { @@ -153,6 +217,14 @@ impl VideoDataBody { } } +/// An Enhanced FLV Packet +/// +/// This is a container for enhanced video packets. +/// The enhanced spec adds modern codecs to the FLV file format. +/// +/// Defined by: +/// - enhanced_rtmp-v1.pdf (Defining Additional Video Codecs) +/// - enhanced_rtmp-v2.pdf (Enhanced Video) #[derive(Debug, Clone, PartialEq)] pub enum EnhancedPacket { /// Metadata @@ -171,11 +243,11 @@ pub enum EnhancedPacket { }, } -nutype_four_cc! { - pub enum VideoFourCC { - Av1 = b"av01", - Vp9 = b"vp09", - Hevc = b"hvc1", +nutype_enum! { + pub enum VideoFourCC([u8; 4]) { + Av1 = *b"av01", + Vp9 = *b"vp09", + Hevc = *b"hvc1", } } diff --git a/crates/transmuxer/src/lib.rs b/crates/transmuxer/src/lib.rs index e56862609..c0dcbfcce 100644 --- a/crates/transmuxer/src/lib.rs +++ b/crates/transmuxer/src/lib.rs @@ -147,7 +147,7 @@ impl Transmuxer { let mut is_keyframe = false; let duration = - if self.last_video_timestamp == 0 || tag.timestamp == 0 || tag.timestamp < self.last_video_timestamp { + if self.last_video_timestamp == 0 || tag.timestamp_ms == 0 || tag.timestamp_ms < self.last_video_timestamp { 1000 // the first frame is always 1000 ticks where the // timescale is 1000 * fps. } else { @@ -160,7 +160,7 @@ impl Transmuxer { // The reason we use a timescale which is 1000 * fps is because then we can // always represent the delta as an integer. If we use a timescale of 1000, we // would run into the same rounding errors. 
- let delta = tag.timestamp as f64 - self.last_video_timestamp as f64; + let delta = tag.timestamp_ms as f64 - self.last_video_timestamp as f64; let expected_delta = 1000.0 / video_settings.framerate; if (delta - expected_delta).abs() <= 1.0 { 1000 @@ -285,7 +285,7 @@ impl Transmuxer { }))); } else { self.video_duration += total_duration as u64; - self.last_video_timestamp = tag.timestamp; + self.last_video_timestamp = tag.timestamp_ms; return Ok(Some(TransmuxResult::MediaSegment(MediaSegment { data: Bytes::from(writer), ty: MediaType::Video,