From 82115f918d1dde3615fdde4537b398c371bb3c5a Mon Sep 17 00:00:00 2001 From: Troy Benson Date: Sat, 11 Jan 2025 16:21:47 +0000 Subject: [PATCH] initial refactor of the flv crate --- Cargo.lock | 37 +- Cargo.toml | 1 + crates/av1/src/config.rs | 44 +- crates/av1/src/lib.rs | 2 +- crates/flv/Cargo.toml | 14 +- crates/flv/LICENSE.Apache-2.0 | 1 + crates/flv/LICENSE.MIT | 1 + crates/flv/README.md | 17 + crates/flv/src/aac.rs | 44 ++ crates/flv/src/audio.rs | 173 ++++++ crates/flv/src/av1.rs | 11 + crates/flv/src/avc.rs | 57 ++ crates/flv/src/define.rs | 308 ---------- crates/flv/src/errors.rs | 52 -- crates/flv/src/file.rs | 39 ++ crates/flv/src/flv.rs | 280 --------- crates/flv/src/header.rs | 61 ++ crates/flv/src/hevc.rs | 9 + crates/flv/src/lib.rs | 879 ++++++++++++++++++++++++++- crates/flv/src/macros.rs | 66 ++ crates/flv/src/script.rs | 29 + crates/flv/src/tag.rs | 117 ++++ crates/flv/src/tests/demuxer.rs | 829 ------------------------- crates/flv/src/tests/error.rs | 31 - crates/flv/src/tests/mod.rs | 2 - crates/flv/src/video.rs | 263 ++++++++ crates/mp4/src/tests/demux.rs | 1 + crates/transmuxer/Cargo.toml | 6 +- crates/transmuxer/src/codecs/aac.rs | 4 +- crates/transmuxer/src/codecs/av1.rs | 2 +- crates/transmuxer/src/codecs/avc.rs | 2 +- crates/transmuxer/src/codecs/hevc.rs | 2 +- crates/transmuxer/src/define.rs | 2 +- crates/transmuxer/src/errors.rs | 12 +- crates/transmuxer/src/lib.rs | 79 +-- crates/transmuxer/src/tests/mod.rs | 2 +- 36 files changed, 1898 insertions(+), 1581 deletions(-) create mode 120000 crates/flv/LICENSE.Apache-2.0 create mode 120000 crates/flv/LICENSE.MIT create mode 100644 crates/flv/README.md create mode 100644 crates/flv/src/aac.rs create mode 100644 crates/flv/src/audio.rs create mode 100644 crates/flv/src/av1.rs create mode 100644 crates/flv/src/avc.rs delete mode 100644 crates/flv/src/define.rs delete mode 100644 crates/flv/src/errors.rs create mode 100644 crates/flv/src/file.rs delete mode 100644 crates/flv/src/flv.rs create 
mode 100644 crates/flv/src/header.rs create mode 100644 crates/flv/src/hevc.rs create mode 100644 crates/flv/src/macros.rs create mode 100644 crates/flv/src/script.rs create mode 100644 crates/flv/src/tag.rs delete mode 100644 crates/flv/src/tests/demuxer.rs delete mode 100644 crates/flv/src/tests/error.rs delete mode 100644 crates/flv/src/tests/mod.rs create mode 100644 crates/flv/src/video.rs diff --git a/Cargo.lock b/Cargo.lock index d9c7aac92..c6e34666a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -933,23 +933,6 @@ dependencies = [ "miniz_oxide", ] -[[package]] -name = "flv" -version = "0.0.1" -dependencies = [ - "byteorder", - "bytes", - "h264", - "h265", - "num-derive", - "num-traits", - "scuffle-aac", - "scuffle-amf0", - "scuffle-av1", - "scuffle-bytes-util", - "scuffle-workspace-hack", -] - [[package]] name = "fnv" version = "1.0.7" @@ -2633,6 +2616,24 @@ dependencies = [ "tracing", ] +[[package]] +name = "scuffle-flv" +version = "0.0.1" +dependencies = [ + "byteorder", + "bytes", + "h264", + "h265", + "num-derive", + "num-traits", + "scuffle-aac", + "scuffle-amf0", + "scuffle-av1", + "scuffle-bytes-util", + "scuffle-workspace-hack", + "thiserror 2.0.7", +] + [[package]] name = "scuffle-future-ext" version = "0.0.1" @@ -3379,7 +3380,6 @@ version = "0.0.1" dependencies = [ "byteorder", "bytes", - "flv", "h264", "h265", "mp4", @@ -3387,6 +3387,7 @@ dependencies = [ "scuffle-amf0", "scuffle-av1", "scuffle-bytes-util", + "scuffle-flv", "scuffle-workspace-hack", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index ffc53ae00..a002b0c78 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -63,6 +63,7 @@ scuffle-bytes-util = { path = "crates/bytes-util", version = "0.0.1" } scuffle-expgolomb = { path = "crates/expgolomb", version = "0.0.1" } scuffle-amf0 = { path = "crates/amf0", version = "0.0.1" } scuffle-av1 = { path = "crates/av1", version = "0.0.1" } +scuffle-flv = { path = "crates/flv", version = "0.0.1" } [profile.release-debug] inherits = "release" diff 
--git a/crates/av1/src/config.rs b/crates/av1/src/config.rs index 8286158e4..e4f335f81 100644 --- a/crates/av1/src/config.rs +++ b/crates/av1/src/config.rs @@ -1,8 +1,38 @@ use std::io; +use byteorder::ReadBytesExt; use bytes::Bytes; use scuffle_bytes_util::{BitReader, BitWriter, BytesCursorExt}; +/// AV1 Video Descriptor +/// https://aomediacodec.github.io/av1-mpeg2-ts/#av1-video-descriptor +#[derive(Debug, Clone, PartialEq)] +pub struct AV1VideoDescriptor { + pub codec_configuration_record: AV1CodecConfigurationRecord, +} + +impl AV1VideoDescriptor { + pub fn demux(reader: &mut io::Cursor) -> io::Result { + let tag = reader.read_u8()?; + let length = reader.read_u8()?; + + if tag != 0x80 { + return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid AV1 video descriptor tag")); + } + + if length != 4 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "Invalid AV1 video descriptor length", + )); + } + + Ok(AV1VideoDescriptor { + codec_configuration_record: AV1CodecConfigurationRecord::demux(reader)?, + }) + } +} + #[derive(Debug, Clone, PartialEq)] /// AV1 Codec Configuration Record /// https://aomediacodec.github.io/av1-isobmff/#av1codecconfigurationbox-syntax @@ -16,6 +46,7 @@ pub struct AV1CodecConfigurationRecord { pub chroma_subsampling_x: bool, pub chroma_subsampling_y: bool, pub chroma_sample_position: u8, + pub hdr_wcg_idc: u8, pub initial_presentation_delay_minus_one: Option, pub config_obu: Bytes, } @@ -45,7 +76,13 @@ impl AV1CodecConfigurationRecord { let chroma_subsampling_y = bit_reader.read_bit()?; let chroma_sample_position = bit_reader.read_bits(2)? 
as u8; - bit_reader.seek_bits(3)?; // reserved 3 bits + // This is from the https://aomediacodec.github.io/av1-mpeg2-ts/#av1-video-descriptor spec + // The spec from https://aomediacodec.github.io/av1-isobmff/#av1codecconfigurationbox-section is old and contains 3 bits reserved + // The newer spec takes 2 of those reserved bits to represent the HDR WCG IDC + // Leaving 1 bit for future use + let hdr_wcg_idc = bit_reader.read_bits(2)? as u8; + + bit_reader.seek_bits(1)?; // reserved 1 bits let initial_presentation_delay_minus_one = if bit_reader.read_bit()? { Some(bit_reader.read_bits(4)? as u8) @@ -70,6 +107,7 @@ impl AV1CodecConfigurationRecord { chroma_subsampling_x, chroma_subsampling_y, chroma_sample_position, + hdr_wcg_idc, initial_presentation_delay_minus_one, config_obu: reader.extract_remaining(), }) @@ -139,6 +177,7 @@ mod tests { chroma_subsampling_x: true, chroma_subsampling_y: true, chroma_sample_position: 0, + hdr_wcg_idc: 0, initial_presentation_delay_minus_one: None, config_obu: b"\n\x0f\0\0\0j\xef\xbf\xe1\xbc\x02\x19\x90\x10\x10\x10@", } @@ -182,6 +221,7 @@ mod tests { chroma_subsampling_x: true, chroma_subsampling_y: true, chroma_sample_position: 0, + hdr_wcg_idc: 0, initial_presentation_delay_minus_one: Some( 15, ), @@ -202,6 +242,7 @@ mod tests { chroma_subsampling_x: false, chroma_subsampling_y: false, chroma_sample_position: 0, + hdr_wcg_idc: 0, initial_presentation_delay_minus_one: None, config_obu: Bytes::from_static(b"HELLO FROM THE OBU"), }; @@ -224,6 +265,7 @@ mod tests { chroma_subsampling_x: false, chroma_subsampling_y: false, chroma_sample_position: 0, + hdr_wcg_idc: 0, initial_presentation_delay_minus_one: Some(0), config_obu: Bytes::from_static(b"HELLO FROM THE OBU"), }; diff --git a/crates/av1/src/lib.rs b/crates/av1/src/lib.rs index 428d09ca0..55268b75b 100644 --- a/crates/av1/src/lib.rs +++ b/crates/av1/src/lib.rs @@ -16,5 +16,5 @@ mod config; mod obu; -pub use config::AV1CodecConfigurationRecord; +pub use 
config::{AV1CodecConfigurationRecord, AV1VideoDescriptor}; pub use obu::{seq, ObuHeader, ObuType}; diff --git a/crates/flv/Cargo.toml b/crates/flv/Cargo.toml index 013afc35d..f60afccd3 100644 --- a/crates/flv/Cargo.toml +++ b/crates/flv/Cargo.toml @@ -1,19 +1,29 @@ [package] -name = "flv" +name = "scuffle-flv" version = "0.0.1" edition = "2021" license = "MIT OR Apache-2.0" +repository = "https://github.com/scufflecloud/scuffle" +authors = ["Scuffle "] +readme = "README.md" +documentation = "https://docs.rs/scuffle-flv" +description = "A pure Rust FLV demuxer." +keywords = ["flv", "demuxer"] + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(coverage_nightly)'] } [dependencies] byteorder = "1.5" bytes = "1.5" num-traits = "0.2" num-derive = "0.4" +thiserror = "2.0" -scuffle-av1.workspace = true h264 = { path = "../h264" } h265 = { path = "../h265" } scuffle-aac = { path = "../aac" } scuffle-bytes-util.workspace = true +scuffle-av1.workspace = true scuffle-amf0.workspace = true scuffle-workspace-hack.workspace = true diff --git a/crates/flv/LICENSE.Apache-2.0 b/crates/flv/LICENSE.Apache-2.0 new file mode 120000 index 000000000..5a4558f07 --- /dev/null +++ b/crates/flv/LICENSE.Apache-2.0 @@ -0,0 +1 @@ +../../LICENSE.Apache-2.0 \ No newline at end of file diff --git a/crates/flv/LICENSE.MIT b/crates/flv/LICENSE.MIT new file mode 120000 index 000000000..244dbbf0b --- /dev/null +++ b/crates/flv/LICENSE.MIT @@ -0,0 +1 @@ +../../LICENSE.MIT \ No newline at end of file diff --git a/crates/flv/README.md b/crates/flv/README.md new file mode 100644 index 000000000..dacc1afa1 --- /dev/null +++ b/crates/flv/README.md @@ -0,0 +1,17 @@ +# scuffle-flv + +> [!WARNING] +> This crate is under active development and may not be stable. 
+ +[![crates.io](https://img.shields.io/crates/v/scuffle-flv.svg)](https://crates.io/crates/scuffle-flv) [![docs.rs](https://img.shields.io/docsrs/scuffle-flv)](https://docs.rs/scuffle-flv) + +--- + +A pure Rust implementation of the FLV format, allowing for demuxing of FLV files or streams. + +## License + +This project is licensed under the [MIT](./LICENSE.MIT) or [Apache-2.0](./LICENSE.Apache-2.0) license. +You can choose between one of them if you use this work. + +`SPDX-License-Identifier: MIT OR Apache-2.0` diff --git a/crates/flv/src/aac.rs b/crates/flv/src/aac.rs new file mode 100644 index 000000000..a74dd8c6d --- /dev/null +++ b/crates/flv/src/aac.rs @@ -0,0 +1,44 @@ +use bytes::Bytes; +use scuffle_bytes_util::BytesCursorExt; + +use crate::macros::nutype_enum; + +nutype_enum! { + /// FLV AAC Packet Type + /// + /// Defined in the FLV specification. Chapter 1 - AACAUDIODATA + /// + /// The AACPacketType indicates the type of data in the AACAUDIODATA. + pub enum AacPacketType(u8) { + /// Sequence Header + SequenceHeader = 0x0, + /// Raw + Raw = 0x1, + } +} + +/// AAC Packet +/// This is a container for aac data. +/// This enum contains the data for the different types of aac packets. +/// Defined in the FLV specification. 
Chapter 1 - AACAUDIODATA +#[derive(Debug, Clone, PartialEq)] +pub enum AacPacket { + /// AAC Sequence Header + SequenceHeader(Bytes), + /// AAC Raw + Raw(Bytes), + /// Data we don't know how to parse + Unknown { aac_packet_type: AacPacketType, data: Bytes }, +} + +impl AacPacket { + pub fn demux(aac_packet_type: AacPacketType, reader: &mut std::io::Cursor) -> std::io::Result { + let data = reader.extract_remaining(); + + match aac_packet_type { + AacPacketType::Raw => Ok(AacPacket::Raw(data)), + AacPacketType::SequenceHeader => Ok(AacPacket::SequenceHeader(data)), + _ => Ok(AacPacket::Unknown { aac_packet_type, data }), + } + } +} diff --git a/crates/flv/src/audio.rs b/crates/flv/src/audio.rs new file mode 100644 index 000000000..71c96c09b --- /dev/null +++ b/crates/flv/src/audio.rs @@ -0,0 +1,173 @@ +use std::io; + +use byteorder::ReadBytesExt; +use bytes::Bytes; +use scuffle_bytes_util::BytesCursorExt; + +use super::aac::{AacPacket, AacPacketType}; +use crate::macros::nutype_enum; + +/// FLV Tag Audio Data +/// +/// This is the container for the audio data. +/// +/// Defined by: +/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags) +/// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA) +#[derive(Debug, Clone, PartialEq)] +pub struct AudioData { + /// The sound rate of the audio data. (2 bits) + pub sound_rate: SoundRate, + /// The sound size of the audio data. (1 bit) + pub sound_size: SoundSize, + /// The sound type of the audio data. (1 bit) + pub sound_type: SoundType, + /// The body of the audio data. 
+ pub body: AudioDataBody, +} + +impl AudioData { + pub fn demux(reader: &mut io::Cursor) -> io::Result { + let byte = reader.read_u8()?; + // SoundFormat is the first 4 bits of the byte + let sound_format = SoundFormat::from(byte >> 4); + // SoundRate is the next 2 bits of the byte + let sound_rate = SoundRate::from((byte >> 2) & 0b11); + // SoundSize is the next bit of the byte + let sound_size = SoundSize::from((byte >> 1) & 0b1); + // SoundType is the last bit of the byte + let sound_type = SoundType::from(byte & 0b1); + + // Now we can demux the body of the audio data + let body = AudioDataBody::demux(sound_format, reader)?; + + Ok(AudioData { + sound_rate, + sound_size, + sound_type, + body, + }) + } +} + +nutype_enum! { + /// FLV Sound Format + /// + /// Denotes the type of the underlying data packet + /// + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA) + pub enum SoundFormat(u8) { + /// Linear PCM, platform endian + LinearPcmPlatformEndian = 0, + /// ADPCM + Adpcm = 1, + /// MP3 + Mp3 = 2, + /// Linear PCM, little endian + LinearPcmLittleEndian = 3, + /// Nellymoser 16Khz Mono + Nellymoser16KhzMono = 4, + /// Nellymoser 8Khz Mono + Nellymoser8KhzMono = 5, + /// Nellymoser + Nellymoser = 6, + /// G.711 A-Law logarithmic PCM + G711ALaw = 7, + /// G.711 Mu-Law logarithmic PCM + G711MuLaw = 8, + /// AAC + Aac = 10, + /// Speex + Speex = 11, + /// Mp3 8Khz + Mp38Khz = 14, + /// Device specific sound + DeviceSpecificSound = 15, + } +} + +/// FLV Tag Audio Data Body +/// +/// This is the container for the audio data body. 
+/// +/// Defined by: +/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags) +/// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA) +#[derive(Debug, Clone, PartialEq)] +pub enum AudioDataBody { + /// AAC Audio Packet + Aac(AacPacket), + /// Some other audio format we don't know how to parse + Unknown { sound_format: SoundFormat, data: Bytes }, +} + +impl AudioDataBody { + pub fn demux(sound_format: SoundFormat, reader: &mut io::Cursor) -> io::Result { + match sound_format { + SoundFormat::Aac => { + // For some reason the spec adds a specific byte before the AAC data. + // This byte is the AAC packet type. + let aac_packet_type = AacPacketType::from(reader.read_u8()?); + Ok(Self::Aac(AacPacket::demux(aac_packet_type, reader)?)) + } + _ => Ok(Self::Unknown { + sound_format, + data: reader.extract_remaining(), + }), + } + } +} + +nutype_enum! { + /// FLV Sound Rate + /// + /// Denotes the sampling rate of the audio data. + /// + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA) + pub enum SoundRate(u8) { + /// 5.5 KHz + Hz5500 = 0, + /// 11 KHz + Hz11000 = 1, + /// 22 KHz + Hz22000 = 2, + /// 44 KHz + Hz44000 = 3, + } +} + +nutype_enum! { + /// FLV Sound Size + /// + /// Denotes the size of each sample in the audio data. + /// + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA) + pub enum SoundSize(u8) { + /// 8 bit + Bit8 = 0, + /// 16 bit + Bit16 = 1, + } +} + +nutype_enum! { + /// FLV Sound Type + /// + /// Denotes the number of channels in the audio data. 
+ /// + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA) + pub enum SoundType(u8) { + /// Mono + Mono = 0, + /// Stereo + Stereo = 1, + } +} diff --git a/crates/flv/src/av1.rs b/crates/flv/src/av1.rs new file mode 100644 index 000000000..bed668f7d --- /dev/null +++ b/crates/flv/src/av1.rs @@ -0,0 +1,11 @@ +use scuffle_av1::AV1CodecConfigurationRecord; +use bytes::Bytes; + +/// AV1 Packet +/// This is a container for av1 data. +/// This enum contains the data for the different types of av1 packets. +#[derive(Debug, Clone, PartialEq)] +pub enum Av1Packet { + SequenceStart(AV1CodecConfigurationRecord), + Raw(Bytes), +} diff --git a/crates/flv/src/avc.rs b/crates/flv/src/avc.rs new file mode 100644 index 000000000..aaee177fd --- /dev/null +++ b/crates/flv/src/avc.rs @@ -0,0 +1,57 @@ +use std::io; + +use byteorder::{BigEndian, ReadBytesExt}; +use bytes::Bytes; +use h264::AVCDecoderConfigurationRecord; +use scuffle_bytes_util::BytesCursorExt; + +use crate::macros::nutype_enum; + +/// AVC Packet +#[derive(Debug, Clone, PartialEq)] +pub enum AvcPacket { + /// AVC NALU + Nalu { composition_time: u32, data: Bytes }, + /// AVC Sequence Header + SequenceHeader(AVCDecoderConfigurationRecord), + /// AVC End of Sequence + EndOfSequence, + /// AVC Unknown (we don't know how to parse it) + Unknown { + avc_packet_type: AvcPacketType, + composition_time: u32, + data: Bytes, + }, +} + +impl AvcPacket { + pub fn demux(avc_packet_type: AvcPacketType, reader: &mut io::Cursor) -> io::Result { + let composition_time = reader.read_u24::()?; + + match avc_packet_type { + AvcPacketType::SeqHdr => Ok(Self::SequenceHeader(AVCDecoderConfigurationRecord::demux(reader)?)), + AvcPacketType::Nalu => Ok(Self::Nalu { + composition_time, + data: reader.extract_remaining(), + }), + AvcPacketType::EndOfSequence => Ok(Self::EndOfSequence), + _ => Ok(Self::Unknown { + avc_packet_type, + 
composition_time, + data: reader.extract_remaining(), + }), + } + } +} + +nutype_enum! { + /// FLV AVC Packet Type + /// Defined in the FLV specification. Chapter 1 - AVCVIDEODATA + /// The AVC packet type is used to determine if the video data is a sequence + /// header or a NALU. + pub enum AvcPacketType(u8) { + SeqHdr = 0, + Nalu = 1, + EndOfSequence = 2, + } +} diff --git a/crates/flv/src/define.rs b/crates/flv/src/define.rs deleted file mode 100644 index 2c1053a6b..000000000 --- a/crates/flv/src/define.rs +++ /dev/null @@ -1,308 +0,0 @@ -use bytes::Bytes; -use h264::AVCDecoderConfigurationRecord; -use h265::HEVCDecoderConfigurationRecord; -use num_derive::FromPrimitive; -use scuffle_amf0::Amf0Value; -use scuffle_av1::AV1CodecConfigurationRecord; - -#[derive(Debug, Clone, PartialEq)] -/// FLV File -/// Is a container which has a header and a series of tags. -/// Defined in the FLV specification. Chapter 1 - FLV File Format -pub struct Flv { - pub header: FlvHeader, - pub tags: Vec, -} - -#[derive(Debug, Clone, PartialEq)] -/// FLV Header -/// Is a 9-byte header which contains information about the FLV file. -/// Defined in the FLV specification. Chapter 1 - The FLV Header -pub struct FlvHeader { - pub version: u8, - pub has_audio: bool, - pub has_video: bool, - pub data_offset: u32, - pub extra: Bytes, -} - -#[derive(Debug, Clone, PartialEq)] -/// FLV Tag -/// This is a container for the actual media data. -/// Defined in the FLV specification. Chapter 1 - FLV Tags -pub struct FlvTag { - pub timestamp: u32, - pub stream_id: u32, - pub data: FlvTagData, -} - -#[derive(Debug, Clone, Copy, FromPrimitive, PartialEq, Eq)] -#[repr(u8)] -/// FLV Tag Type -/// Defined in the FLV specification. Chapter 1 - FLV tags -pub enum FlvTagType { - Audio = 8, - Video = 9, - ScriptData = 18, -} - -#[derive(Debug, Clone, PartialEq)] -/// FLV Tag Data -/// This is a container for the actual media data. -/// This enum contains the data for the different types of tags. 
-/// Defined in the FLV specification. Chapter 1 - FLV tags -pub enum FlvTagData { - /// AudioData defined in the FLV specification. Chapter 1 - FLV Audio Tags - Audio { - sound_rate: SoundRate, - sound_size: SoundSize, - sound_type: SoundType, - data: FlvTagAudioData, - }, - /// VideoData defined in the FLV specification. Chapter 1 - FLV Video Tags - Video { frame_type: FrameType, data: FlvTagVideoData }, - /// ScriptData defined in the FLV specification. Chapter 1 - FLV Data Tags - ScriptData { name: String, data: Vec> }, - /// Data we don't know how to parse - Unknown { tag_type: u8, data: Bytes }, -} - -#[derive(Debug, Clone, PartialEq)] -/// FLV Tag Audio Data -/// This is a container for audio data. -/// This enum contains the data for the different types of audio tags. -/// Defined in the FLV specification. Chapter 1 - FLV Audio Tags -pub enum FlvTagAudioData { - /// AAC Audio Packet defined in the FLV specification. Chapter 1 - - /// AACAUDIODATA - Aac(AacPacket), - /// Data we don't know how to parse - Unknown { sound_format: u8, data: Bytes }, -} - -#[derive(Debug, Clone, PartialEq)] -/// AAC Packet -/// This is a container for aac data. -/// This enum contains the data for the different types of aac packets. -/// Defined in the FLV specification. Chapter 1 - AACAUDIODATA -pub enum AacPacket { - /// AAC Raw - Raw(Bytes), - /// AAC Sequence Header - SequenceHeader(Bytes), - /// Data we don't know how to parse - Unknown { aac_packet_type: u8, data: Bytes }, -} - -#[derive(Debug, Clone, PartialEq)] -/// FLV Tag Video Data -/// This is a container for video data. -/// This enum contains the data for the different types of video tags. -/// Defined in the FLV specification. Chapter 1 - FLV Video Tags -pub enum FlvTagVideoData { - /// AVC Video Packet defined in the FLV specification. 
Chapter 1 - - /// AVCVIDEOPACKET - Avc(AvcPacket), - /// Enhanced Packet - Enhanced(EnhancedPacket), - /// Data we don't know how to parse - Unknown { codec_id: u8, data: Bytes }, -} - -#[derive(Debug, Clone, PartialEq)] -pub enum EnhancedPacket { - /// Metadata - Metadata(Bytes), - /// Sequence End - SequenceEnd, - /// Av1 Video Packet - Av1(Av1Packet), - /// Hevc (H.265) Video Packet - Hevc(HevcPacket), - /// We don't know how to parse it - Unknown { - packet_type: u8, - video_codec: [u8; 4], - data: Bytes, - }, -} - -#[derive(Debug, Clone, PartialEq)] -/// AVC Packet -pub enum AvcPacket { - /// AVC NALU - Nalu { composition_time: u32, data: Bytes }, - /// AVC Sequence Header - SequenceHeader(AVCDecoderConfigurationRecord), - /// AVC End of Sequence - EndOfSequence, - /// AVC Unknown (we don't know how to parse it) - Unknown { - avc_packet_type: u8, - composition_time: u32, - data: Bytes, - }, -} - -#[derive(Debug, Clone, PartialEq)] -/// HEVC Packet -pub enum HevcPacket { - SequenceStart(HEVCDecoderConfigurationRecord), - Nalu { composition_time: Option, data: Bytes }, -} - -#[derive(Debug, Clone, PartialEq)] -/// AV1 Packet -/// This is a container for av1 data. -/// This enum contains the data for the different types of av1 packets. -pub enum Av1Packet { - SequenceStart(AV1CodecConfigurationRecord), - Raw(Bytes), -} - -#[derive(Debug, Clone, Copy, FromPrimitive, PartialEq, Eq)] -#[repr(u8)] -pub(crate) enum EnhancedPacketType { - SequenceStart = 0x00, - CodedFrames = 0x01, - SequenceEnd = 0x02, - CodedFramesX = 0x03, - Metadata = 0x04, - Mpeg2SequenceStart = 0x05, -} - -#[derive(Debug, Clone, Copy, FromPrimitive, PartialEq, Eq)] -#[repr(u8)] -/// FLV Sound Codec Id -/// Defined in the FLV specification. Chapter 1 - AudioTags -/// The SoundCodecID indicates the codec used to encode the sound. 
-pub(crate) enum SoundCodecId { - LinearPcmPlatformEndian = 0x0, - Adpcm = 0x1, - Mp3 = 0x2, - LinearPcmLittleEndian = 0x3, - Nellymoser16KhzMono = 0x4, - Nellymoser8KhzMono = 0x5, - Nellymoser = 0x6, - G711ALaw = 0x7, - G711MuLaw = 0x8, - Reserved = 0x9, - Aac = 0xA, - Speex = 0xB, - Mp38Khz = 0xE, - DeviceSpecificSound = 0xF, -} - -#[derive(Debug, Clone, Copy, FromPrimitive, PartialEq, Eq)] -#[repr(u8)] -/// FLV Sound Rate -/// Defined in the FLV specification. Chapter 1 - AudioTags -/// The SoundRate indicates the sampling rate of the audio data. -pub enum SoundRate { - Hz5500 = 0x0, - Hz11000 = 0x1, - Hz22000 = 0x2, - Hz44000 = 0x3, -} - -#[derive(Debug, Clone, Copy, FromPrimitive, PartialEq, Eq)] -#[repr(u8)] -/// FLV Sound Size -/// Defined in the FLV specification. Chapter 1 - AudioTags -/// The SoundSize indicates the size of each sample in the audio data. -pub enum SoundSize { - Bit8 = 0x0, - Bit16 = 0x1, -} - -#[derive(Debug, Clone, Copy, FromPrimitive, PartialEq, Eq)] -#[repr(u8)] -/// FLV Sound Type -/// Defined in the FLV specification. Chapter 1 - AudioTags -/// The SoundType indicates the number of channels in the audio data. -pub enum SoundType { - Mono = 0x0, - Stereo = 0x1, -} - -#[derive(Debug, Clone, Copy, FromPrimitive, PartialEq, Eq)] -#[repr(u8)] -/// FLV AAC Packet Type -/// Defined in the FLV specification. Chapter 1 - AACAUDIODATA -/// The AACPacketType indicates the type of data in the AACAUDIODATA. -pub(crate) enum AacPacketType { - SeqHdr = 0x0, - Raw = 0x1, -} - -#[derive(Debug, Clone, Copy, FromPrimitive, PartialEq, Eq)] -#[repr(u8)] -/// FLV Video Codec ID -/// Defined in the FLV specification. Chapter 1 - VideoTags -/// The codec ID indicates which codec is used to encode the video data. 
-pub(crate) enum VideoCodecId { - SorensonH263 = 0x2, - ScreenVideo = 0x3, - On2VP6 = 0x4, - On2VP6WithAlphaChannel = 0x5, - ScreenVideoVersion2 = 0x6, - Avc = 0x7, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub(crate) enum VideoFourCC { - Av1, - Vp9, - Hevc, - Unknown([u8; 4]), -} - -impl From<[u8; 4]> for VideoFourCC { - fn from(fourcc: [u8; 4]) -> Self { - match &fourcc { - b"av01" => VideoFourCC::Av1, - b"vp09" => VideoFourCC::Vp9, - b"hvc1" => VideoFourCC::Hevc, - _ => VideoFourCC::Unknown(fourcc), - } - } -} - -impl From for [u8; 4] { - fn from(fourcc: VideoFourCC) -> Self { - match fourcc { - VideoFourCC::Av1 => *b"av01", - VideoFourCC::Vp9 => *b"vp09", - VideoFourCC::Hevc => *b"hvc1", - VideoFourCC::Unknown(fourcc) => fourcc, - } - } -} - -#[derive(Debug, Clone, Copy, FromPrimitive, PartialEq, Eq)] -#[repr(u8)] -/// FLV Frame Type -/// Defined in the FLV specification. Chapter 1 - VideoTags -/// The frame type is used to determine if the video frame is a keyframe, an -/// interframe or disposable interframe. -pub enum FrameType { - Unknown = 0x0, - Keyframe = 0x1, - Interframe = 0x2, - DisposableInterframe = 0x3, - GeneratedKeyframe = 0x4, - VideoInfoOrCommandFrame = 0x5, - EnhancedMetadata = 0xF, -} - -#[derive(Debug, Clone, Copy, FromPrimitive, PartialEq, Eq)] -#[repr(u8)] -/// FLV AVC Packet Type -/// Defined in the FLV specification. Chapter 1 - AVCVIDEODATA -/// The AVC packet type is used to determine if the video data is a sequence -/// header or a NALU. 
-pub(crate) enum AvcPacketType { - SeqHdr = 0x0, - Nalu = 0x1, - EndOfSequence = 0x2, -} diff --git a/crates/flv/src/errors.rs b/crates/flv/src/errors.rs deleted file mode 100644 index e0705ae97..000000000 --- a/crates/flv/src/errors.rs +++ /dev/null @@ -1,52 +0,0 @@ -use std::{fmt, io}; - -#[derive(Debug)] -pub enum FlvDemuxerError { - IO(io::Error), - Amf0Read(scuffle_amf0::Amf0ReadError), - InvalidFlvHeader, - InvalidScriptDataName, - InvalidEnhancedPacketType(u8), - InvalidSoundRate(u8), - InvalidSoundSize(u8), - InvalidSoundType(u8), - InvalidFrameType(u8), -} - -impl From for FlvDemuxerError { - fn from(error: io::Error) -> Self { - Self::IO(error) - } -} - -impl From for FlvDemuxerError { - fn from(value: scuffle_amf0::Amf0ReadError) -> Self { - Self::Amf0Read(value) - } -} - -impl std::fmt::Display for FlvDemuxerError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Self::IO(error) => write!(f, "io error: {}", error), - Self::Amf0Read(error) => write!(f, "amf0 read error: {}", error), - Self::InvalidFlvHeader => write!(f, "invalid flv header"), - Self::InvalidScriptDataName => write!(f, "invalid script data name"), - Self::InvalidEnhancedPacketType(error) => { - write!(f, "invalid enhanced packet type: {}", error) - } - Self::InvalidSoundRate(error) => { - write!(f, "invalid sound rate: {}", error) - } - Self::InvalidSoundSize(error) => { - write!(f, "invalid sound size: {}", error) - } - Self::InvalidSoundType(error) => { - write!(f, "invalid sound type: {}", error) - } - Self::InvalidFrameType(error) => { - write!(f, "invalid frame type: {}", error) - } - } - } -} diff --git a/crates/flv/src/file.rs b/crates/flv/src/file.rs new file mode 100644 index 000000000..7ed7f9015 --- /dev/null +++ b/crates/flv/src/file.rs @@ -0,0 +1,39 @@ +use byteorder::{BigEndian, ReadBytesExt}; +use bytes::{Buf, Bytes}; + +use super::header::FlvHeader; +use super::tag::FlvTag; + +/// An FLV file is a combination of a [`FlvHeader`] followed by the 
`FLVFileBody` (which is just a series of [`FlvTag`]s) +/// +/// The `FLVFileBody` is defined by: +/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Page 8) +/// - video_file_format_spec_v10_1.pdf (Annex E.3 - The FLV File Body) +#[derive(Debug, Clone, PartialEq)] +pub struct FlvFile { + pub header: FlvHeader, + pub tags: Vec, +} + +impl FlvFile { + pub fn demux(reader: &mut std::io::Cursor) -> std::io::Result { + let header = FlvHeader::demux(reader)?; + + let mut tags = Vec::new(); + while reader.has_remaining() { + // We don't care about the previous tag size, its only really used for seeking backwards. + reader.read_u32::()?; + + // If there is no more data, we can stop reading. + if !reader.has_remaining() { + break; + } + + // Demux the tag from the reader. + let tag = FlvTag::demux(reader)?; + tags.push(tag); + } + + Ok(FlvFile { header, tags }) + } +} diff --git a/crates/flv/src/flv.rs b/crates/flv/src/flv.rs deleted file mode 100644 index 5c937fb5d..000000000 --- a/crates/flv/src/flv.rs +++ /dev/null @@ -1,280 +0,0 @@ -use std::io::{ - Read, {self}, -}; - -use byteorder::{BigEndian, ReadBytesExt}; -use bytes::{Buf, Bytes}; -use h264::AVCDecoderConfigurationRecord; -use h265::HEVCDecoderConfigurationRecord; -use num_traits::FromPrimitive; -use scuffle_amf0::{Amf0Decoder, Amf0Value}; -use scuffle_av1::AV1CodecConfigurationRecord; -use scuffle_bytes_util::BytesCursorExt; - -use crate::define::Flv; -use crate::{ - AacPacket, AacPacketType, Av1Packet, AvcPacket, AvcPacketType, EnhancedPacket, EnhancedPacketType, FlvDemuxerError, - FlvHeader, FlvTag, FlvTagAudioData, FlvTagData, FlvTagType, FlvTagVideoData, FrameType, HevcPacket, SoundCodecId, - SoundRate, SoundSize, SoundType, VideoCodecId, VideoFourCC, -}; - -impl Flv { - /// Demux a FLV file. 
- pub fn demux(reader: &mut io::Cursor) -> Result { - let header = FlvHeader::demux(reader)?; - - let mut tags = Vec::new(); - while reader.has_remaining() { - reader.read_u32::()?; // previous tag size - - if !reader.has_remaining() { - break; - } - - let tag = FlvTag::demux(reader)?; - tags.push(tag); - } - - Ok(Flv { header, tags }) - } -} - -impl FlvHeader { - pub fn demux(reader: &mut io::Cursor) -> Result { - let mut flv_bytes = [0; 3]; - reader.read_exact(&mut flv_bytes)?; - - if &flv_bytes != b"FLV" { - return Err(FlvDemuxerError::InvalidFlvHeader); - } - - let version = reader.read_u8()?; - let flags = reader.read_u8()?; - - let has_audio = flags & 0b0000_0100 != 0; - let has_video = flags & 0b0000_0001 != 0; - - let data_offset = reader.read_u32::()?; - - let remaining = data_offset - reader.position() as u32; - let extra = reader.extract_bytes(remaining as usize)?; - - Ok(FlvHeader { - data_offset, - has_audio, - has_video, - version, - extra, - }) - } -} - -impl FlvTag { - pub fn demux(reader: &mut io::Cursor) -> Result { - let tag_type = reader.read_u8()?; - let data_size = reader.read_u24::()?; - let timestamp = reader.read_u24::()? | ((reader.read_u8()? 
as u32) << 24); - let stream_id = reader.read_u24::()?; - - let data = reader.extract_bytes(data_size as usize)?; - - let data = FlvTagData::demux(tag_type, data)?; - - Ok(FlvTag { - timestamp, - stream_id, - data, - }) - } -} - -impl FlvTagData { - pub fn demux(tag_type: u8, data: Bytes) -> Result { - let mut reader = io::Cursor::new(data); - - match FlvTagType::from_u8(tag_type) { - Some(FlvTagType::Audio) => { - let flags = reader.read_u8()?; - - let sound_format = (flags & 0b1111_0000) >> 4; - - let sound_rate = (flags & 0b0000_1100) >> 2; - let sound_rate = SoundRate::from_u8(sound_rate).ok_or(FlvDemuxerError::InvalidSoundRate(sound_rate))?; - - let sound_size = (flags & 0b0000_0010) >> 1; - let sound_size = SoundSize::from_u8(sound_size).ok_or(FlvDemuxerError::InvalidSoundSize(sound_size))?; - - let sound_type = flags & 0b0000_0001; - let sound_type = SoundType::from_u8(sound_type).ok_or(FlvDemuxerError::InvalidSoundType(sound_type))?; - - let data = FlvTagAudioData::demux(sound_format, &mut reader)?; - - Ok(FlvTagData::Audio { - sound_rate, - sound_size, - sound_type, - data, - }) - } - Some(FlvTagType::Video) => { - let flags = reader.read_u8()?; - let mut frame_type = flags >> 4; - - let mut is_enhanced = false; - let codec_id = flags & 0b0000_1111; - - if frame_type & 0b1000 != 0 { - // Enhanced Flv Tag - frame_type &= 0b0111; - is_enhanced = true; - - if codec_id == EnhancedPacketType::Metadata as u8 { - frame_type = FrameType::EnhancedMetadata as u8; - } - } - - let frame_type = FrameType::from_u8(frame_type).ok_or(FlvDemuxerError::InvalidFrameType(frame_type))?; - - Ok(FlvTagData::Video { - frame_type, - data: if is_enhanced { - FlvTagVideoData::demux_enhanced(codec_id, &mut reader)? - } else { - FlvTagVideoData::demux(codec_id, &mut reader)? 
- }, - }) - } - Some(FlvTagType::ScriptData) => { - let remaining = reader.extract_remaining(); - let values = Amf0Decoder::new(&remaining).decode_all()?; - - let name = match values.first() { - Some(Amf0Value::String(name)) => name, - _ => return Err(FlvDemuxerError::InvalidScriptDataName), - }; - - Ok(FlvTagData::ScriptData { - name: name.to_string(), - data: values.into_iter().skip(1).map(|v| v.to_owned()).collect(), - }) - } - None => Ok(FlvTagData::Unknown { - tag_type, - data: reader.extract_remaining(), - }), - } - } -} - -impl FlvTagAudioData { - pub fn demux(sound_format: u8, reader: &mut io::Cursor) -> Result { - match SoundCodecId::from_u8(sound_format) { - Some(SoundCodecId::Aac) => { - let aac_packet_type = reader.read_u8()?; - Ok(Self::Aac(AacPacket::demux(aac_packet_type, reader)?)) - } - _ => Ok(Self::Unknown { - sound_format, - data: reader.extract_remaining(), - }), - } - } -} - -impl AacPacket { - pub fn demux(aac_packet_type: u8, reader: &mut io::Cursor) -> Result { - match AacPacketType::from_u8(aac_packet_type) { - Some(AacPacketType::SeqHdr) => Ok(Self::SequenceHeader(reader.extract_remaining())), - Some(AacPacketType::Raw) => Ok(Self::Raw(reader.extract_remaining())), - _ => Ok(Self::Unknown { - aac_packet_type, - data: reader.extract_remaining(), - }), - } - } -} - -impl FlvTagVideoData { - pub fn demux(codec_id: u8, reader: &mut io::Cursor) -> Result { - match VideoCodecId::from_u8(codec_id) { - Some(VideoCodecId::Avc) => { - let avc_packet_type = reader.read_u8()?; - Ok(Self::Avc(AvcPacket::demux(avc_packet_type, reader)?)) - } - _ => Ok(Self::Unknown { - codec_id, - data: reader.extract_remaining(), - }), - } - } - - pub fn demux_enhanced(packet_type: u8, reader: &mut io::Cursor) -> Result { - // In the enhanced spec the codec id is the packet type - let packet_type = - EnhancedPacketType::from_u8(packet_type).ok_or(FlvDemuxerError::InvalidEnhancedPacketType(packet_type))?; - let mut video_codec = [0; 4]; - reader.read_exact(&mut 
video_codec)?; - let video_codec = VideoFourCC::from(video_codec); - - match packet_type { - EnhancedPacketType::SequenceEnd => { - return Ok(Self::Enhanced(EnhancedPacket::SequenceEnd)); - } - EnhancedPacketType::Metadata => { - return Ok(Self::Enhanced(EnhancedPacket::Metadata(reader.extract_remaining()))); - } - _ => {} - } - - match (video_codec, packet_type) { - (VideoFourCC::Av1, EnhancedPacketType::SequenceStart) => Ok(Self::Enhanced(EnhancedPacket::Av1( - Av1Packet::SequenceStart(AV1CodecConfigurationRecord::demux(reader)?), - ))), - (VideoFourCC::Av1, EnhancedPacketType::CodedFrames) => Ok(Self::Enhanced(EnhancedPacket::Av1(Av1Packet::Raw( - reader.extract_remaining(), - )))), - (VideoFourCC::Hevc, EnhancedPacketType::SequenceStart) => Ok(Self::Enhanced(EnhancedPacket::Hevc( - HevcPacket::SequenceStart(HEVCDecoderConfigurationRecord::demux(reader)?), - ))), - (VideoFourCC::Hevc, EnhancedPacketType::CodedFrames) => { - let composition_time = reader.read_i24::()?; - Ok(Self::Enhanced(EnhancedPacket::Hevc(HevcPacket::Nalu { - composition_time: Some(composition_time), - data: reader.extract_remaining(), - }))) - } - (VideoFourCC::Hevc, EnhancedPacketType::CodedFramesX) => { - Ok(Self::Enhanced(EnhancedPacket::Hevc(HevcPacket::Nalu { - composition_time: None, - data: reader.extract_remaining(), - }))) - } - _ => Ok(Self::Enhanced(EnhancedPacket::Unknown { - packet_type: packet_type as u8, - video_codec: video_codec.into(), - data: reader.extract_remaining(), - })), - } - } -} - -impl AvcPacket { - pub fn demux(avc_packet_type: u8, reader: &mut io::Cursor) -> Result { - match AvcPacketType::from_u8(avc_packet_type) { - Some(AvcPacketType::SeqHdr) => { - reader.read_u24::()?; // composition time (always 0) - Ok(Self::SequenceHeader(AVCDecoderConfigurationRecord::demux(reader)?)) - } - Some(AvcPacketType::Nalu) => Ok(Self::Nalu { - composition_time: reader.read_u24::()?, - data: reader.extract_remaining(), - }), - Some(AvcPacketType::EndOfSequence) => 
Ok(Self::EndOfSequence), - _ => Ok(Self::Unknown { - avc_packet_type, - composition_time: reader.read_u24::()?, - data: reader.extract_remaining(), - }), - } - } -} diff --git a/crates/flv/src/header.rs b/crates/flv/src/header.rs new file mode 100644 index 000000000..70d2ffd64 --- /dev/null +++ b/crates/flv/src/header.rs @@ -0,0 +1,61 @@ +use std::io; + +use byteorder::{BigEndian, ReadBytesExt}; +use bytes::Bytes; +use scuffle_bytes_util::BytesCursorExt; + +/// The FLV Header +/// Whenever a FLV file is read these are the first 9 bytes of the file. +/// +/// Defined by: +/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV Header - Page 8) +/// - video_file_format_spec_v10_1.pdf (Annex E.2 - The FLV Header) +#[derive(Debug, Clone, PartialEq)] +pub struct FlvHeader { + /// The version of the FLV file. + pub version: u8, + /// Whether the FLV file has audio. + pub has_audio: bool, + /// Whether the FLV file has video. + pub has_video: bool, + /// The extra data in the FLV file. + /// Since the header provides a data offset, this is the bytes between the end of the header and the start of the data. + pub extra: Bytes, +} + +impl FlvHeader { + /// Demux the FLV header from the given reader. + /// The reader will be returned in the position of the start of the data offset. + pub fn demux(reader: &mut io::Cursor) -> io::Result { + let start = reader.position() as usize; + + let signature = reader.read_u24::()?; + + // 0 byte at the beginning because we are only reading 3 bytes not 4. + if signature != u32::from_be_bytes([0, b'F', b'L', b'V']) { + return Err(io::Error::new(io::ErrorKind::InvalidData, "invalid signature")); + } + + let version = reader.read_u8()?; + let flags = reader.read_u8()?; + let has_audio = (flags & 0b00000100) != 0; + let has_video = (flags & 0b00000001) != 0; + + let offset = reader.read_u32::()? 
as usize; + let end = reader.position() as usize; + let size = end - start; + + let extra = reader.extract_bytes( + offset + .checked_sub(size) + .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "invalid offset"))?, + )?; + + Ok(FlvHeader { + version, + has_audio, + has_video, + extra, + }) + } +} diff --git a/crates/flv/src/hevc.rs b/crates/flv/src/hevc.rs new file mode 100644 index 000000000..02e7f9070 --- /dev/null +++ b/crates/flv/src/hevc.rs @@ -0,0 +1,9 @@ +use bytes::Bytes; +use h265::HEVCDecoderConfigurationRecord; + +/// HEVC Packet +#[derive(Debug, Clone, PartialEq)] +pub enum HevcPacket { + SequenceStart(HEVCDecoderConfigurationRecord), + Nalu { composition_time: Option, data: Bytes }, +} diff --git a/crates/flv/src/lib.rs b/crates/flv/src/lib.rs index 065999008..056fb0d80 100644 --- a/crates/flv/src/lib.rs +++ b/crates/flv/src/lib.rs @@ -1,9 +1,876 @@ -mod define; -mod errors; -mod flv; +//! # scuffle-flv +//! +//! +//! [![crates.io](https://img.shields.io/crates/v/scuffle-flv.svg)](https://crates.io/crates/scuffle-flv) [![docs.rs](https://img.shields.io/docsrs/scuffle-flv)](https://docs.rs/scuffle-flv) +//! +//! --- +//! +//! A pure Rust implementation of the FLV format, allowing for demuxing of FLV files or streams. +//! +//! This does not support all FLV features (mainly those from FLV 10.1), however it does support some newer features, from the enhanced FLV specification. +//! +//! ## Specifications +//! +//! - +//! - +//! - +//! - +//! +//! ## License +//! +//! This project is licensed under the [MIT](./LICENSE.MIT) or [Apache-2.0](./LICENSE.Apache-2.0) license. +//! You can choose between one of them if you use this work. +//! +//! 
`SPDX-License-Identifier: MIT OR Apache-2.0` +#![cfg_attr(all(coverage_nightly, test), feature(coverage_attribute))] -pub use define::*; -pub use errors::FlvDemuxerError; +pub mod aac; +pub mod audio; +pub mod av1; +pub mod avc; +pub mod file; +pub mod header; +pub mod hevc; +pub mod script; +pub mod tag; +pub mod video; + +mod macros; + +pub use crate::file::FlvFile; +pub use crate::header::FlvHeader; +pub use crate::tag::{FlvTag, FlvTagData, FlvTagType}; #[cfg(test)] -mod tests; +#[cfg_attr(all(test, coverage_nightly), coverage(off))] +mod tests { + use std::collections::HashMap; + use std::io; + use std::path::PathBuf; + + use scuffle_amf0::Amf0Value; + use scuffle_av1::seq::SequenceHeaderObu; + use scuffle_av1::ObuHeader; + use bytes::Bytes; + use h264::{Sps, SpsExtended}; + use scuffle_aac::{AudioObjectType, PartialAudioSpecificConfig}; + + use crate::aac::AacPacket; + use crate::audio::{AudioData, AudioDataBody, SoundRate, SoundSize, SoundType}; + use crate::av1::Av1Packet; + use crate::avc::AvcPacket; + use crate::file::FlvFile; + use crate::hevc::HevcPacket; + use crate::script::ScriptData; + use crate::tag::FlvTagData; + use crate::video::{EnhancedPacket, FrameType, VideoData, VideoDataBody}; + + #[test] + fn test_demux_flv_avc_aac() { + let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../assets"); + + let data = Bytes::from(std::fs::read(dir.join("avc_aac.flv")).expect("failed to read file")); + let mut reader = io::Cursor::new(data); + + let flv = FlvFile::demux(&mut reader).expect("failed to demux flv"); + + assert_eq!(flv.header.version, 1); + assert!(flv.header.has_audio); + assert!(flv.header.has_video); + assert_eq!(flv.header.extra.len(), 0); + + + let mut tags = flv.tags.into_iter(); + + // Metadata tag + { + let tag = tags.next().expect("expected tag"); + assert_eq!(tag.timestamp_ms, 0); + assert_eq!(tag.stream_id, 0); + + // This is a metadata tag + let script_data = match tag.data { + FlvTagData::ScriptData(ScriptData { name, data 
}) => { + assert_eq!(name, "onMetaData"); + data + } + _ => panic!("expected script data"), + }; + + // Script data should be an AMF0 object + let object = match &script_data[0] { + Amf0Value::Object(object) => object, + _ => panic!("expected object"), + }; + + let object = object.into_iter().map(|(k, v)| (k.as_ref(), v)).collect::>(); + + // Should have a audio sample size property + let audio_sample_size = match object.get("audiosamplesize") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected audio sample size"), + }; + + assert_eq!(audio_sample_size, &16.0); + + // Should have a audio sample rate property + let audio_sample_rate = match object.get("audiosamplerate") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected audio sample rate"), + }; + + assert_eq!(audio_sample_rate, &48000.0); + + // Should have a stereo property + let stereo = match object.get("stereo") { + Some(Amf0Value::Boolean(boolean)) => boolean, + _ => panic!("expected stereo"), + }; + + assert_eq!(stereo, &true); + + // Should have an audio codec id property + let audio_codec_id = match object.get("audiocodecid") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected audio codec id"), + }; + + assert_eq!(audio_codec_id, &10.0); // AAC + + // Should have a video codec id property + let video_codec_id = match object.get("videocodecid") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected video codec id"), + }; + + assert_eq!(video_codec_id, &7.0); // AVC + + // Should have a duration property + let duration = match object.get("duration") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected duration"), + }; + + assert_eq!(duration, &1.088); // 1.088 seconds + + // Should have a width property + let width = match object.get("width") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected width"), + }; + + assert_eq!(width, &3840.0); + + // Should have a height property + let height = match 
object.get("height") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected height"), + }; + + assert_eq!(height, &2160.0); + + // Should have a framerate property + let framerate = match object.get("framerate") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected framerate"), + }; + + assert_eq!(framerate, &60.0); + + // Should have a videodatarate property + match object.get("videodatarate") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected videodatarate"), + }; + + // Should have a audiodatarate property + match object.get("audiodatarate") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected audiodatarate"), + }; + + // Should have a minor version property + let minor_version = match object.get("minor_version") { + Some(Amf0Value::String(number)) => number, + _ => panic!("expected minor version"), + }; + + assert_eq!(minor_version, "512"); + + // Should have a major brand property + let major_brand = match object.get("major_brand") { + Some(Amf0Value::String(string)) => string, + _ => panic!("expected major brand"), + }; + + assert_eq!(major_brand, "iso5"); + + // Should have a compatible_brands property + let compatible_brands = match object.get("compatible_brands") { + Some(Amf0Value::String(string)) => string, + _ => panic!("expected compatible brands"), + }; + + assert_eq!(compatible_brands, "iso5iso6mp41"); + } + + // Video Sequence Header Tag + { + let tag = tags.next().expect("expected tag"); + assert_eq!(tag.timestamp_ms, 0); + assert_eq!(tag.stream_id, 0); + + // This is a video tag + let (frame_type, video_data) = match tag.data { + FlvTagData::Video(VideoData { frame_type, body }) => (frame_type, body), + _ => panic!("expected video data"), + }; + + assert_eq!(frame_type, FrameType::Keyframe); + + // Video data should be an AVC sequence header + let avc_decoder_configuration_record = match video_data { + VideoDataBody::Avc(AvcPacket::SequenceHeader(data)) => data, + _ => 
panic!("expected avc sequence header"), + }; + + // The avc sequence header should be able to be decoded into an avc decoder + // configuration record + assert_eq!(avc_decoder_configuration_record.profile_indication, 100); + assert_eq!(avc_decoder_configuration_record.profile_compatibility, 0); + assert_eq!(avc_decoder_configuration_record.level_indication, 51); // 5.1 + assert_eq!(avc_decoder_configuration_record.length_size_minus_one, 3); + assert_eq!(avc_decoder_configuration_record.sps.len(), 1); + assert_eq!(avc_decoder_configuration_record.pps.len(), 1); + assert_eq!(avc_decoder_configuration_record.extended_config, None); + + let sps = &avc_decoder_configuration_record.sps[0]; + // SPS should be able to be decoded into a sequence parameter set + let sps = Sps::parse(sps.clone()).expect("expected sequence parameter set"); + + assert_eq!(sps.profile_idc, 100); + assert_eq!(sps.level_idc, 51); + assert_eq!(sps.width, 3840); + assert_eq!(sps.height, 2160); + assert_eq!(sps.frame_rate, 60.0); + + assert_eq!( + sps.ext, + Some(SpsExtended { + chroma_format_idc: 1, + bit_depth_luma_minus8: 0, + bit_depth_chroma_minus8: 0, + }) + ) + } + + // Audio Sequence Header Tag + { + let tag = tags.next().expect("expected tag"); + assert_eq!(tag.timestamp_ms, 0); + assert_eq!(tag.stream_id, 0); + + let (data, sound_rate, sound_size, sound_type) = match tag.data { + FlvTagData::Audio(AudioData { + sound_rate, + sound_size, + sound_type, + body, + }) => (body, sound_rate, sound_size, sound_type), + _ => panic!("expected audio data"), + }; + + assert_eq!(sound_rate, SoundRate::Hz44000); + assert_eq!(sound_size, SoundSize::Bit16); + assert_eq!(sound_type, SoundType::Stereo); + + // Audio data should be an AAC sequence header + let data = match data { + AudioDataBody::Aac(AacPacket::SequenceHeader(data)) => data, + _ => panic!("expected aac sequence header"), + }; + + // The aac sequence header should be able to be decoded into an aac decoder + // configuration record + let 
aac_decoder_configuration_record = + PartialAudioSpecificConfig::parse(&data).expect("expected aac decoder configuration record"); + + assert_eq!( + aac_decoder_configuration_record.audio_object_type, + AudioObjectType::AacLowComplexity + ); + assert_eq!(aac_decoder_configuration_record.sampling_frequency, 48000); + assert_eq!(aac_decoder_configuration_record.channel_configuration, 2); + } + + // Rest of the tags should be video / audio data + let mut last_timestamp = 0; + let mut read_seq_end = false; + for tag in tags { + assert!(tag.timestamp_ms >= last_timestamp); + assert_eq!(tag.stream_id, 0); + + last_timestamp = tag.timestamp_ms; + + match tag.data { + FlvTagData::Audio(AudioData { + body, + sound_rate, + sound_size, + sound_type, + }) => { + assert_eq!(sound_rate, SoundRate::Hz44000); + assert_eq!(sound_size, SoundSize::Bit16); + assert_eq!(sound_type, SoundType::Stereo); + match body { + AudioDataBody::Aac(AacPacket::Raw(data)) => data, + _ => panic!("expected aac raw packet"), + }; + } + FlvTagData::Video(VideoData { frame_type, body }) => { + match frame_type { + FrameType::Keyframe => (), + FrameType::Interframe => (), + _ => panic!("expected keyframe or interframe"), + } + + match body { + VideoDataBody::Avc(AvcPacket::Nalu { .. 
}) => assert!(!read_seq_end), + VideoDataBody::Avc(AvcPacket::EndOfSequence) => { + assert!(!read_seq_end); + read_seq_end = true; + } + _ => panic!("expected avc nalu packet: {:?}", body), + }; + } + _ => panic!("expected audio data"), + }; + } + + assert!(read_seq_end); + } + + #[test] + fn test_demux_flv_av1_aac() { + let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../assets"); + + let data = Bytes::from(std::fs::read(dir.join("av1_aac.flv")).expect("failed to read file")); + let mut reader = io::Cursor::new(data); + + let flv = FlvFile::demux(&mut reader).expect("failed to demux flv"); + + assert_eq!(flv.header.version, 1); + assert!(flv.header.has_audio); + assert!(flv.header.has_video); + assert_eq!(flv.header.extra.len(), 0); + + let mut tags = flv.tags.into_iter(); + + // Metadata tag + { + let tag = tags.next().expect("expected tag"); + assert_eq!(tag.timestamp_ms, 0); + assert_eq!(tag.stream_id, 0); + + // This is a metadata tag + let script_data = match tag.data { + FlvTagData::ScriptData(ScriptData { name, data }) => { + assert_eq!(name, "onMetaData"); + data + } + _ => panic!("expected script data"), + }; + + // Script data should be an AMF0 object + let object = match &script_data[0] { + Amf0Value::Object(object) => object, + _ => panic!("expected object"), + }; + + let object = object.into_iter().map(|(k, v)| (k.as_ref(), v)).collect::>(); + + // Should have a audio sample size property + let audio_sample_size = match object.get("audiosamplesize") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected audio sample size"), + }; + + assert_eq!(audio_sample_size, &16.0); + + // Should have a audio sample rate property + let audio_sample_rate = match object.get("audiosamplerate") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected audio sample rate"), + }; + + assert_eq!(audio_sample_rate, &48000.0); + + // Should have a stereo property + let stereo = match object.get("stereo") { + 
Some(Amf0Value::Boolean(boolean)) => boolean, + _ => panic!("expected stereo"), + }; + + assert_eq!(stereo, &true); + + // Should have an audio codec id property + let audio_codec_id = match object.get("audiocodecid") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected audio codec id"), + }; + + assert_eq!(audio_codec_id, &10.0); // AAC + + // Should have a video codec id property + let video_codec_id = match object.get("videocodecid") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected video codec id"), + }; + + assert_eq!(video_codec_id, &7.0); // AVC + + // Should have a duration property + let duration = match object.get("duration") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected duration"), + }; + + assert_eq!(duration, &0.0); // 0 seconds (this was a live stream) + + // Should have a width property + let width = match object.get("width") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected width"), + }; + + assert_eq!(width, &2560.0); + + // Should have a height property + let height = match object.get("height") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected height"), + }; + + assert_eq!(height, &1440.0); + + // Should have a framerate property + let framerate = match object.get("framerate") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected framerate"), + }; + + assert_eq!(framerate, &144.0); + + // Should have a videodatarate property + match object.get("videodatarate") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected videodatarate"), + }; + + // Should have a audiodatarate property + match object.get("audiodatarate") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected audiodatarate"), + }; + } + + // Audio Sequence Header Tag + { + let tag = tags.next().expect("expected tag"); + assert_eq!(tag.timestamp_ms, 0); + assert_eq!(tag.stream_id, 0); + + let (body, sound_rate, sound_size, sound_type) = 
match tag.data { + FlvTagData::Audio(AudioData { + body, + sound_rate, + sound_size, + sound_type, + }) => (body, sound_rate, sound_size, sound_type), + _ => panic!("expected audio data"), + }; + + assert_eq!(sound_rate, SoundRate::Hz44000); + assert_eq!(sound_size, SoundSize::Bit16); + assert_eq!(sound_type, SoundType::Stereo); + + // Audio data should be an AAC sequence header + let data = match body { + AudioDataBody::Aac(AacPacket::SequenceHeader(data)) => data, + _ => panic!("expected aac sequence header"), + }; + + // The aac sequence header should be able to be decoded into an aac decoder + // configuration record + let aac_decoder_configuration_record = + PartialAudioSpecificConfig::parse(&data).expect("expected aac decoder configuration record"); + + assert_eq!( + aac_decoder_configuration_record.audio_object_type, + AudioObjectType::AacLowComplexity + ); + assert_eq!(aac_decoder_configuration_record.sampling_frequency, 48000); + assert_eq!(aac_decoder_configuration_record.channel_configuration, 2); + } + + // Video Sequence Header Tag + { + let tag = tags.next().expect("expected tag"); + assert_eq!(tag.timestamp_ms, 0); + assert_eq!(tag.stream_id, 0); + + // This is a video tag + let (frame_type, video_data) = match tag.data { + FlvTagData::Video(VideoData { frame_type, body }) => (frame_type, body), + _ => panic!("expected video data"), + }; + + assert_eq!(frame_type, FrameType::Keyframe); + + // Video data should be an AV1 sequence header + let config = match video_data { + VideoDataBody::Enhanced(EnhancedPacket::Av1(Av1Packet::SequenceStart(config))) => config, + _ => panic!("expected av1 sequence header found {:?}", video_data), + }; + + assert_eq!(config.chroma_sample_position, 0); + assert!(config.chroma_subsampling_x); // 5.1 + assert!(config.chroma_subsampling_y); + assert!(!config.high_bitdepth); + assert!(!config.twelve_bit); + + let mut reader = std::io::Cursor::new(config.config_obu); + + let header = ObuHeader::parse(&mut 
reader).expect("expected obu header"); + + let seq_obu = SequenceHeaderObu::parse(header, &mut reader).expect("expected sequence obu"); + + assert_eq!(seq_obu.max_frame_height, 1440); + assert_eq!(seq_obu.max_frame_width, 2560); + } + + // Rest of the tags should be video / audio data + let mut last_timestamp = 0; + let mut read_seq_end = false; + for tag in tags { + assert!(tag.timestamp_ms >= last_timestamp || tag.timestamp_ms == 0); // Timestamps should be monotonically increasing or 0 + assert_eq!(tag.stream_id, 0); + + if tag.timestamp_ms != 0 { + last_timestamp = tag.timestamp_ms; + } + + match tag.data { + FlvTagData::Audio(AudioData { + body, + sound_rate, + sound_size, + sound_type, + }) => { + assert_eq!(sound_rate, SoundRate::Hz44000); + assert_eq!(sound_size, SoundSize::Bit16); + assert_eq!(sound_type, SoundType::Stereo); + match body { + AudioDataBody::Aac(AacPacket::Raw(data)) => data, + _ => panic!("expected aac raw packet"), + }; + } + FlvTagData::Video(VideoData { frame_type, body }) => { + match frame_type { + FrameType::Keyframe => (), + FrameType::Interframe => (), + _ => panic!("expected keyframe or interframe"), + } + + match body { + VideoDataBody::Enhanced(EnhancedPacket::Av1(Av1Packet::Raw(_))) => { + assert!(!read_seq_end) + } + VideoDataBody::Enhanced(EnhancedPacket::SequenceEnd) => { + assert!(!read_seq_end); + read_seq_end = true; + } + _ => panic!("expected av1 raw packet: {:?}", body), + }; + } + _ => panic!("expected audio data"), + }; + } + + assert!(read_seq_end); + } + + #[test] + fn test_demux_flv_hevc_aac() { + let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../assets"); + + let data = Bytes::from(std::fs::read(dir.join("hevc_aac.flv")).expect("failed to read file")); + let mut reader = io::Cursor::new(data); + + let flv = FlvFile::demux(&mut reader).expect("failed to demux flv"); + + assert_eq!(flv.header.version, 1); + assert!(flv.header.has_audio); + assert!(flv.header.has_video); + 
assert_eq!(flv.header.extra.len(), 0); + + let mut tags = flv.tags.into_iter(); + + // Metadata tag + { + let tag = tags.next().expect("expected tag"); + assert_eq!(tag.timestamp_ms, 0); + assert_eq!(tag.stream_id, 0); + + // This is a metadata tag + let script_data = match tag.data { + FlvTagData::ScriptData(ScriptData { name, data }) => { + assert_eq!(name, "onMetaData"); + data + } + _ => panic!("expected script data"), + }; + + // Script data should be an AMF0 object + let object = match &script_data[0] { + Amf0Value::Object(object) => object, + _ => panic!("expected object"), + }; + + let object = object.into_iter().map(|(k, v)| (k.as_ref(), v)).collect::>(); + + // Should have a audio sample size property + let audio_sample_size = match object.get("audiosamplesize") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected audio sample size"), + }; + + assert_eq!(audio_sample_size, &16.0); + + // Should have a audio sample rate property + let audio_sample_rate = match object.get("audiosamplerate") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected audio sample rate"), + }; + + assert_eq!(audio_sample_rate, &48000.0); + + // Should have a stereo property + let stereo = match object.get("stereo") { + Some(Amf0Value::Boolean(boolean)) => boolean, + _ => panic!("expected stereo"), + }; + + assert_eq!(stereo, &true); + + // Should have an audio codec id property + let audio_codec_id = match object.get("audiocodecid") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected audio codec id"), + }; + + assert_eq!(audio_codec_id, &10.0); // AAC + + // Should have a video codec id property + let video_codec_id = match object.get("videocodecid") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected video codec id"), + }; + + assert_eq!(video_codec_id, &7.0); // AVC + + // Should have a duration property + let duration = match object.get("duration") { + Some(Amf0Value::Number(number)) => number, + _ => 
panic!("expected duration"), + }; + + assert_eq!(duration, &0.0); // 0 seconds (this was a live stream) + + // Should have a width property + let width = match object.get("width") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected width"), + }; + + assert_eq!(width, &2560.0); + + // Should have a height property + let height = match object.get("height") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected height"), + }; + + assert_eq!(height, &1440.0); + + // Should have a framerate property + let framerate = match object.get("framerate") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected framerate"), + }; + + assert_eq!(framerate, &144.0); + + // Should have a videodatarate property + match object.get("videodatarate") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected videodatarate"), + }; + + // Should have a audiodatarate property + match object.get("audiodatarate") { + Some(Amf0Value::Number(number)) => number, + _ => panic!("expected audiodatarate"), + }; + } + + // Audio Sequence Header Tag + { + let tag = tags.next().expect("expected tag"); + assert_eq!(tag.timestamp_ms, 0); + assert_eq!(tag.stream_id, 0); + + let (body, sound_rate, sound_size, sound_type) = match tag.data { + FlvTagData::Audio(AudioData { + body, + sound_rate, + sound_size, + sound_type, + }) => (body, sound_rate, sound_size, sound_type), + _ => panic!("expected audio data"), + }; + + assert_eq!(sound_rate, SoundRate::Hz44000); + assert_eq!(sound_size, SoundSize::Bit16); + assert_eq!(sound_type, SoundType::Stereo); + + // Audio data should be an AAC sequence header + let data = match body { + AudioDataBody::Aac(AacPacket::SequenceHeader(data)) => data, + _ => panic!("expected aac sequence header"), + }; + + // The aac sequence header should be able to be decoded into an aac decoder + // configuration record + let aac_decoder_configuration_record = + PartialAudioSpecificConfig::parse(&data).expect("expected aac 
decoder configuration record"); + + assert_eq!( + aac_decoder_configuration_record.audio_object_type, + AudioObjectType::AacLowComplexity + ); + assert_eq!(aac_decoder_configuration_record.sampling_frequency, 48000); + assert_eq!(aac_decoder_configuration_record.channel_configuration, 2); + } + + // Video Sequence Header Tag + { + let tag = tags.next().expect("expected tag"); + assert_eq!(tag.timestamp_ms, 0); + assert_eq!(tag.stream_id, 0); + + // This is a video tag + let (frame_type, video_data) = match tag.data { + FlvTagData::Video(VideoData { frame_type, body }) => (frame_type, body), + _ => panic!("expected video data"), + }; + + assert_eq!(frame_type, FrameType::Keyframe); + + // Video data should be an HEVC sequence header + let config = match video_data { + VideoDataBody::Enhanced(EnhancedPacket::Hevc(HevcPacket::SequenceStart(config))) => config, + _ => panic!("expected hevc sequence header found {:?}", video_data), + }; + + assert_eq!(config.configuration_version, 1); + assert_eq!(config.avg_frame_rate, 0); + assert_eq!(config.constant_frame_rate, 0); + assert_eq!(config.num_temporal_layers, 1); + + // We should be able to find a SPS NAL unit in the sequence header + let Some(sps) = config + .arrays + .iter() + .find(|a| a.nal_unit_type == h265::NaluType::Sps) + .and_then(|v| v.nalus.first()) + else { + panic!("expected sps"); + }; + + // We should be able to find a PPS NAL unit in the sequence header + let Some(_) = config + .arrays + .iter() + .find(|a| a.nal_unit_type == h265::NaluType::Pps) + .and_then(|v| v.nalus.first()) + else { + panic!("expected pps"); + }; + + // We should be able to decode the SPS NAL unit + let sps = h265::Sps::parse(sps.clone()).expect("expected sps"); + + assert_eq!(sps.frame_rate, 144.0); + assert_eq!(sps.width, 2560); + assert_eq!(sps.height, 1440); + assert_eq!( + sps.color_config, + Some(h265::ColorConfig { + full_range: false, + color_primaries: 1, + transfer_characteristics: 1, + matrix_coefficients: 1, + }) + ) + } + 
+ // Rest of the tags should be video / audio data + let mut last_timestamp = 0; + let mut read_seq_end = false; + for tag in tags { + assert!(tag.timestamp_ms >= last_timestamp || tag.timestamp_ms == 0); // Timestamps should be monotonically increasing or 0 + assert_eq!(tag.stream_id, 0); + + if tag.timestamp_ms != 0 { + last_timestamp = tag.timestamp_ms; + } + + match tag.data { + FlvTagData::Audio(AudioData { + body, + sound_rate, + sound_size, + sound_type, + }) => { + assert_eq!(sound_rate, SoundRate::Hz44000); + assert_eq!(sound_size, SoundSize::Bit16); + assert_eq!(sound_type, SoundType::Stereo); + match body { + AudioDataBody::Aac(AacPacket::Raw(data)) => data, + _ => panic!("expected aac raw packet"), + }; + } + FlvTagData::Video(VideoData { frame_type, body }) => { + match frame_type { + FrameType::Keyframe => (), + FrameType::Interframe => (), + _ => panic!("expected keyframe or interframe"), + } + + match body { + VideoDataBody::Enhanced(EnhancedPacket::Hevc(HevcPacket::Nalu { .. })) => assert!(!read_seq_end), + VideoDataBody::Enhanced(EnhancedPacket::SequenceEnd) => { + assert!(!read_seq_end); + read_seq_end = true; + } + _ => panic!("expected hevc nalu packet: {:?}", body), + }; + } + _ => panic!("expected audio data"), + }; + } + + assert!(read_seq_end); + } +} diff --git a/crates/flv/src/macros.rs b/crates/flv/src/macros.rs new file mode 100644 index 000000000..49aa5e5cf --- /dev/null +++ b/crates/flv/src/macros.rs @@ -0,0 +1,66 @@ +/// Helper macro to create a new enum type with a single field. +/// +/// This macro is used to create a new enum type with a single field. +/// The enum type is derived with the `Clone`, `Copy`, `PartialEq`, `Eq`, +/// `PartialOrd`, `Ord`, and `Hash` traits. The enum type is also derived with +/// the `Debug` trait to provide a human-readable representation of the enum. +/// +/// # Examples +/// +/// ```rust,ignore +/// nutype_enum! 
{ +/// pub enum AacPacketType(u8) { +/// SeqHdr = 0x0, +/// Raw = 0x1, +/// } +/// } +/// ``` +macro_rules! nutype_enum { + ( + $(#[$attr:meta])* + $vis:vis enum $name:ident($type:ty) { + $( + $(#[$variant_attr:meta])* + $variant:ident = $value:expr + ),*$(,)? + } + ) => { + #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] + $(#[$attr])* + #[repr(transparent)] + $vis struct $name(pub $type); + + impl ::std::fmt::Debug for $name { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + $( + &$name::$variant => write!(f, "{}::{}", stringify!($name), stringify!($variant)), + )* + _ => write!(f, "{}({:?})", stringify!($name), self.0), + } + } + } + + impl $name { + $( + $(#[$variant_attr])* + #[allow(non_upper_case_globals)] + pub const $variant: Self = Self($value); + )* + } + + impl From<$type> for $name { + fn from(value: $type) -> Self { + Self(value) + } + } + + impl From<$name> for $type { + fn from(value: $name) -> Self { + value.0 + } + } + }; +} + +pub(crate) use nutype_enum; diff --git a/crates/flv/src/script.rs b/crates/flv/src/script.rs new file mode 100644 index 000000000..2670e8c98 --- /dev/null +++ b/crates/flv/src/script.rs @@ -0,0 +1,29 @@ +use std::io; + +use bytes::Bytes; +use scuffle_amf0::{Amf0Decoder, Amf0Marker, Amf0Value}; +use scuffle_bytes_util::BytesCursorExt; + +#[derive(Debug, Clone, PartialEq)] +pub struct ScriptData { + pub name: String, + pub data: Vec>, +} + +impl ScriptData { + pub fn demux(reader: &mut io::Cursor) -> io::Result { + let buf = reader.extract_remaining(); + let mut amf0_reader = Amf0Decoder::new(&buf); + + let name = match amf0_reader.decode_with_type(Amf0Marker::String) { + Ok(Amf0Value::String(name)) => name, + _ => return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid script data name")), + }; + + let data = amf0_reader + .decode_all() + .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Invalid script data"))?; + + Ok(Self { name: name.into_owned(), data: 
data.into_iter().map(|v| v.to_owned()).collect() }) + } +} diff --git a/crates/flv/src/tag.rs b/crates/flv/src/tag.rs new file mode 100644 index 000000000..3dec11f65 --- /dev/null +++ b/crates/flv/src/tag.rs @@ -0,0 +1,117 @@ +use byteorder::{BigEndian, ReadBytesExt}; +use bytes::Bytes; +use scuffle_bytes_util::BytesCursorExt; + +use super::audio::AudioData; +use super::script::ScriptData; +use super::video::VideoData; +use crate::macros::nutype_enum; + +/// An FLV Tag +/// +/// Tags have different types and thus different data structures. To accommodate +/// this the [`FlvTagData`] enum is used. +/// +/// Defined by: +/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - FLV tags) +/// - video_file_format_spec_v10_1.pdf (Annex E.4.1 - FLV Tag) +/// +/// The v10.1 spec adds some additional fields to the tag to accomodate encryption. +/// We dont support this because it is not needed for our use case. (and I suspect it is not used anywhere anymore.) +/// +/// However if the Tag is encrypted the tag_type will be a larger number (one we dont support), and therefore the +/// [`FlvTagData::Unknown`] variant will be used. +#[derive(Debug, Clone, PartialEq)] +pub struct FlvTag { + /// A timestamp in milliseconds + pub timestamp_ms: u32, + /// A stream id + pub stream_id: u32, + pub data: FlvTagData, +} + +impl FlvTag { + /// Demux a FLV tag from the given reader. + /// The cursor will be advanced to the end of the tag. + pub fn demux(reader: &mut std::io::Cursor) -> std::io::Result { + let tag_type = FlvTagType::from(reader.read_u8()?); + + let data_size = reader.read_u24::()?; + // The timestamp bit is weird. Its 24bits but then there is an extended 8 bit number to create a 32bit number. + let timestamp_ms = reader.read_u24::()? | ((reader.read_u8()? as u32) << 24); + + // The stream id according to the spec is ALWAYS 0. (likely not true) + let stream_id = reader.read_u24::()?; + + // We then extract the data from the reader. 
(advancing the cursor to the end of the tag) + let data = reader.extract_bytes(data_size as usize)?; + + // Finally we demux the data. + let data = FlvTagData::demux(tag_type, &mut std::io::Cursor::new(data))?; + + Ok(FlvTag { + timestamp_ms, + stream_id, + data, + }) + } +} + +nutype_enum! { + /// FLV Tag Type + /// + /// This is the type of the tag. + /// + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - FLV tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.1 - FLV Tag) + /// + pub enum FlvTagType(u8) { + Audio = 8, + Video = 9, + ScriptData = 18, + } +} + +/// FLV Tag Data +/// +/// This is a container for the actual media data. +/// This enum contains the data for the different types of tags. +/// +/// Defined by: +/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - FLV tags) +/// - video_file_format_spec_v10_1.pdf (Annex E.4.1 - FLV Tag) +#[derive(Debug, Clone, PartialEq)] +pub enum FlvTagData { + /// AudioData when the FlvTagType is Audio(8) + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA) + Audio(AudioData), + /// VideoData when the FlvTagType is Video(9) + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Video tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.3.1 - VIDEODATA) + Video(VideoData), + /// ScriptData when the FlvTagType is ScriptData(18) + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Data tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.4.1 - SCRIPTDATA) + ScriptData(ScriptData), + /// Any tag type that we dont know how to parse, with the corresponding data being the raw bytes of the tag + Unknown { tag_type: FlvTagType, data: Bytes }, +} + +impl FlvTagData { + pub fn demux(tag_type: FlvTagType, reader: &mut std::io::Cursor) -> std::io::Result { + match 
tag_type { + FlvTagType::Audio => Ok(FlvTagData::Audio(AudioData::demux(reader)?)), + FlvTagType::Video => Ok(FlvTagData::Video(VideoData::demux(reader)?)), + FlvTagType::ScriptData => Ok(FlvTagData::ScriptData(ScriptData::demux(reader)?)), + _ => Ok(FlvTagData::Unknown { + tag_type, + data: reader.extract_remaining(), + }), + } + } +} diff --git a/crates/flv/src/tests/demuxer.rs b/crates/flv/src/tests/demuxer.rs deleted file mode 100644 index 0efb18b8c..000000000 --- a/crates/flv/src/tests/demuxer.rs +++ /dev/null @@ -1,829 +0,0 @@ -use std::collections::HashMap; -use std::io; -use std::path::PathBuf; - -use bytes::{Buf, Bytes}; -use h264::{Sps, SpsExtended}; -use scuffle_aac::{AudioObjectType, PartialAudioSpecificConfig}; -use scuffle_av1::seq::SequenceHeaderObu; -use scuffle_av1::ObuHeader; -use scuffle_bytes_util::BytesCursorExt; - -use crate::{ - AacPacket, Av1Packet, AvcPacket, EnhancedPacket, Flv, FlvTagAudioData, FlvTagData, FlvTagVideoData, FrameType, - HevcPacket, SoundRate, SoundSize, SoundType, -}; - -#[test] -fn test_demux_flv_avc_aac() { - let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../assets"); - - let data = Bytes::from(std::fs::read(dir.join("avc_aac.flv")).expect("failed to read file")); - let mut reader = io::Cursor::new(data); - - let flv = Flv::demux(&mut reader).expect("failed to demux flv"); - - assert_eq!(flv.header.version, 1); - assert!(flv.header.has_audio); - assert!(flv.header.has_video); - assert_eq!(flv.header.data_offset, 9); - assert_eq!(flv.header.extra.len(), 0); - - let mut tags = flv.tags.into_iter(); - - // Metadata tag - { - let tag = tags.next().expect("expected tag"); - assert_eq!(tag.timestamp, 0); - assert_eq!(tag.stream_id, 0); - - // This is a metadata tag - let script_data = match tag.data { - FlvTagData::ScriptData { name, data } => { - assert_eq!(name, "onMetaData"); - data - } - _ => panic!("expected script data"), - }; - - // Script data should be an AMF0 object - let object = match &script_data[0] 
{ - scuffle_amf0::Amf0Value::Object(object) => object, - _ => panic!("expected object"), - }; - - let map = object.iter().map(|(k, v)| (k.as_ref(), v)).collect::>(); - - // Should have a audio sample size property - let audio_sample_size = match map.get("audiosamplesize") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected audio sample size"), - }; - - assert_eq!(audio_sample_size, &16.0); - - // Should have a audio sample rate property - let audio_sample_rate = match map.get("audiosamplerate") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected audio sample rate"), - }; - - assert_eq!(audio_sample_rate, &48000.0); - - // Should have a stereo property - let stereo = match map.get("stereo") { - Some(scuffle_amf0::Amf0Value::Boolean(boolean)) => boolean, - _ => panic!("expected stereo"), - }; - - assert_eq!(stereo, &true); - - // Should have an audio codec id property - let audio_codec_id = match map.get("audiocodecid") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected audio codec id"), - }; - - assert_eq!(audio_codec_id, &10.0); // AAC - - // Should have a video codec id property - let video_codec_id = match map.get("videocodecid") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected video codec id"), - }; - - assert_eq!(video_codec_id, &7.0); // AVC - - // Should have a duration property - let duration = match map.get("duration") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected duration"), - }; - - assert_eq!(duration, &1.088); // 1.088 seconds - - // Should have a width property - let width = match map.get("width") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected width"), - }; - - assert_eq!(width, &3840.0); - - // Should have a height property - let height = match map.get("height") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => 
panic!("expected height"), - }; - - assert_eq!(height, &2160.0); - - // Should have a framerate property - let framerate = match map.get("framerate") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected framerate"), - }; - - assert_eq!(framerate, &60.0); - - // Should have a videodatarate property - match map.get("videodatarate") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected videodatarate"), - }; - - // Should have a audiodatarate property - match map.get("audiodatarate") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected audiodatarate"), - }; - - // Should have a minor version property - let minor_version = match map.get("minor_version") { - Some(scuffle_amf0::Amf0Value::String(number)) => number, - _ => panic!("expected minor version"), - }; - - assert_eq!(minor_version, "512"); - - // Should have a major brand property - let major_brand = match map.get("major_brand") { - Some(scuffle_amf0::Amf0Value::String(string)) => string, - _ => panic!("expected major brand"), - }; - - assert_eq!(major_brand, "iso5"); - - // Should have a compatible_brands property - let compatible_brands = match map.get("compatible_brands") { - Some(scuffle_amf0::Amf0Value::String(string)) => string, - _ => panic!("expected compatible brands"), - }; - - assert_eq!(compatible_brands, "iso5iso6mp41"); - } - - // Video Sequence Header Tag - { - let tag = tags.next().expect("expected tag"); - assert_eq!(tag.timestamp, 0); - assert_eq!(tag.stream_id, 0); - - // This is a video tag - let (frame_type, video_data) = match tag.data { - FlvTagData::Video { frame_type, data } => (frame_type, data), - _ => panic!("expected video data"), - }; - - assert_eq!(frame_type, FrameType::Keyframe); - - // Video data should be an AVC sequence header - let avc_decoder_configuration_record = match video_data { - FlvTagVideoData::Avc(AvcPacket::SequenceHeader(data)) => data, - _ => panic!("expected avc sequence 
header"), - }; - - // The avc sequence header should be able to be decoded into an avc decoder - // configuration record - assert_eq!(avc_decoder_configuration_record.profile_indication, 100); - assert_eq!(avc_decoder_configuration_record.profile_compatibility, 0); - assert_eq!(avc_decoder_configuration_record.level_indication, 51); // 5.1 - assert_eq!(avc_decoder_configuration_record.length_size_minus_one, 3); - assert_eq!(avc_decoder_configuration_record.sps.len(), 1); - assert_eq!(avc_decoder_configuration_record.pps.len(), 1); - assert_eq!(avc_decoder_configuration_record.extended_config, None); - - let sps = &avc_decoder_configuration_record.sps[0]; - // SPS should be able to be decoded into a sequence parameter set - let sps = Sps::parse(sps.clone()).expect("expected sequence parameter set"); - - assert_eq!(sps.profile_idc, 100); - assert_eq!(sps.level_idc, 51); - assert_eq!(sps.width, 3840); - assert_eq!(sps.height, 2160); - assert_eq!(sps.frame_rate, 60.0); - - assert_eq!( - sps.ext, - Some(SpsExtended { - chroma_format_idc: 1, - bit_depth_luma_minus8: 0, - bit_depth_chroma_minus8: 0, - }) - ) - } - - // Audio Sequence Header Tag - { - let tag = tags.next().expect("expected tag"); - assert_eq!(tag.timestamp, 0); - assert_eq!(tag.stream_id, 0); - - let (data, sound_rate, sound_size, sound_type) = match tag.data { - FlvTagData::Audio { - data, - sound_rate, - sound_size, - sound_type, - } => (data, sound_rate, sound_size, sound_type), - _ => panic!("expected audio data"), - }; - - assert_eq!(sound_rate, SoundRate::Hz44000); - assert_eq!(sound_size, SoundSize::Bit16); - assert_eq!(sound_type, SoundType::Stereo); - - // Audio data should be an AAC sequence header - let data = match data { - FlvTagAudioData::Aac(AacPacket::SequenceHeader(data)) => data, - _ => panic!("expected aac sequence header"), - }; - - // The aac sequence header should be able to be decoded into an aac decoder - // configuration record - let aac_decoder_configuration_record = - 
PartialAudioSpecificConfig::parse(&data).expect("expected aac decoder configuration record"); - - assert_eq!( - aac_decoder_configuration_record.audio_object_type, - AudioObjectType::AacLowComplexity - ); - assert_eq!(aac_decoder_configuration_record.sampling_frequency, 48000); - assert_eq!(aac_decoder_configuration_record.channel_configuration, 2); - } - - // Rest of the tags should be video / audio data - let mut last_timestamp = 0; - let mut read_seq_end = false; - for tag in tags { - assert!(tag.timestamp >= last_timestamp); - assert_eq!(tag.stream_id, 0); - - last_timestamp = tag.timestamp; - - match tag.data { - FlvTagData::Audio { - data, - sound_rate, - sound_size, - sound_type, - } => { - assert_eq!(sound_rate, SoundRate::Hz44000); - assert_eq!(sound_size, SoundSize::Bit16); - assert_eq!(sound_type, SoundType::Stereo); - match data { - FlvTagAudioData::Aac(AacPacket::Raw(data)) => data, - _ => panic!("expected aac raw packet"), - }; - } - FlvTagData::Video { frame_type, data } => { - match frame_type { - FrameType::Keyframe => (), - FrameType::Interframe => (), - _ => panic!("expected keyframe or interframe"), - } - - match data { - FlvTagVideoData::Avc(AvcPacket::Nalu { .. 
}) => assert!(!read_seq_end), - FlvTagVideoData::Avc(AvcPacket::EndOfSequence) => { - assert!(!read_seq_end); - read_seq_end = true; - } - _ => panic!("expected avc nalu packet: {:?}", data), - }; - } - _ => panic!("expected audio data"), - }; - } - - assert!(read_seq_end); -} - -#[test] -fn test_demux_flv_av1_aac() { - let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../assets"); - - let data = Bytes::from(std::fs::read(dir.join("av1_aac.flv")).expect("failed to read file")); - let mut reader = io::Cursor::new(data); - - let flv = Flv::demux(&mut reader).expect("failed to demux flv"); - - assert_eq!(flv.header.version, 1); - assert!(flv.header.has_audio); - assert!(flv.header.has_video); - assert_eq!(flv.header.data_offset, 9); - assert_eq!(flv.header.extra.len(), 0); - - let mut tags = flv.tags.into_iter(); - - // Metadata tag - { - let tag = tags.next().expect("expected tag"); - assert_eq!(tag.timestamp, 0); - assert_eq!(tag.stream_id, 0); - - // This is a metadata tag - let script_data = match tag.data { - FlvTagData::ScriptData { name, data } => { - assert_eq!(name, "onMetaData"); - data - } - _ => panic!("expected script data"), - }; - - // Script data should be an AMF0 object - let object = match &script_data[0] { - scuffle_amf0::Amf0Value::Object(object) => object, - _ => panic!("expected object"), - }; - - let map = object.iter().map(|(k, v)| (k.as_ref(), v)).collect::>(); - - // Should have a audio sample size property - let audio_sample_size = match map.get("audiosamplesize") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected audio sample size"), - }; - - assert_eq!(audio_sample_size, &16.0); - - // Should have a audio sample rate property - let audio_sample_rate = match map.get("audiosamplerate") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected audio sample rate"), - }; - - assert_eq!(audio_sample_rate, &48000.0); - - // Should have a stereo property - let stereo = match 
map.get("stereo") { - Some(scuffle_amf0::Amf0Value::Boolean(boolean)) => boolean, - _ => panic!("expected stereo"), - }; - - assert_eq!(stereo, &true); - - // Should have an audio codec id property - let audio_codec_id = match map.get("audiocodecid") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected audio codec id"), - }; - - assert_eq!(audio_codec_id, &10.0); // AAC - - // Should have a video codec id property - let video_codec_id = match map.get("videocodecid") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected video codec id"), - }; - - assert_eq!(video_codec_id, &7.0); // AVC - - // Should have a duration property - let duration = match map.get("duration") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected duration"), - }; - - assert_eq!(duration, &0.0); // 0 seconds (this was a live stream) - - // Should have a width property - let width = match map.get("width") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected width"), - }; - - assert_eq!(width, &2560.0); - - // Should have a height property - let height = match map.get("height") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected height"), - }; - - assert_eq!(height, &1440.0); - - // Should have a framerate property - let framerate = match map.get("framerate") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected framerate"), - }; - - assert_eq!(framerate, &144.0); - - // Should have a videodatarate property - match map.get("videodatarate") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected videodatarate"), - }; - - // Should have a audiodatarate property - match map.get("audiodatarate") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected audiodatarate"), - }; - } - - // Audio Sequence Header Tag - { - let tag = tags.next().expect("expected 
tag"); - assert_eq!(tag.timestamp, 0); - assert_eq!(tag.stream_id, 0); - - let (data, sound_rate, sound_size, sound_type) = match tag.data { - FlvTagData::Audio { - data, - sound_rate, - sound_size, - sound_type, - } => (data, sound_rate, sound_size, sound_type), - _ => panic!("expected audio data"), - }; - - assert_eq!(sound_rate, SoundRate::Hz44000); - assert_eq!(sound_size, SoundSize::Bit16); - assert_eq!(sound_type, SoundType::Stereo); - - // Audio data should be an AAC sequence header - let data = match data { - FlvTagAudioData::Aac(AacPacket::SequenceHeader(data)) => data, - _ => panic!("expected aac sequence header"), - }; - - // The aac sequence header should be able to be decoded into an aac decoder - // configuration record - let aac_decoder_configuration_record = - PartialAudioSpecificConfig::parse(&data).expect("expected aac decoder configuration record"); - - assert_eq!( - aac_decoder_configuration_record.audio_object_type, - AudioObjectType::AacLowComplexity - ); - assert_eq!(aac_decoder_configuration_record.sampling_frequency, 48000); - assert_eq!(aac_decoder_configuration_record.channel_configuration, 2); - } - - // Video Sequence Header Tag - { - let tag = tags.next().expect("expected tag"); - assert_eq!(tag.timestamp, 0); - assert_eq!(tag.stream_id, 0); - - // This is a video tag - let (frame_type, video_data) = match tag.data { - FlvTagData::Video { frame_type, data } => (frame_type, data), - _ => panic!("expected video data"), - }; - - assert_eq!(frame_type, FrameType::Keyframe); - - // Video data should be an AVC sequence header - let config = match video_data { - FlvTagVideoData::Enhanced(EnhancedPacket::Av1(Av1Packet::SequenceStart(config))) => config, - _ => panic!("expected av1 sequence header found {:?}", video_data), - }; - - assert_eq!(config.chroma_sample_position, 0); - assert!(config.chroma_subsampling_x); // 5.1 - assert!(config.chroma_subsampling_y); - assert!(!config.high_bitdepth); - assert!(!config.twelve_bit); - - let mut cursor 
= std::io::Cursor::new(config.config_obu.clone()); - let header = ObuHeader::parse(&mut cursor).expect("expected obu header"); - - let data = cursor - .extract_bytes(header.size.unwrap_or(cursor.remaining() as u64) as usize) - .expect("expected data"); - - let seq_obu = SequenceHeaderObu::parse(header, &mut std::io::Cursor::new(data)).expect("expected sequence obu"); - - assert_eq!(seq_obu.max_frame_height, 1440); - assert_eq!(seq_obu.max_frame_width, 2560); - } - - // Rest of the tags should be video / audio data - let mut last_timestamp = 0; - let mut read_seq_end = false; - for tag in tags { - assert!(tag.timestamp >= last_timestamp || tag.timestamp == 0); // Timestamps should be monotonically increasing or 0 - assert_eq!(tag.stream_id, 0); - - if tag.timestamp != 0 { - last_timestamp = tag.timestamp; - } - - match tag.data { - FlvTagData::Audio { - data, - sound_rate, - sound_size, - sound_type, - } => { - assert_eq!(sound_rate, SoundRate::Hz44000); - assert_eq!(sound_size, SoundSize::Bit16); - assert_eq!(sound_type, SoundType::Stereo); - match data { - FlvTagAudioData::Aac(AacPacket::Raw(data)) => data, - _ => panic!("expected aac raw packet"), - }; - } - FlvTagData::Video { frame_type, data } => { - match frame_type { - FrameType::Keyframe => (), - FrameType::Interframe => (), - _ => panic!("expected keyframe or interframe"), - } - - match data { - FlvTagVideoData::Enhanced(EnhancedPacket::Av1(Av1Packet::Raw(_))) => { - assert!(!read_seq_end) - } - FlvTagVideoData::Enhanced(EnhancedPacket::SequenceEnd) => { - assert!(!read_seq_end); - read_seq_end = true; - } - _ => panic!("expected av1 raw packet: {:?}", data), - }; - } - _ => panic!("expected audio data"), - }; - } - - assert!(read_seq_end); -} - -#[test] -fn test_demux_flv_hevc_aac() { - let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../assets"); - - let data = Bytes::from(std::fs::read(dir.join("hevc_aac.flv")).expect("failed to read file")); - let mut reader = io::Cursor::new(data); - - let 
flv = Flv::demux(&mut reader).expect("failed to demux flv"); - - assert_eq!(flv.header.version, 1); - assert!(flv.header.has_audio); - assert!(flv.header.has_video); - assert_eq!(flv.header.data_offset, 9); - assert_eq!(flv.header.extra.len(), 0); - - let mut tags = flv.tags.into_iter(); - - // Metadata tag - { - let tag = tags.next().expect("expected tag"); - assert_eq!(tag.timestamp, 0); - assert_eq!(tag.stream_id, 0); - - // This is a metadata tag - let script_data = match tag.data { - FlvTagData::ScriptData { name, data } => { - assert_eq!(name, "onMetaData"); - data - } - _ => panic!("expected script data"), - }; - - // Script data should be an AMF0 object - let object = match &script_data[0] { - scuffle_amf0::Amf0Value::Object(object) => object, - _ => panic!("expected object"), - }; - - let map = object.iter().map(|(k, v)| (k.as_ref(), v)).collect::>(); - - // Should have a audio sample size property - let audio_sample_size = match map.get("audiosamplesize") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected audio sample size"), - }; - - assert_eq!(audio_sample_size, &16.0); - - // Should have a audio sample rate property - let audio_sample_rate = match map.get("audiosamplerate") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected audio sample rate"), - }; - - assert_eq!(audio_sample_rate, &48000.0); - - // Should have a stereo property - let stereo = match map.get("stereo") { - Some(scuffle_amf0::Amf0Value::Boolean(boolean)) => boolean, - _ => panic!("expected stereo"), - }; - - assert_eq!(stereo, &true); - - // Should have an audio codec id property - let audio_codec_id = match map.get("audiocodecid") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected audio codec id"), - }; - - assert_eq!(audio_codec_id, &10.0); // AAC - - // Should have a video codec id property - let video_codec_id = match map.get("videocodecid") { - 
Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected video codec id"), - }; - - assert_eq!(video_codec_id, &7.0); // AVC - - // Should have a duration property - let duration = match map.get("duration") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected duration"), - }; - - assert_eq!(duration, &0.0); // 0 seconds (this was a live stream) - - // Should have a width property - let width = match map.get("width") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected width"), - }; - - assert_eq!(width, &2560.0); - - // Should have a height property - let height = match map.get("height") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected height"), - }; - - assert_eq!(height, &1440.0); - - // Should have a framerate property - let framerate = match map.get("framerate") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected framerate"), - }; - - assert_eq!(framerate, &144.0); - - // Should have a videodatarate property - match map.get("videodatarate") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected videodatarate"), - }; - - // Should have a audiodatarate property - match map.get("audiodatarate") { - Some(scuffle_amf0::Amf0Value::Number(number)) => number, - _ => panic!("expected audiodatarate"), - }; - } - - // Audio Sequence Header Tag - { - let tag = tags.next().expect("expected tag"); - assert_eq!(tag.timestamp, 0); - assert_eq!(tag.stream_id, 0); - - let (data, sound_rate, sound_size, sound_type) = match tag.data { - FlvTagData::Audio { - data, - sound_rate, - sound_size, - sound_type, - } => (data, sound_rate, sound_size, sound_type), - _ => panic!("expected audio data"), - }; - - assert_eq!(sound_rate, SoundRate::Hz44000); - assert_eq!(sound_size, SoundSize::Bit16); - assert_eq!(sound_type, SoundType::Stereo); - - // Audio data should be an AAC sequence header - let data = 
match data { - FlvTagAudioData::Aac(AacPacket::SequenceHeader(data)) => data, - _ => panic!("expected aac sequence header"), - }; - - // The aac sequence header should be able to be decoded into an aac decoder - // configuration record - let aac_decoder_configuration_record = - PartialAudioSpecificConfig::parse(&data).expect("expected aac decoder configuration record"); - - assert_eq!( - aac_decoder_configuration_record.audio_object_type, - AudioObjectType::AacLowComplexity - ); - assert_eq!(aac_decoder_configuration_record.sampling_frequency, 48000); - assert_eq!(aac_decoder_configuration_record.channel_configuration, 2); - } - - // Video Sequence Header Tag - { - let tag = tags.next().expect("expected tag"); - assert_eq!(tag.timestamp, 0); - assert_eq!(tag.stream_id, 0); - - // This is a video tag - let (frame_type, video_data) = match tag.data { - FlvTagData::Video { frame_type, data } => (frame_type, data), - _ => panic!("expected video data"), - }; - - assert_eq!(frame_type, FrameType::Keyframe); - - // Video data should be an AVC sequence header - let config = match video_data { - FlvTagVideoData::Enhanced(EnhancedPacket::Hevc(HevcPacket::SequenceStart(config))) => config, - _ => panic!("expected hevc sequence header found {:?}", video_data), - }; - - assert_eq!(config.configuration_version, 1); - assert_eq!(config.avg_frame_rate, 0); - assert_eq!(config.constant_frame_rate, 0); - assert_eq!(config.num_temporal_layers, 1); - - // We should be able to find a SPS NAL unit in the sequence header - let Some(sps) = config - .arrays - .iter() - .find(|a| a.nal_unit_type == h265::NaluType::Sps) - .and_then(|v| v.nalus.first()) - else { - panic!("expected sps"); - }; - - // We should be able to find a PPS NAL unit in the sequence header - let Some(_) = config - .arrays - .iter() - .find(|a| a.nal_unit_type == h265::NaluType::Pps) - .and_then(|v| v.nalus.first()) - else { - panic!("expected pps"); - }; - - // We should be able to decode the SPS NAL unit - let sps = 
h265::Sps::parse(sps.clone()).expect("expected sps"); - - assert_eq!(sps.frame_rate, 144.0); - assert_eq!(sps.width, 2560); - assert_eq!(sps.height, 1440); - assert_eq!( - sps.color_config, - Some(h265::ColorConfig { - full_range: false, - color_primaries: 1, - transfer_characteristics: 1, - matrix_coefficients: 1, - }) - ) - } - - // Rest of the tags should be video / audio data - let mut last_timestamp = 0; - let mut read_seq_end = false; - for tag in tags { - assert!(tag.timestamp >= last_timestamp || tag.timestamp == 0); // Timestamps should be monotonically increasing or 0 - assert_eq!(tag.stream_id, 0); - - if tag.timestamp != 0 { - last_timestamp = tag.timestamp; - } - - match tag.data { - FlvTagData::Audio { - data, - sound_rate, - sound_size, - sound_type, - } => { - assert_eq!(sound_rate, SoundRate::Hz44000); - assert_eq!(sound_size, SoundSize::Bit16); - assert_eq!(sound_type, SoundType::Stereo); - match data { - FlvTagAudioData::Aac(AacPacket::Raw(data)) => data, - _ => panic!("expected aac raw packet"), - }; - } - FlvTagData::Video { frame_type, data } => { - match frame_type { - FrameType::Keyframe => (), - FrameType::Interframe => (), - _ => panic!("expected keyframe or interframe"), - } - - match data { - FlvTagVideoData::Enhanced(EnhancedPacket::Hevc(HevcPacket::Nalu { .. 
})) => assert!(!read_seq_end), - FlvTagVideoData::Enhanced(EnhancedPacket::SequenceEnd) => { - assert!(!read_seq_end); - read_seq_end = true; - } - _ => panic!("expected hevc nalu packet: {:?}", data), - }; - } - _ => panic!("expected audio data"), - }; - } - - assert!(read_seq_end); -} diff --git a/crates/flv/src/tests/error.rs b/crates/flv/src/tests/error.rs deleted file mode 100644 index 04e4be5f9..000000000 --- a/crates/flv/src/tests/error.rs +++ /dev/null @@ -1,31 +0,0 @@ -use crate::FlvDemuxerError; - -#[test] -fn test_error_display() { - let error = FlvDemuxerError::InvalidFrameType(0); - assert_eq!(error.to_string(), "invalid frame type: 0"); - - let error = FlvDemuxerError::IO(std::io::Error::new(std::io::ErrorKind::Other, "test")); - assert_eq!(error.to_string(), "io error: test"); - - let error = FlvDemuxerError::Amf0Read(scuffle_amf0::Amf0ReadError::UnknownMarker(0)); - assert_eq!(error.to_string(), "amf0 read error: unknown marker: 0"); - - let error = FlvDemuxerError::InvalidFlvHeader; - assert_eq!(error.to_string(), "invalid flv header"); - - let error = FlvDemuxerError::InvalidScriptDataName; - assert_eq!(error.to_string(), "invalid script data name"); - - let error = FlvDemuxerError::InvalidEnhancedPacketType(0); - assert_eq!(error.to_string(), "invalid enhanced packet type: 0"); - - let error = FlvDemuxerError::InvalidSoundRate(0); - assert_eq!(error.to_string(), "invalid sound rate: 0"); - - let error = FlvDemuxerError::InvalidSoundSize(0); - assert_eq!(error.to_string(), "invalid sound size: 0"); - - let error = FlvDemuxerError::InvalidSoundType(0); - assert_eq!(error.to_string(), "invalid sound type: 0"); -} diff --git a/crates/flv/src/tests/mod.rs b/crates/flv/src/tests/mod.rs deleted file mode 100644 index 9d93f26c1..000000000 --- a/crates/flv/src/tests/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -mod demuxer; -mod error; diff --git a/crates/flv/src/video.rs b/crates/flv/src/video.rs new file mode 100644 index 000000000..bd09da6b6 --- /dev/null +++ 
b/crates/flv/src/video.rs @@ -0,0 +1,263 @@ +use std::io::{self, Read}; + +use scuffle_av1::{AV1CodecConfigurationRecord, AV1VideoDescriptor}; +use byteorder::{BigEndian, ReadBytesExt}; +use bytes::Bytes; +use h265::HEVCDecoderConfigurationRecord; +use scuffle_bytes_util::BytesCursorExt; + +use super::av1::Av1Packet; +use super::avc::{AvcPacket, AvcPacketType}; +use super::hevc::HevcPacket; +use crate::macros::nutype_enum; + +nutype_enum! { + /// FLV Frame Type + /// This enum represents the different types of frames in a FLV file. + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Video tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.3.1 - VIDEODATA) + pub enum FrameType(u8) { + /// A keyframe is a frame that is a complete representation of the video content. + Keyframe = 1, + /// An interframe is a frame that is a partial representation of the video content. + Interframe = 2, + /// A disposable interframe is a frame that is a partial representation of the video content, but is not required to be displayed. (h263 only) + DisposableInterframe = 3, + /// A generated keyframe is a frame that is a complete representation of the video content, but is not a keyframe. (reserved for server use only) + GeneratedKeyframe = 4, + /// A video info or command frame is a frame that contains video information or commands. + /// If the frame is this type, the body will be a CommandPacket + VideoInfoOrCommandFrame = 5, + } +} + +/// FLV Tag Video Data +/// This is a container for video data. +/// This enum contains the data for the different types of video tags. +/// Defined by: +/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Video tags) +/// - video_file_format_spec_v10_1.pdf (Annex E.4.3.1 - VIDEODATA) +#[derive(Debug, Clone, PartialEq)] +pub struct VideoData { + /// The frame type of the video data. (4 bits) + pub frame_type: FrameType, + /// The body of the video data. 
+ pub body: VideoDataBody, +} + +impl VideoData { + pub fn demux(reader: &mut io::Cursor) -> io::Result { + let byte = reader.read_u8()?; + let enhanced = (byte & 0b1000_0000) != 0; + let frame_type_byte = (byte >> 4) & 0b0111; + let packet_type_byte = byte & 0b0000_1111; + let frame_type = FrameType::from(frame_type_byte); + let body = if frame_type == FrameType::VideoInfoOrCommandFrame { + let command_packet = CommandPacket::from(reader.read_u8()?); + VideoDataBody::Command(command_packet) + } else { + VideoDataBody::demux(VideoPacketType::new(packet_type_byte, enhanced), reader)? + }; + + Ok(VideoData { frame_type, body }) + } +} + +nutype_enum! { + /// FLV Video Codec ID + /// + /// Denotes the different types of video codecs that can be used in a FLV file. + /// This is a legacy enum for older codecs; for modern codecs, the [`EnhancedPacketType`] is used which uses a [`VideoFourCC`] identifier. + /// + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Video tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.3.1 - VIDEODATA) + pub enum VideoCodecId(u8) { + /// Sorenson H.263 + SorensonH263 = 2, + /// Screen Video + ScreenVideo = 3, + /// On2 VP6 + On2VP6 = 4, + /// On2 VP6 with alpha channel + On2VP6WithAlphaChannel = 5, + /// Screen Video Version 2 + ScreenVideoVersion2 = 6, + /// AVC (H.264) + Avc = 7, + } +} + +/// FLV Tag Video Data Body +/// +/// This is a container for video data. +/// This enum contains the data for the different types of video tags. +/// +/// Defined by: +/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Video tags) +/// - video_file_format_spec_v10_1.pdf (Annex E.4.3.1 - VIDEODATA) +#[derive(Debug, Clone, PartialEq)] +pub enum VideoDataBody { + /// AVC Video Packet (H.264) + /// When [`VideoPacketType::CodecId`] is [`VideoCodecId::Avc`] + Avc(AvcPacket), + /// Enhanced Packet (AV1, H.265, etc.) 
+ /// When [`VideoPacketType::Enhanced`] is used + Enhanced(EnhancedPacket), + /// Command Frame (VideoInfo or Command) + /// When [`FrameType::VideoInfoOrCommandFrame`] is used + Command(CommandPacket), + /// Data we don't know how to parse + Unknown { codec_id: VideoCodecId, data: Bytes }, +} + +nutype_enum! { + /// FLV Command Packet + /// Defined by: + /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Video tags) + /// - video_file_format_spec_v10_1.pdf (Annex E.4.3.1 - VIDEODATA) + pub enum CommandPacket(u8) { + /// Start of client seeking, when FrameType is 5 + StartOfClientSeeking = 1, + /// End of client seeking, when FrameType is 5 + EndOfClientSeeking = 2, + } +} + +/// A wrapper enum for the different types of video packets that can be used in a FLV file. +/// +/// Used to construct a [`VideoDataBody`]. +/// +/// See: +/// - [`VideoCodecId`] +/// - [`EnhancedPacketType`] +/// - [`VideoDataBody`] +#[derive(Debug, Clone, PartialEq, Copy, Eq, PartialOrd, Ord, Hash)] +pub enum VideoPacketType { + /// Codec ID (legacy) + CodecId(VideoCodecId), + /// Enhanced (modern) + Enhanced(EnhancedPacketType), +} + +impl VideoPacketType { + pub fn new(byte: u8, enhanced: bool) -> Self { + if enhanced { + Self::Enhanced(EnhancedPacketType::from(byte)) + } else { + Self::CodecId(VideoCodecId::from(byte)) + } + } +} + +impl VideoDataBody { + /// Demux a video packet from the given reader. + /// This will consume all the data from the reader.
+ pub fn demux(packet_type: VideoPacketType, reader: &mut io::Cursor) -> io::Result { + match packet_type { + VideoPacketType::CodecId(codec_id) => match codec_id { + VideoCodecId::Avc => { + let avc_packet_type = AvcPacketType::from(reader.read_u8()?); + Ok(VideoDataBody::Avc(AvcPacket::demux(avc_packet_type, reader)?)) + } + _ => Ok(VideoDataBody::Unknown { + codec_id, + data: reader.extract_remaining(), + }), + }, + VideoPacketType::Enhanced(packet_type) => { + let mut video_codec = [0; 4]; + reader.read_exact(&mut video_codec)?; + let video_codec = VideoFourCC::from(video_codec); + + match packet_type { + EnhancedPacketType::SequenceEnd => return Ok(VideoDataBody::Enhanced(EnhancedPacket::SequenceEnd)), + EnhancedPacketType::Metadata => { + return Ok(VideoDataBody::Enhanced(EnhancedPacket::Metadata(reader.extract_remaining()))) + } + _ => {} + } + + match (video_codec, packet_type) { + (VideoFourCC::Av1, EnhancedPacketType::SequenceStart) => Ok(VideoDataBody::Enhanced( + EnhancedPacket::Av1(Av1Packet::SequenceStart(AV1CodecConfigurationRecord::demux(reader)?)), + )), + (VideoFourCC::Av1, EnhancedPacketType::Mpeg2SequenceStart) => { + Ok(VideoDataBody::Enhanced(EnhancedPacket::Av1(Av1Packet::SequenceStart( + AV1VideoDescriptor::demux(reader)?.codec_configuration_record, + )))) + } + (VideoFourCC::Av1, EnhancedPacketType::CodedFrames) => Ok(VideoDataBody::Enhanced(EnhancedPacket::Av1( + Av1Packet::Raw(reader.extract_remaining()), + ))), + (VideoFourCC::Hevc, EnhancedPacketType::SequenceStart) => Ok(VideoDataBody::Enhanced( + EnhancedPacket::Hevc(HevcPacket::SequenceStart(HEVCDecoderConfigurationRecord::demux(reader)?)), + )), + (VideoFourCC::Hevc, EnhancedPacketType::CodedFrames) => { + Ok(VideoDataBody::Enhanced(EnhancedPacket::Hevc(HevcPacket::Nalu { + composition_time: Some(reader.read_i24::()?), + data: reader.extract_remaining(), + }))) + } + (VideoFourCC::Hevc, EnhancedPacketType::CodedFramesX) => { + 
Ok(VideoDataBody::Enhanced(EnhancedPacket::Hevc(HevcPacket::Nalu { + composition_time: None, + data: reader.extract_remaining(), + }))) + } + _ => Ok(VideoDataBody::Enhanced(EnhancedPacket::Unknown { + packet_type, + video_codec, + data: reader.extract_remaining(), + })), + } + } + } + } +} + +/// An Enhanced FLV Packet +/// +/// This is a container for enhanced video packets. +/// The enhanced spec adds modern codecs to the FLV file format. +/// +/// Defined by: +/// - enhanced_rtmp-v1.pdf (Defining Additional Video Codecs) +/// - enhanced_rtmp-v2.pdf (Enhanced Video) +#[derive(Debug, Clone, PartialEq)] +pub enum EnhancedPacket { + /// Metadata + Metadata(Bytes), + /// Sequence End + SequenceEnd, + /// Av1 Video Packet + Av1(Av1Packet), + /// Hevc (H.265) Video Packet + Hevc(HevcPacket), + /// We don't know how to parse it + Unknown { + packet_type: EnhancedPacketType, + video_codec: VideoFourCC, + data: Bytes, + }, +} + +nutype_enum! { + pub enum VideoFourCC([u8; 4]) { + Av1 = *b"av01", + Vp9 = *b"vp09", + Hevc = *b"hvc1", + } +} + +nutype_enum! 
{ + pub enum EnhancedPacketType(u8) { + SequenceStart = 0, + CodedFrames = 1, + SequenceEnd = 2, + CodedFramesX = 3, + Metadata = 4, + Mpeg2SequenceStart = 5, + } +} diff --git a/crates/mp4/src/tests/demux.rs b/crates/mp4/src/tests/demux.rs index b4f9efb86..53161d41b 100644 --- a/crates/mp4/src/tests/demux.rs +++ b/crates/mp4/src/tests/demux.rs @@ -1211,6 +1211,7 @@ fn test_demux_av1_aac() { chroma_subsampling_y: true, chroma_sample_position: 1, initial_presentation_delay_minus_one: None, + hdr_wcg_idc: 0, config_obu: b"\n\x0e\0\0\0$O\x7fS\0\xbe\x04\x04\x04\x04\x90".to_vec().into(), }, }, diff --git a/crates/transmuxer/Cargo.toml b/crates/transmuxer/Cargo.toml index 444634ab6..6a96851a3 100644 --- a/crates/transmuxer/Cargo.toml +++ b/crates/transmuxer/Cargo.toml @@ -10,11 +10,11 @@ bytes = "1.5" h264 = { path = "../h264" } h265 = { path = "../h265" } -scuffle-av1.workspace = true scuffle-aac = { path = "../aac" } -scuffle-amf0.workspace = true -flv = { path = "../flv" } mp4 = { path = "../mp4" } +scuffle-av1.workspace = true +scuffle-flv.workspace = true +scuffle-amf0.workspace = true scuffle-bytes-util.workspace = true scuffle-workspace-hack.workspace = true diff --git a/crates/transmuxer/src/codecs/aac.rs b/crates/transmuxer/src/codecs/aac.rs index 0ea8ba797..a2d576599 100644 --- a/crates/transmuxer/src/codecs/aac.rs +++ b/crates/transmuxer/src/codecs/aac.rs @@ -1,5 +1,4 @@ use bytes::Bytes; -use flv::{SoundSize, SoundType}; use mp4::types::esds::descriptor::header::DescriptorHeader; use mp4::types::esds::descriptor::traits::DescriptorType; use mp4::types::esds::descriptor::types::decoder_config::DecoderConfigDescriptor; @@ -11,6 +10,7 @@ use mp4::types::stsd::{AudioSampleEntry, SampleEntry}; use mp4::types::trun::{TrunSample, TrunSampleFlag}; use mp4::DynBox; use scuffle_aac::PartialAudioSpecificConfig; +use scuffle_flv::audio::{SoundSize, SoundType}; use crate::TransmuxError; @@ -27,10 +27,12 @@ pub fn stsd_entry( match sound_type { SoundType::Mono => 1, 
SoundType::Stereo => 2, + _ => return Err(TransmuxError::InvalidAudioChannels), }, match sound_size { SoundSize::Bit8 => 8, SoundSize::Bit16 => 16, + _ => return Err(TransmuxError::InvalidAudioSampleSize), }, aac_config.sampling_frequency, )), diff --git a/crates/transmuxer/src/codecs/av1.rs b/crates/transmuxer/src/codecs/av1.rs index a7b9f85fa..92c0457bb 100644 --- a/crates/transmuxer/src/codecs/av1.rs +++ b/crates/transmuxer/src/codecs/av1.rs @@ -1,5 +1,4 @@ use bytes::{Buf, Bytes}; -use flv::FrameType; use mp4::types::av01::Av01; use mp4::types::av1c::Av1C; use mp4::types::colr::{ColorType, Colr}; @@ -9,6 +8,7 @@ use mp4::DynBox; use scuffle_av1::seq::SequenceHeaderObu; use scuffle_av1::{AV1CodecConfigurationRecord, ObuHeader, ObuType}; use scuffle_bytes_util::BytesCursorExt; +use scuffle_flv::video::FrameType; use crate::TransmuxError; diff --git a/crates/transmuxer/src/codecs/avc.rs b/crates/transmuxer/src/codecs/avc.rs index 9f53a328a..76f026ddf 100644 --- a/crates/transmuxer/src/codecs/avc.rs +++ b/crates/transmuxer/src/codecs/avc.rs @@ -1,5 +1,4 @@ use bytes::Bytes; -use flv::FrameType; use h264::{AVCDecoderConfigurationRecord, Sps}; use mp4::types::avc1::Avc1; use mp4::types::avcc::AvcC; @@ -7,6 +6,7 @@ use mp4::types::colr::{ColorType, Colr}; use mp4::types::stsd::{SampleEntry, VisualSampleEntry}; use mp4::types::trun::{TrunSample, TrunSampleFlag}; use mp4::DynBox; +use scuffle_flv::video::FrameType; use crate::TransmuxError; diff --git a/crates/transmuxer/src/codecs/hevc.rs b/crates/transmuxer/src/codecs/hevc.rs index 1ec817b88..f1b16fc4f 100644 --- a/crates/transmuxer/src/codecs/hevc.rs +++ b/crates/transmuxer/src/codecs/hevc.rs @@ -1,5 +1,4 @@ use bytes::Bytes; -use flv::FrameType; use h265::{HEVCDecoderConfigurationRecord, Sps}; use mp4::types::colr::{ColorType, Colr}; use mp4::types::hev1::Hev1; @@ -7,6 +6,7 @@ use mp4::types::hvcc::HvcC; use mp4::types::stsd::{SampleEntry, VisualSampleEntry}; use mp4::types::trun::{TrunSample, TrunSampleFlag}; use 
mp4::DynBox; +use scuffle_flv::video::FrameType; use crate::TransmuxError; diff --git a/crates/transmuxer/src/define.rs b/crates/transmuxer/src/define.rs index 7ab26471b..2bbda8bd1 100644 --- a/crates/transmuxer/src/define.rs +++ b/crates/transmuxer/src/define.rs @@ -1,9 +1,9 @@ use bytes::Bytes; -use flv::{SoundSize, SoundType}; use h264::AVCDecoderConfigurationRecord; use h265::HEVCDecoderConfigurationRecord; use mp4::codec::{AudioCodec, VideoCodec}; use scuffle_av1::AV1CodecConfigurationRecord; +use scuffle_flv::audio::{SoundSize, SoundType}; pub(crate) enum VideoSequenceHeader { Avc(AVCDecoderConfigurationRecord), diff --git a/crates/transmuxer/src/errors.rs b/crates/transmuxer/src/errors.rs index c0e727912..3fdb5315e 100644 --- a/crates/transmuxer/src/errors.rs +++ b/crates/transmuxer/src/errors.rs @@ -5,18 +5,13 @@ pub enum TransmuxError { InvalidVideoDimensions, InvalidVideoFrameRate, InvalidAudioSampleRate, + InvalidAudioChannels, + InvalidAudioSampleSize, InvalidHEVCDecoderConfigurationRecord, InvalidAv1DecoderConfigurationRecord, InvalidAVCDecoderConfigurationRecord, NoSequenceHeaders, IO(io::Error), - FlvDemuxer(flv::FlvDemuxerError), -} - -impl From for TransmuxError { - fn from(err: flv::FlvDemuxerError) -> Self { - Self::FlvDemuxer(err) - } } impl From for TransmuxError { @@ -31,6 +26,8 @@ impl std::fmt::Display for TransmuxError { Self::InvalidVideoDimensions => write!(f, "invalid video dimensions"), Self::InvalidVideoFrameRate => write!(f, "invalid video frame rate"), Self::InvalidAudioSampleRate => write!(f, "invalid audio sample rate"), + Self::InvalidAudioChannels => write!(f, "invalid audio channels"), + Self::InvalidAudioSampleSize => write!(f, "invalid audio sample size"), Self::InvalidHEVCDecoderConfigurationRecord => { write!(f, "invalid hevc decoder configuration record") } @@ -42,7 +39,6 @@ impl std::fmt::Display for TransmuxError { } Self::NoSequenceHeaders => write!(f, "no sequence headers"), Self::IO(err) => write!(f, "io error: {}", 
err), - Self::FlvDemuxer(err) => write!(f, "flv demuxer error: {}", err), } } } diff --git a/crates/transmuxer/src/lib.rs b/crates/transmuxer/src/lib.rs index f08ccaf85..c0dcbfcce 100644 --- a/crates/transmuxer/src/lib.rs +++ b/crates/transmuxer/src/lib.rs @@ -7,10 +7,6 @@ use std::io; use byteorder::{BigEndian, ReadBytesExt}; use bytes::{Buf, Bytes}; -use flv::{ - AacPacket, Av1Packet, AvcPacket, EnhancedPacket, FlvTag, FlvTagAudioData, FlvTagData, FlvTagVideoData, FrameType, - HevcPacket, SoundType, -}; use mp4::codec::{AudioCodec, VideoCodec}; use mp4::types::ftyp::{FourCC, Ftyp}; use mp4::types::hdlr::{HandlerType, Hdlr}; @@ -40,6 +36,14 @@ use mp4::types::trun::Trun; use mp4::types::vmhd::Vmhd; use mp4::BoxType; use scuffle_amf0::Amf0Value; +use scuffle_flv::aac::AacPacket; +use scuffle_flv::audio::{AudioData, AudioDataBody, SoundType}; +use scuffle_flv::av1::Av1Packet; +use scuffle_flv::avc::AvcPacket; +use scuffle_flv::hevc::HevcPacket; +use scuffle_flv::script::ScriptData; +use scuffle_flv::tag::{FlvTag, FlvTagData}; +use scuffle_flv::video::{EnhancedPacket, FrameType, VideoData, VideoDataBody}; mod codecs; mod define; @@ -94,7 +98,7 @@ impl Transmuxer { break; } - let tag = flv::FlvTag::demux(&mut cursor)?; + let tag = FlvTag::demux(&mut cursor)?; self.tags.push_back(tag); } @@ -143,7 +147,7 @@ impl Transmuxer { let mut is_keyframe = false; let duration = - if self.last_video_timestamp == 0 || tag.timestamp == 0 || tag.timestamp < self.last_video_timestamp { + if self.last_video_timestamp == 0 || tag.timestamp_ms == 0 || tag.timestamp_ms < self.last_video_timestamp { 1000 // the first frame is always 1000 ticks where the // timescale is 1000 * fps. } else { @@ -156,7 +160,7 @@ impl Transmuxer { // The reason we use a timescale which is 1000 * fps is because then we can // always represent the delta as an integer. If we use a timescale of 1000, we // would run into the same rounding errors. 
- let delta = tag.timestamp as f64 - self.last_video_timestamp as f64; + let delta = tag.timestamp_ms as f64 - self.last_video_timestamp as f64; let expected_delta = 1000.0 / video_settings.framerate; if (delta - expected_delta).abs() <= 1.0 { 1000 @@ -166,10 +170,10 @@ impl Transmuxer { }; match tag.data { - FlvTagData::Audio { - data: FlvTagAudioData::Aac(AacPacket::Raw(data)), + FlvTagData::Audio(AudioData { + body: AudioDataBody::Aac(AacPacket::Raw(data)), .. - } => { + }) => { let (sample, duration) = codecs::aac::trun_sample(&data)?; trun_sample = sample; @@ -177,10 +181,11 @@ impl Transmuxer { total_duration = duration; is_audio = true; } - FlvTagData::Video { + FlvTagData::Video(VideoData { frame_type, - data: FlvTagVideoData::Avc(AvcPacket::Nalu { composition_time, data }), - } => { + body: VideoDataBody::Avc(AvcPacket::Nalu { composition_time, data }), + .. + }) => { let composition_time = ((composition_time as f64 * video_settings.framerate) / 1000.0).floor() * 1000.0; let sample = codecs::avc::trun_sample(frame_type, composition_time as u32, duration, &data)?; @@ -191,10 +196,11 @@ impl Transmuxer { is_keyframe = frame_type == FrameType::Keyframe; } - FlvTagData::Video { + FlvTagData::Video(VideoData { frame_type, - data: FlvTagVideoData::Enhanced(EnhancedPacket::Av1(Av1Packet::Raw(data))), - } => { + body: VideoDataBody::Enhanced(EnhancedPacket::Av1(Av1Packet::Raw(data))), + .. + }) => { let sample = codecs::av1::trun_sample(frame_type, duration, &data)?; trun_sample = sample; @@ -203,10 +209,11 @@ impl Transmuxer { is_keyframe = frame_type == FrameType::Keyframe; } - FlvTagData::Video { + FlvTagData::Video(VideoData { frame_type, - data: FlvTagVideoData::Enhanced(EnhancedPacket::Hevc(HevcPacket::Nalu { composition_time, data })), - } => { + body: VideoDataBody::Enhanced(EnhancedPacket::Hevc(HevcPacket::Nalu { composition_time, data })), + .. 
+ }) => { let composition_time = ((composition_time.unwrap_or_default() as f64 * video_settings.framerate) / 1000.0).floor() * 1000.0; @@ -278,7 +285,7 @@ impl Transmuxer { }))); } else { self.video_duration += total_duration as u64; - self.last_video_timestamp = tag.timestamp; + self.last_video_timestamp = tag.timestamp_ms; return Ok(Some(TransmuxResult::MediaSegment(MediaSegment { data: Bytes::from(writer), ty: MediaType::Video, @@ -302,37 +309,40 @@ impl Transmuxer { } match &tag.data { - FlvTagData::Video { + FlvTagData::Video(VideoData { frame_type: _, - data: FlvTagVideoData::Avc(AvcPacket::SequenceHeader(data)), - } => { + body: VideoDataBody::Avc(AvcPacket::SequenceHeader(data)), + .. + }) => { video_sequence_header = Some(VideoSequenceHeader::Avc(data.clone())); } - FlvTagData::Video { + FlvTagData::Video(VideoData { frame_type: _, - data: FlvTagVideoData::Enhanced(EnhancedPacket::Av1(Av1Packet::SequenceStart(config))), - } => { + body: VideoDataBody::Enhanced(EnhancedPacket::Av1(Av1Packet::SequenceStart(config))), + .. + }) => { video_sequence_header = Some(VideoSequenceHeader::Av1(config.clone())); } - FlvTagData::Video { + FlvTagData::Video(VideoData { frame_type: _, - data: FlvTagVideoData::Enhanced(EnhancedPacket::Hevc(HevcPacket::SequenceStart(config))), - } => { + body: VideoDataBody::Enhanced(EnhancedPacket::Hevc(HevcPacket::SequenceStart(config))), + .. + }) => { video_sequence_header = Some(VideoSequenceHeader::Hevc(config.clone())); } - FlvTagData::Audio { + FlvTagData::Audio(AudioData { + body: AudioDataBody::Aac(AacPacket::SequenceHeader(data)), sound_size, sound_type, - sound_rate: _, - data: FlvTagAudioData::Aac(AacPacket::SequenceHeader(data)), - } => { + .. 
+ }) => { audio_sequence_header = Some(AudioSequenceHeader { data: AudioSequenceHeaderData::Aac(data.clone()), sound_size: *sound_size, sound_type: *sound_type, }); } - FlvTagData::ScriptData { data, name } => { + FlvTagData::ScriptData(ScriptData { data, name }) => { if name == "@setDataFrame" || name == "onMetaData" { let meta_object = data.iter().find(|v| matches!(v, Amf0Value::Object(_))); @@ -492,6 +502,7 @@ impl Transmuxer { audio_channels = match audio_sequence_header.sound_type { SoundType::Mono => 1, SoundType::Stereo => 2, + _ => return Err(TransmuxError::InvalidAudioChannels), }; entry diff --git a/crates/transmuxer/src/tests/mod.rs b/crates/transmuxer/src/tests/mod.rs index f58da7b6a..d7cdee791 100644 --- a/crates/transmuxer/src/tests/mod.rs +++ b/crates/transmuxer/src/tests/mod.rs @@ -4,9 +4,9 @@ use std::io::{ use std::path::PathBuf; use std::process::{Command, Stdio}; -use flv::FlvHeader; use mp4::codec::{AudioCodec, VideoCodec}; use scuffle_aac::AudioObjectType; +use scuffle_flv::header::FlvHeader; use crate::define::{AudioSettings, VideoSettings}; use crate::{TransmuxResult, Transmuxer};