Auto merge of #250 - troy/flv, r=lennartkloock

flv crate refactor Refactors the FLV Crate adding support for the new enhanced spec and improving the usage and API docs. CLOUD-27 Requested-by: lennartkloock <39778085+lennartkloock@users.noreply.github.com> Reviewed-by: lennartkloock <39778085+lennartkloock@users.noreply.github.com>
ScuffleCloud · Jan 20, 2025 · 57d4018 · 57d4018
2 parents 494a60b + 9100b33
commit 57d4018
Showing 36 changed files with 2,515 additions and 1,581 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -63,6 +63,7 @@ scuffle-bytes-util = { path = "crates/bytes-util", version = "0.0.1" }
 scuffle-expgolomb = { path = "crates/expgolomb", version = "0.0.1" }
 scuffle-amf0 = { path = "crates/amf0", version = "0.0.1" }
 scuffle-av1 = { path = "crates/av1", version = "0.0.1" }
+scuffle-flv = { path = "crates/flv", version = "0.0.1" }
 
 [profile.release-debug]
 inherits = "release"

diff --git a/crates/av1/src/config.rs b/crates/av1/src/config.rs
@@ -1,8 +1,41 @@
 use std::io;
 
+use byteorder::ReadBytesExt;
 use bytes::Bytes;
 use scuffle_bytes_util::{BitReader, BitWriter, BytesCursorExt};
 
+/// AV1 Video Descriptor
+///
+/// A two-byte tag/length header followed by an [`AV1CodecConfigurationRecord`].
+/// <https://aomediacodec.github.io/av1-mpeg2-ts/#av1-video-descriptor>
+#[derive(Debug, Clone, PartialEq)]
+pub struct AV1VideoDescriptor {
+    /// Descriptor tag. `demux` only accepts `0x80`.
+    pub tag: u8,
+    /// Descriptor length. `demux` only accepts `4`.
+    pub length: u8,
+    /// The codec configuration record parsed from the bytes after the header.
+    pub codec_configuration_record: AV1CodecConfigurationRecord,
+}
+
+impl AV1VideoDescriptor {
+    /// Parses an AV1 video descriptor from `reader`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an [`io::ErrorKind::InvalidData`] error when the tag byte is not
+    /// `0x80` or the length byte is not `4`. Errors from reading the two header
+    /// bytes and from [`AV1CodecConfigurationRecord::demux`] are propagated.
+    pub fn demux(reader: &mut io::Cursor<Bytes>) -> io::Result<Self> {
+        let invalid_data = |message: &'static str| io::Error::new(io::ErrorKind::InvalidData, message);
+
+        let tag = reader.read_u8()?;
+        if tag != 0x80 {
+            return Err(invalid_data("Invalid AV1 video descriptor tag"));
+        }
+
+        let length = reader.read_u8()?;
+        if length != 4 {
+            return Err(invalid_data("Invalid AV1 video descriptor length"));
+        }
+
+        // Everything after the two header bytes belongs to the configuration record.
+        let codec_configuration_record = AV1CodecConfigurationRecord::demux(reader)?;
+
+        Ok(Self {
+            tag,
+            length,
+            codec_configuration_record,
+        })
+    }
+}
+
 #[derive(Debug, Clone, PartialEq)]
 /// AV1 Codec Configuration Record
 /// <https://aomediacodec.github.io/av1-isobmff/#av1codecconfigurationbox-syntax>
@@ -16,6 +49,7 @@ pub struct AV1CodecConfigurationRecord {
     pub chroma_subsampling_x: bool,
     pub chroma_subsampling_y: bool,
     pub chroma_sample_position: u8,
+    pub hdr_wcg_idc: u8,
     pub initial_presentation_delay_minus_one: Option<u8>,
     pub config_obu: Bytes,
 }
@@ -45,7 +79,13 @@ impl AV1CodecConfigurationRecord {
         let chroma_subsampling_y = bit_reader.read_bit()?;
         let chroma_sample_position = bit_reader.read_bits(2)? as u8;
 
-        bit_reader.seek_bits(3)?; // reserved 3 bits
+        // This is from the https://aomediacodec.github.io/av1-mpeg2-ts/#av1-video-descriptor spec
+        // The spec from https://aomediacodec.github.io/av1-isobmff/#av1codecconfigurationbox-section is old and contains 3 bits reserved
+        // The newer spec takes 2 of those reserved bits to represent the HDR WCG IDC
+        // Leaving 1 bit for future use
+        let hdr_wcg_idc = bit_reader.read_bits(2)? as u8;
+
+        bit_reader.seek_bits(1)?; // reserved 1 bit
 
         let initial_presentation_delay_minus_one = if bit_reader.read_bit()? {
             Some(bit_reader.read_bits(4)? as u8)
@@ -70,6 +110,7 @@ impl AV1CodecConfigurationRecord {
             chroma_subsampling_x,
             chroma_subsampling_y,
             chroma_sample_position,
+            hdr_wcg_idc,
             initial_presentation_delay_minus_one,
             config_obu: reader.extract_remaining(),
         })
@@ -139,6 +180,7 @@ mod tests {
             chroma_subsampling_x: true,
             chroma_subsampling_y: true,
             chroma_sample_position: 0,
+            hdr_wcg_idc: 0,
             initial_presentation_delay_minus_one: None,
             config_obu: b"\n\x0f\0\0\0j\xef\xbf\xe1\xbc\x02\x19\x90\x10\x10\x10@",
         }
@@ -182,6 +224,7 @@ mod tests {
             chroma_subsampling_x: true,
             chroma_subsampling_y: true,
             chroma_sample_position: 0,
+            hdr_wcg_idc: 0,
             initial_presentation_delay_minus_one: Some(
                 15,
             ),
@@ -202,6 +245,7 @@ mod tests {
             chroma_subsampling_x: false,
             chroma_subsampling_y: false,
             chroma_sample_position: 0,
+            hdr_wcg_idc: 0,
             initial_presentation_delay_minus_one: None,
             config_obu: Bytes::from_static(b"HELLO FROM THE OBU"),
         };
@@ -224,6 +268,7 @@ mod tests {
             chroma_subsampling_x: false,
             chroma_subsampling_y: false,
             chroma_sample_position: 0,
+            hdr_wcg_idc: 0,
             initial_presentation_delay_minus_one: Some(0),
             config_obu: Bytes::from_static(b"HELLO FROM THE OBU"),
         };
@@ -233,4 +278,54 @@ mod tests {
 
         insta::assert_snapshot!(format!("{:?}", Bytes::from(buf)), @r#"b"\x81\0\0\x10HELLO FROM THE OBU""#);
     }
+
+    #[test]
+    fn test_video_descriptor_demux() {
+        // 0x80 is the descriptor tag and 0x04 the descriptor length; the bytes
+        // after them are parsed as the codec configuration record (the trailing
+        // bytes end up in `config_obu`, see the snapshot below).
+        let data = b"\x80\x04\x81\r\x0c\x3f\n\x0f\0\0\0j\xef\xbf\xe1\xbc\x02\x19\x90\x10\x10\x10@".to_vec();
+
+        let config = AV1VideoDescriptor::demux(&mut io::Cursor::new(data.into())).unwrap();
+
+        insta::assert_debug_snapshot!(config, @r#"
+        AV1VideoDescriptor {
+            tag: 128,
+            length: 4,
+            codec_configuration_record: AV1CodecConfigurationRecord {
+                seq_profile: 0,
+                seq_level_idx_0: 13,
+                seq_tier_0: false,
+                high_bitdepth: false,
+                twelve_bit: false,
+                monochrome: false,
+                chroma_subsampling_x: true,
+                chroma_subsampling_y: true,
+                chroma_sample_position: 0,
+                hdr_wcg_idc: 0,
+                initial_presentation_delay_minus_one: Some(
+                    15,
+                ),
+                config_obu: b"\n\x0f\0\0\0j\xef\xbf\xe1\xbc\x02\x19\x90\x10\x10\x10@",
+            },
+        }
+        "#);
+    }
+
+    #[test]
+    fn test_video_descriptor_demux_invalid_tag() {
+        // A single byte that is not the expected 0x80 tag.
+        let mut reader = io::Cursor::new(Bytes::from_static(b"\x81"));
+
+        let error = AV1VideoDescriptor::demux(&mut reader).unwrap_err();
+
+        assert_eq!(error.kind(), io::ErrorKind::InvalidData);
+        assert_eq!(error.to_string(), "Invalid AV1 video descriptor tag");
+    }
+
+    #[test]
+    fn test_video_descriptor_demux_invalid_length() {
+        // Valid tag (0x80) followed by a length byte other than 4.
+        let mut reader = io::Cursor::new(Bytes::from_static(b"\x80\x05ju"));
+
+        let error = AV1VideoDescriptor::demux(&mut reader).unwrap_err();
+
+        assert_eq!(error.kind(), io::ErrorKind::InvalidData);
+        assert_eq!(error.to_string(), "Invalid AV1 video descriptor length");
+    }
 }
diff --git a/crates/av1/src/lib.rs b/crates/av1/src/lib.rs
@@ -16,5 +16,5 @@
 mod config;
 mod obu;
 
-pub use config::AV1CodecConfigurationRecord;
+pub use config::{AV1CodecConfigurationRecord, AV1VideoDescriptor};
 pub use obu::{seq, ObuHeader, ObuType};
diff --git a/crates/flv/Cargo.toml b/crates/flv/Cargo.toml
@@ -1,19 +1,29 @@
 [package]
-name = "flv"
+name = "scuffle-flv"
 version = "0.0.1"
 edition = "2021"
 license = "MIT OR Apache-2.0"
+repository = "https://github.com/scufflecloud/scuffle"
+authors = ["Scuffle <opensource@scuffle.cloud>"]
+readme = "README.md"
+documentation = "https://docs.rs/scuffle-flv"
+description = "A pure Rust FLV demuxer."
+keywords = ["flv", "demuxer"]
+
+[lints.rust]
+unexpected_cfgs = { level = "warn", check-cfg = ['cfg(coverage_nightly)'] }
 
 [dependencies]
 byteorder = "1.5"
 bytes = "1.5"
 num-traits = "0.2"
 num-derive = "0.4"
+thiserror = "2.0"
 
-scuffle-av1.workspace = true
 h264 = { path = "../h264" }
 h265 = { path = "../h265" }
 scuffle-aac = { path = "../aac" }
 scuffle-bytes-util.workspace = true
+scuffle-av1.workspace = true
 scuffle-amf0.workspace = true
 scuffle-workspace-hack.workspace = true
diff --git a/crates/flv/LICENSE.Apache-2.0 b/crates/flv/LICENSE.Apache-2.0
@@ -0,0 +1 @@
+../../LICENSE.Apache-2.0
diff --git a/crates/flv/LICENSE.MIT b/crates/flv/LICENSE.MIT
@@ -0,0 +1 @@
+../../LICENSE.MIT
diff --git a/crates/flv/README.md b/crates/flv/README.md
@@ -0,0 +1,17 @@
+# scuffle-flv
+
+> [!WARNING]  
+> This crate is under active development and may not be stable.
+
+[![crates.io](https://img.shields.io/crates/v/scuffle-flv.svg)](https://crates.io/crates/scuffle-flv) [![docs.rs](https://img.shields.io/docsrs/scuffle-flv)](https://docs.rs/scuffle-flv)
+
+---
+
+A pure Rust implementation of the FLV format, allowing for demuxing of FLV files or streams.
+
+## License
+
+This project is licensed under either the [MIT](./LICENSE.MIT) or the [Apache-2.0](./LICENSE.Apache-2.0) license.
+You may choose whichever of the two you prefer when using this work.
+
+`SPDX-License-Identifier: MIT OR Apache-2.0`
diff --git a/crates/flv/src/aac.rs b/crates/flv/src/aac.rs
@@ -0,0 +1,109 @@
+use bytes::Bytes;
+
+use crate::macros::nutype_enum;
+
+nutype_enum! {
+    /// FLV AAC Packet Type
+    ///
+    /// Defined in the FLV specification. Chapter 1 - AACAUDIODATA
+    ///
+    /// The AACPacketType indicates the type of data in the AACAUDIODATA.
+    /// Values other than the named ones below are still representable and are
+    /// mapped to [`AacPacket::Unknown`] by [`AacPacket::new`].
+    pub enum AacPacketType(u8) {
+        /// Sequence Header (`0`)
+        SequenceHeader = 0x0,
+        /// Raw (`1`)
+        Raw = 0x1,
+    }
+}
+
+/// AAC Packet
+///
+/// This is a container for AAC data.
+/// Each variant holds the payload for one kind of AAC packet; the payload is
+/// stored as-is and is not parsed here.
+///
+/// Defined in the FLV specification. Chapter 1 - AACAUDIODATA
+#[derive(Debug, Clone, PartialEq)]
+pub enum AacPacket {
+    /// AAC Sequence Header
+    SequenceHeader(Bytes),
+    /// AAC Raw
+    Raw(Bytes),
+    /// Data we don't know how to parse; the original packet type is kept
+    /// alongside the payload.
+    Unknown { aac_packet_type: AacPacketType, data: Bytes },
+}
+
+impl AacPacket {
+    /// Wraps `data` in the variant that corresponds to `aac_packet_type`.
+    ///
+    /// Packet types other than [`AacPacketType::SequenceHeader`] and
+    /// [`AacPacketType::Raw`] are preserved in [`AacPacket::Unknown`].
+    pub fn new(aac_packet_type: AacPacketType, data: Bytes) -> Self {
+        if aac_packet_type == AacPacketType::SequenceHeader {
+            Self::SequenceHeader(data)
+        } else if aac_packet_type == AacPacketType::Raw {
+            Self::Raw(data)
+        } else {
+            Self::Unknown { aac_packet_type, data }
+        }
+    }
+}
+
+#[cfg(test)]
+#[cfg_attr(all(test, coverage_nightly), coverage(off))]
+mod tests {
+    use super::*;
+
+    /// The payload used by every case in [`test_new`].
+    fn payload() -> Bytes {
+        Bytes::from(vec![0, 1, 2, 3])
+    }
+
+    /// `AacPacket::new` picks the variant from the packet type, whether the
+    /// type is spelled with a named constant or with its raw value.
+    #[test]
+    fn test_new() {
+        let cases = [
+            (AacPacketType::Raw, AacPacket::Raw(payload())),
+            (AacPacketType::SequenceHeader, AacPacket::SequenceHeader(payload())),
+            (AacPacketType(0x0), AacPacket::SequenceHeader(payload())),
+            (AacPacketType(0x1), AacPacket::Raw(payload())),
+            (
+                AacPacketType(0x2),
+                AacPacket::Unknown {
+                    aac_packet_type: AacPacketType(0x2),
+                    data: payload(),
+                },
+            ),
+            (
+                AacPacketType(0x3),
+                AacPacket::Unknown {
+                    aac_packet_type: AacPacketType(0x3),
+                    data: payload(),
+                },
+            ),
+        ];
+
+        // The payload is moved into the packet; no clone is needed.
+        for (packet_type, expected) in cases {
+            assert_eq!(AacPacket::new(packet_type, payload()), expected);
+        }
+    }
+
+    /// Named values print as `Type::Variant`, unnamed ones as `Type(value)`.
+    #[test]
+    fn test_aac_packet_type() {
+        assert_eq!(
+            format!("{:?}", AacPacketType::SequenceHeader),
+            "AacPacketType::SequenceHeader"
+        );
+        assert_eq!(format!("{:?}", AacPacketType::Raw), "AacPacketType::Raw");
+        assert_eq!(format!("{:?}", AacPacketType(0x2)), "AacPacketType(2)");
+        assert_eq!(format!("{:?}", AacPacketType(0x3)), "AacPacketType(3)");
+
+        assert_eq!(AacPacketType(0x01), AacPacketType::Raw);
+        assert_eq!(AacPacketType(0x00), AacPacketType::SequenceHeader);
+    }
+}
diff --git a/crates/flv/src/audio.rs b/crates/flv/src/audio.rs
@@ -0,0 +1,294 @@
+use std::io;
+
+use byteorder::ReadBytesExt;
+use bytes::Bytes;
+use scuffle_bytes_util::BytesCursorExt;
+
+use super::aac::{AacPacket, AacPacketType};
+use crate::macros::nutype_enum;
+
+/// FLV Tag Audio Data
+///
+/// This is the container for the audio data.
+///
+/// The sound format read from the header byte is not stored as a field of its
+/// own; it selects the [`AudioDataBody`] variant instead.
+///
+/// Defined by:
+/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags)
+/// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA)
+#[derive(Debug, Clone, PartialEq)]
+pub struct AudioData {
+    /// The sound rate of the audio data. (2 bits)
+    pub sound_rate: SoundRate,
+    /// The sound size of the audio data. (1 bit)
+    pub sound_size: SoundSize,
+    /// The sound type of the audio data. (1 bit)
+    pub sound_type: SoundType,
+    /// The body of the audio data.
+    pub body: AudioDataBody,
+}
+
+impl AudioData {
+    /// Demux audio data from the given reader.
+    ///
+    /// The first byte is split, from the most significant bit down, into the
+    /// sound format (4 bits), sound rate (2 bits), sound size (1 bit) and sound
+    /// type (1 bit). The rest of the reader is handed to
+    /// [`AudioDataBody::demux`] together with the sound format.
+    pub fn demux(reader: &mut io::Cursor<Bytes>) -> io::Result<Self> {
+        let header = reader.read_u8()?;
+
+        // Header byte layout: FFFF RR S T
+        let sound_format = SoundFormat::from(header >> 4);
+
+        Ok(Self {
+            sound_rate: SoundRate::from((header >> 2) & 0b11),
+            sound_size: SoundSize::from((header >> 1) & 0b1),
+            sound_type: SoundType::from(header & 0b1),
+            body: AudioDataBody::demux(sound_format, reader)?,
+        })
+    }
+}
+
+nutype_enum! {
+    /// FLV Sound Format
+    ///
+    /// Denotes the type of the underlying data packet
+    ///
+    /// Not every 4-bit value has a named constant below (9, 12 and 13 do not);
+    /// such values are still representable as `SoundFormat(n)`.
+    ///
+    /// Defined by:
+    /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags)
+    /// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA)
+    pub enum SoundFormat(u8) {
+        /// Linear PCM, platform endian
+        LinearPcmPlatformEndian = 0,
+        /// ADPCM
+        Adpcm = 1,
+        /// MP3
+        Mp3 = 2,
+        /// Linear PCM, little endian
+        LinearPcmLittleEndian = 3,
+        /// Nellymoser 16 kHz Mono
+        Nellymoser16KhzMono = 4,
+        /// Nellymoser 8 kHz Mono
+        Nellymoser8KhzMono = 5,
+        /// Nellymoser
+        Nellymoser = 6,
+        /// G.711 A-Law logarithmic PCM
+        G711ALaw = 7,
+        /// G.711 Mu-Law logarithmic PCM
+        G711MuLaw = 8,
+        /// AAC
+        Aac = 10,
+        /// Speex
+        Speex = 11,
+        /// MP3 8 kHz
+        Mp38Khz = 14,
+        /// Device specific sound
+        DeviceSpecificSound = 15,
+    }
+}
+
+/// FLV Tag Audio Data Body
+///
+/// This is the container for the audio data body.
+/// Only AAC is given a dedicated variant; every other sound format is kept as
+/// opaque bytes together with the format it was tagged with.
+///
+/// Defined by:
+/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags)
+/// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA)
+#[derive(Debug, Clone, PartialEq)]
+pub enum AudioDataBody {
+    /// AAC Audio Packet
+    Aac(AacPacket),
+    /// Some other audio format we don't know how to parse
+    Unknown { sound_format: SoundFormat, data: Bytes },
+}
+
+impl AudioDataBody {
+    /// Demux the audio data body from the given reader
+    ///
+    /// The reader will be entirely consumed.
+    pub fn demux(sound_format: SoundFormat, reader: &mut io::Cursor<Bytes>) -> io::Result<Self> {
+        // Anything that is not AAC is passed through untouched.
+        if sound_format != SoundFormat::Aac {
+            return Ok(Self::Unknown {
+                sound_format,
+                data: reader.extract_remaining(),
+            });
+        }
+
+        // AAC bodies start with one extra byte, the AAC packet type, followed by
+        // the actual AAC payload.
+        let aac_packet_type = AacPacketType::from(reader.read_u8()?);
+        let payload = reader.extract_remaining();
+
+        Ok(Self::Aac(AacPacket::new(aac_packet_type, payload)))
+    }
+}
+
+nutype_enum! {
+    /// FLV Sound Rate
+    ///
+    /// Denotes the sampling rate of the audio data.
+    ///
+    /// `AudioData::demux` takes this from a 2-bit field, so all four possible
+    /// values have a named constant.
+    ///
+    /// Defined by:
+    /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags)
+    /// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA)
+    pub enum SoundRate(u8) {
+        /// 5.5 kHz
+        Hz5500 = 0,
+        /// 11 kHz
+        Hz11000 = 1,
+        /// 22 kHz
+        Hz22000 = 2,
+        /// 44 kHz
+        Hz44000 = 3,
+    }
+}
+
+nutype_enum! {
+    /// FLV Sound Size
+    ///
+    /// Denotes the size of each sample in the audio data.
+    ///
+    /// `AudioData::demux` takes this from a single bit, so only the two named
+    /// values are produced there.
+    ///
+    /// Defined by:
+    /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags)
+    /// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA)
+    pub enum SoundSize(u8) {
+        /// 8 bit
+        Bit8 = 0,
+        /// 16 bit
+        Bit16 = 1,
+    }
+}
+
+nutype_enum! {
+    /// FLV Sound Type
+    ///
+    /// Denotes the number of channels in the audio data.
+    ///
+    /// `AudioData::demux` takes this from the lowest bit of the header byte, so
+    /// only the two named values are produced there.
+    ///
+    /// Defined by:
+    /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags)
+    /// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA)
+    pub enum SoundType(u8) {
+        /// Mono
+        Mono = 0,
+        /// Stereo
+        Stereo = 1,
+    }
+}
+
+#[cfg(test)]
+#[cfg_attr(all(test, coverage_nightly), coverage(off))]
+mod tests {
+    use super::*;
+
+    // Every named sound format converts from its raw value and prints as
+    // `SoundFormat::<Name>`.
+    #[test]
+    fn test_sound_format() {
+        let cases = [
+            (
+                0x00,
+                SoundFormat::LinearPcmPlatformEndian,
+                "SoundFormat::LinearPcmPlatformEndian",
+            ),
+            (0x01, SoundFormat::Adpcm, "SoundFormat::Adpcm"),
+            (0x02, SoundFormat::Mp3, "SoundFormat::Mp3"),
+            (0x03, SoundFormat::LinearPcmLittleEndian, "SoundFormat::LinearPcmLittleEndian"),
+            (0x04, SoundFormat::Nellymoser16KhzMono, "SoundFormat::Nellymoser16KhzMono"),
+            (0x05, SoundFormat::Nellymoser8KhzMono, "SoundFormat::Nellymoser8KhzMono"),
+            (0x06, SoundFormat::Nellymoser, "SoundFormat::Nellymoser"),
+            (0x07, SoundFormat::G711ALaw, "SoundFormat::G711ALaw"),
+            (0x08, SoundFormat::G711MuLaw, "SoundFormat::G711MuLaw"),
+            (0x0A, SoundFormat::Aac, "SoundFormat::Aac"),
+            (0x0B, SoundFormat::Speex, "SoundFormat::Speex"),
+            (0x0E, SoundFormat::Mp38Khz, "SoundFormat::Mp38Khz"),
+            (0x0F, SoundFormat::DeviceSpecificSound, "SoundFormat::DeviceSpecificSound"),
+        ];
+
+        for (value, expected, name) in cases {
+            let sound_format = SoundFormat::from(value);
+            assert_eq!(sound_format, expected);
+            assert_eq!(format!("{:?}", sound_format), name);
+        }
+    }
+
+    // Same round trip for the four sound rates.
+    #[test]
+    fn test_sound_rate() {
+        let cases = [
+            (0x00, SoundRate::Hz5500, "SoundRate::Hz5500"),
+            (0x01, SoundRate::Hz11000, "SoundRate::Hz11000"),
+            (0x02, SoundRate::Hz22000, "SoundRate::Hz22000"),
+            (0x03, SoundRate::Hz44000, "SoundRate::Hz44000"),
+        ];
+
+        for (value, expected, name) in cases {
+            let sound_rate = SoundRate::from(value);
+            assert_eq!(sound_rate, expected);
+            assert_eq!(format!("{:?}", sound_rate), name);
+        }
+    }
+
+    // Same round trip for the two sound sizes.
+    #[test]
+    fn test_sound_size() {
+        let cases = [
+            (0x00, SoundSize::Bit8, "SoundSize::Bit8"),
+            (0x01, SoundSize::Bit16, "SoundSize::Bit16"),
+        ];
+
+        for (value, expected, name) in cases {
+            let sound_size = SoundSize::from(value);
+            assert_eq!(sound_size, expected);
+            assert_eq!(format!("{:?}", sound_size), name);
+        }
+    }
+
+    // Same round trip for the two sound types.
+    #[test]
+    fn test_sound_type() {
+        let cases = [
+            (0x00, SoundType::Mono, "SoundType::Mono"),
+            (0x01, SoundType::Stereo, "SoundType::Stereo"),
+        ];
+
+        for (value, expected, name) in cases {
+            let sound_type = SoundType::from(value);
+            assert_eq!(sound_type, expected);
+            assert_eq!(format!("{:?}", sound_type), name);
+        }
+    }
+
+    #[test]
+    fn test_audio_data_demux() {
+        // Header 0b1010_11_0_1: format 10 (AAC), rate 3, size 0 (8 bit), type 1
+        // (stereo). The next byte is AAC packet type 0 (sequence header).
+        let mut reader = io::Cursor::new(Bytes::from(vec![0b10101101, 0b00000000, 1, 2, 3]));
+
+        let audio_data = AudioData::demux(&mut reader).unwrap();
+        assert_eq!(audio_data.sound_rate, SoundRate::Hz44000);
+        assert_eq!(audio_data.sound_size, SoundSize::Bit8);
+        assert_eq!(audio_data.sound_type, SoundType::Stereo);
+        assert_eq!(
+            audio_data.body,
+            AudioDataBody::Aac(AacPacket::SequenceHeader(Bytes::from(vec![1, 2, 3])))
+        );
+
+        // Same header byte, but an AAC packet type (0b00100000) that has no
+        // named constant, so the packet is reported as unknown.
+        let mut reader = io::Cursor::new(Bytes::from(vec![0b10101101, 0b00100000, 1, 2, 3]));
+
+        let audio_data = AudioData::demux(&mut reader).unwrap();
+        assert_eq!(audio_data.sound_rate, SoundRate::Hz44000);
+        assert_eq!(audio_data.sound_size, SoundSize::Bit8);
+        assert_eq!(audio_data.sound_type, SoundType::Stereo);
+        assert_eq!(
+            audio_data.body,
+            AudioDataBody::Aac(AacPacket::Unknown {
+                aac_packet_type: AacPacketType(0b00100000),
+                data: Bytes::from(vec![1, 2, 3])
+            })
+        );
+
+        // Header 0b1000_11_0_1: format 8 is not AAC, so every byte after the
+        // header (including the leading 0) is kept as opaque data.
+        let mut reader = io::Cursor::new(Bytes::from(vec![0b10001101, 0b00000000, 1, 2, 3]));
+
+        let audio_data = AudioData::demux(&mut reader).unwrap();
+        assert_eq!(
+            audio_data.body,
+            AudioDataBody::Unknown {
+                sound_format: SoundFormat(8),
+                data: Bytes::from(vec![0, 1, 2, 3])
+            }
+        );
+    }
+}
diff --git a/crates/flv/src/av1.rs b/crates/flv/src/av1.rs
@@ -0,0 +1,13 @@
+use bytes::Bytes;
+use scuffle_av1::AV1CodecConfigurationRecord;
+
+/// AV1 Packet
+///
+/// This is a container for AV1 data.
+/// Each variant holds the data for one kind of AV1 packet.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Av1Packet {
+    /// AV1 Sequence Start, carrying the parsed codec configuration record
+    SequenceStart(AV1CodecConfigurationRecord),
+    /// AV1 Raw Data, kept as unparsed bytes
+    Raw(Bytes),
+}
diff --git a/crates/flv/src/avc.rs b/crates/flv/src/avc.rs
@@ -0,0 +1,58 @@
+use std::io;
+
+use byteorder::{BigEndian, ReadBytesExt};
+use bytes::Bytes;
+use h264::AVCDecoderConfigurationRecord;
+use scuffle_bytes_util::BytesCursorExt;
+
+use crate::macros::nutype_enum;
+
+/// AVC Packet
+///
+/// The parsed form of an AVC video packet: a packet type byte, a 24-bit
+/// composition time and a type-dependent payload (see [`AvcPacket::demux`]).
+///
+/// NOTE(review): `composition_time` is read as an unsigned 24-bit value, while
+/// the FLV specification declares CompositionTime as SI24 — confirm that
+/// negative offsets do not need to be supported here.
+#[derive(Debug, Clone, PartialEq)]
+pub enum AvcPacket {
+    /// AVC NALU
+    Nalu { composition_time: u32, data: Bytes },
+    /// AVC Sequence Header
+    SequenceHeader(AVCDecoderConfigurationRecord),
+    /// AVC End of Sequence
+    EndOfSequence,
+    /// AVC Unknown (we don't know how to parse it)
+    Unknown {
+        avc_packet_type: AvcPacketType,
+        composition_time: u32,
+        data: Bytes,
+    },
+}
+
+impl AvcPacket {
+    /// Demux an AVC packet from the given reader.
+    ///
+    /// Reads the one-byte packet type and the 24-bit big-endian composition
+    /// time, then interprets the rest of the reader according to the type.
+    pub fn demux(reader: &mut io::Cursor<Bytes>) -> io::Result<Self> {
+        let avc_packet_type = AvcPacketType::from(reader.read_u8()?);
+        let composition_time = reader.read_u24::<BigEndian>()?;
+
+        let packet = match avc_packet_type {
+            // Neither of these two variants keeps the composition time.
+            AvcPacketType::EndOfSequence => Self::EndOfSequence,
+            AvcPacketType::SeqHdr => Self::SequenceHeader(AVCDecoderConfigurationRecord::demux(reader)?),
+            AvcPacketType::Nalu => Self::Nalu {
+                composition_time,
+                data: reader.extract_remaining(),
+            },
+            // Unrecognised packet types keep everything so callers can inspect it.
+            _ => Self::Unknown {
+                avc_packet_type,
+                composition_time,
+                data: reader.extract_remaining(),
+            },
+        };
+
+        Ok(packet)
+    }
+}
+
+nutype_enum! {
+    /// FLV AVC Packet Type
+    ///
+    /// Defined in the FLV specification. Chapter 1 - AVCVIDEODATA
+    ///
+    /// The AVC packet type is used to determine if the video data is a sequence
+    /// header, a NALU or an end-of-sequence marker.
+    pub enum AvcPacketType(u8) {
+        /// Sequence header (`0`)
+        SeqHdr = 0,
+        /// NALU (`1`)
+        Nalu = 1,
+        /// End of sequence (`2`)
+        EndOfSequence = 2,
+    }
+}
diff --git a/crates/flv/src/define.rs b/crates/flv/src/define.rs
diff --git a/crates/flv/src/errors.rs b/crates/flv/src/errors.rs
diff --git a/crates/flv/src/file.rs b/crates/flv/src/file.rs
@@ -0,0 +1,44 @@
+use byteorder::{BigEndian, ReadBytesExt};
+use bytes::{Buf, Bytes};
+
+use super::header::FlvHeader;
+use super::tag::FlvTag;
+
+/// An FLV file is a combination of a [`FlvHeader`] followed by the
+/// `FLVFileBody` (which is just a series of [`FlvTag`]s)
+///
+/// The `FLVFileBody` is defined by:
+/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Page 8)
+/// - video_file_format_spec_v10_1.pdf (Annex E.3 - The FLV File Body)
+#[derive(Debug, Clone, PartialEq)]
+pub struct FlvFile {
+    /// The header found at the start of the file.
+    pub header: FlvHeader,
+    /// The tags that follow the header, in the order they were read.
+    pub tags: Vec<FlvTag>,
+}
+
+impl FlvFile {
+    /// Demux an FLV file from a reader.
+    ///
+    /// The reader needs to be a [`std::io::Cursor`] with a [`Bytes`] buffer because we
+    /// take advantage of zero-copy reading.
+    pub fn demux(reader: &mut std::io::Cursor<Bytes>) -> std::io::Result<Self> {
+        let header = FlvHeader::demux(reader)?;
+        let mut tags = Vec::new();
+
+        loop {
+            if !reader.has_remaining() {
+                break;
+            }
+
+            // Each tag is preceded by the size of the previous tag. We don't care
+            // about that value; it's only really used for seeking backwards.
+            let _previous_tag_size = reader.read_u32::<BigEndian>()?;
+
+            // A trailing previous-tag-size with nothing after it ends the file.
+            if !reader.has_remaining() {
+                break;
+            }
+
+            tags.push(FlvTag::demux(reader)?);
+        }
+
+        Ok(Self { header, tags })
+    }
+}
diff --git a/crates/flv/src/flv.rs b/crates/flv/src/flv.rs
diff --git a/crates/flv/src/header.rs b/crates/flv/src/header.rs
@@ -0,0 +1,63 @@
+use std::io;
+
+use byteorder::{BigEndian, ReadBytesExt};
+use bytes::Bytes;
+use scuffle_bytes_util::BytesCursorExt;
+
+/// The FLV Header
+///
+/// Whenever an FLV file is read, these are the first 9 bytes of the file,
+/// optionally followed by extra bytes up to the data offset.
+///
+/// Defined by:
+/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV Header - Page 8)
+/// - video_file_format_spec_v10_1.pdf (Annex E.2 - The FLV Header)
+#[derive(Debug, Clone, PartialEq)]
+pub struct FlvHeader {
+    /// The version of the FLV file.
+    pub version: u8,
+    /// Whether the FLV file has audio.
+    pub has_audio: bool,
+    /// Whether the FLV file has video.
+    pub has_video: bool,
+    /// The extra data in the FLV file.
+    /// Since the header provides a data offset, these are the bytes between the
+    /// end of the header and the start of the data.
+    pub extra: Bytes,
+}
+
+impl FlvHeader {
+    /// Demux the FLV header from the given reader.
+    ///
+    /// Any bytes between the end of the fixed header fields and the data offset
+    /// declared in the header are extracted into [`FlvHeader::extra`].
+    ///
+    /// # Errors
+    ///
+    /// Returns an [`io::ErrorKind::InvalidData`] error when the signature is not
+    /// `FLV` or when the data offset is smaller than the header that was just
+    /// read. Read errors from the underlying reader are propagated.
+    pub fn demux(reader: &mut io::Cursor<Bytes>) -> io::Result<Self> {
+        let start = reader.position() as usize;
+
+        // The signature is only 3 bytes long, so it is compared against a u32
+        // whose most significant byte is zero.
+        let expected_signature = u32::from_be_bytes([0, b'F', b'L', b'V']);
+        if reader.read_u24::<BigEndian>()? != expected_signature {
+            return Err(io::Error::new(io::ErrorKind::InvalidData, "invalid signature"));
+        }
+
+        let version = reader.read_u8()?;
+        let flags = reader.read_u8()?;
+        let offset = reader.read_u32::<BigEndian>()? as usize;
+
+        // Number of bytes consumed for the fixed header fields.
+        let header_len = reader.position() as usize - start;
+
+        // The data offset is measured from the start of the header, so whatever
+        // lies beyond the fixed fields is extra data.
+        let extra_len = offset
+            .checked_sub(header_len)
+            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "invalid offset"))?;
+        let extra = reader.extract_bytes(extra_len)?;
+
+        Ok(Self {
+            version,
+            has_audio: (flags & 0b00000100) != 0,
+            has_video: (flags & 0b00000001) != 0,
+            extra,
+        })
+    }
+}
diff --git a/crates/flv/src/hevc.rs b/crates/flv/src/hevc.rs
@@ -0,0 +1,11 @@
+use bytes::Bytes;
+use h265::HEVCDecoderConfigurationRecord;
+
+/// HEVC Packet
+///
+/// This is a container for HEVC data.
+/// Each variant holds the data for one kind of HEVC packet.
+#[derive(Debug, Clone, PartialEq)]
+pub enum HevcPacket {
+    /// HEVC Sequence Start, carrying the parsed decoder configuration record
+    SequenceStart(HEVCDecoderConfigurationRecord),
+    /// HEVC NALU
+    ///
+    /// NOTE(review): `composition_time` is optional and signed here —
+    /// presumably `None` when the packet carries no composition time; confirm
+    /// against the code that constructs this variant.
+    Nalu { composition_time: Option<i32>, data: Bytes },
+}
diff --git a/crates/flv/src/lib.rs b/crates/flv/src/lib.rs
diff --git a/crates/flv/src/macros.rs b/crates/flv/src/macros.rs
@@ -0,0 +1,66 @@
+/// Helper macro to create a "newtype enum": a transparent tuple struct with
+/// named associated constants.
+///
+/// Although the invocation uses `enum` syntax, the generated type is
+/// `struct $name(pub $type)`, so every value of the underlying type is
+/// representable, not only the named ones. This lets unknown values read from
+/// the wire be carried around without loss.
+///
+/// The generated type derives `Clone`, `Copy`, `PartialEq`, `Eq`,
+/// `PartialOrd`, `Ord`, and `Hash`. `Debug` is implemented by hand: named
+/// values print as `Name::Variant`, all other values as `Name(value)`.
+/// `From` conversions to and from the underlying type are generated as well.
+///
+/// # Examples
+///
+/// ```rust,ignore
+/// nutype_enum! {
+///     pub enum AacPacketType(u8) {
+///         SequenceHeader = 0x0,
+///         Raw = 0x1,
+///     }
+/// }
+///
+/// assert_eq!(AacPacketType::from(0x1), AacPacketType::Raw);
+/// assert_eq!(format!("{:?}", AacPacketType(0x7)), "AacPacketType(7)");
+/// ```
+macro_rules! nutype_enum {
+    (
+        $(#[$attr:meta])*
+        $vis:vis enum $name:ident($type:ty) {
+            $(
+                $(#[$variant_attr:meta])*
+                $variant:ident = $value:expr
+            ),*$(,)?
+        }
+    ) => {
+        #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+        $(#[$attr])*
+        #[repr(transparent)]
+        $vis struct $name(pub $type);
+
+        // Paths are written with a leading `::` so that they keep resolving to
+        // the standard library regardless of what is in scope at the call site.
+        impl ::std::fmt::Debug for $name {
+            fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
+                match self {
+                    $(
+                        &$name::$variant => write!(f, "{}::{}", stringify!($name), stringify!($variant)),
+                    )*
+                    // Values without a named constant fall back to tuple-struct style.
+                    _ => write!(f, "{}({:?})", stringify!($name), self.0),
+                }
+            }
+        }
+
+        impl $name {
+            $(
+                $(#[$variant_attr])*
+                #[allow(non_upper_case_globals)]
+                pub const $variant: Self = Self($value);
+            )*
+        }
+
+        impl ::std::convert::From<$type> for $name {
+            fn from(value: $type) -> Self {
+                Self(value)
+            }
+        }
+
+        impl ::std::convert::From<$name> for $type {
+            fn from(value: $name) -> Self {
+                value.0
+            }
+        }
+    };
+}
+
+pub(crate) use nutype_enum;
diff --git a/crates/flv/src/script.rs b/crates/flv/src/script.rs
@@ -0,0 +1,56 @@
+use std::io;
+
+use bytes::Bytes;
+use scuffle_amf0::{Amf0Decoder, Amf0Marker, Amf0Value};
+use scuffle_bytes_util::BytesCursorExt;
+
+#[derive(Debug, Clone, PartialEq)]
+pub struct ScriptData {
+    /// The name of the script data, decoded from the leading AMF0 string
+    pub name: String,
+    /// Every AMF0 value that follows the name, in decode order
+    pub data: Vec<Amf0Value<'static>>,
+}
+
+impl ScriptData {
+    pub fn demux(reader: &mut io::Cursor<Bytes>) -> io::Result<Self> {
+        let buf = reader.extract_remaining(); // the rest of the reader's bytes
+        let mut amf0_reader = Amf0Decoder::new(&buf);
+
+        let name = match amf0_reader.decode_with_type(Amf0Marker::String) { // first value must be a string
+            Ok(Amf0Value::String(name)) => name,
+            _ => return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid script data name")),
+        };
+
+        let data = amf0_reader // everything after the name is decoded as generic AMF0 values
+            .decode_all()
+            .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Invalid script data"))?;
+
+        Ok(Self {
+            name: name.into_owned(),
+            data: data.into_iter().map(|v| v.to_owned()).collect(), // own the values so they no longer borrow `buf`
+        })
+    }
+}
+
+#[cfg(test)]
+#[cfg_attr(all(test, coverage_nightly), coverage(off))]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_script_data() {
+        let mut reader = io::Cursor::new(Bytes::from_static(&[
+            0x02, // String marker
+            0x00, 0x0A, // Length (10 bytes)
+            b'o', b'n', b'M', b'e', b't', b'a', b'D', b'a', b't', b'a', // "onMetaData"
+            0x05, // null marker (first data value)
+            0x05, // null marker (second data value)
+        ]));
+        let script_data = ScriptData::demux(&mut reader).unwrap();
+        assert_eq!(script_data.name, "onMetaData");
+        assert_eq!(script_data.data.len(), 2); // one entry per null marker
+        assert_eq!(script_data.data[0], Amf0Value::Null);
+        assert_eq!(script_data.data[1], Amf0Value::Null);
+    }
+}
diff --git a/crates/flv/src/tag.rs b/crates/flv/src/tag.rs
@@ -0,0 +1,138 @@
+use byteorder::{BigEndian, ReadBytesExt};
+use bytes::Bytes;
+use scuffle_bytes_util::BytesCursorExt;
+
+use super::audio::AudioData;
+use super::script::ScriptData;
+use super::video::VideoTagHeader;
+use crate::macros::nutype_enum;
+
+/// An FLV Tag
+///
+/// Tags have different types and thus different data structures. To accommodate
+/// this the [`FlvTagData`] enum is used.
+///
+/// Defined by:
+/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - FLV
+///   tags)
+/// - video_file_format_spec_v10_1.pdf (Annex E.4.1 - FLV Tag)
+///
+/// The v10.1 spec adds some additional fields to the tag to accommodate
+/// encryption. We don't support this because it is not needed for our use case.
+/// (and I suspect it is not used anywhere anymore.)
+///
+/// However if the Tag is encrypted the tag_type will be a larger number (one we
+/// don't support), and therefore the [`FlvTagData::Unknown`] variant will be
+/// used.
+#[derive(Debug, Clone, PartialEq)]
+pub struct FlvTag {
+    /// A timestamp in milliseconds
+    pub timestamp_ms: u32,
+    /// A stream id
+    pub stream_id: u32,
+    pub data: FlvTagData, // the demuxed payload of the tag
+}
+
+impl FlvTag {
+    /// Demux a FLV tag from the given reader.
+    ///
+    /// The reader will be advanced to the end of the tag; any read error or
+    /// failure to demux the tag payload is propagated to the caller.
+    ///
+    /// The reader must be a [`std::io::Cursor`] over [`Bytes`] for zero-copy reading.
+    pub fn demux(reader: &mut std::io::Cursor<Bytes>) -> std::io::Result<Self> {
+        let tag_type = FlvTagType::from(reader.read_u8()?);
+
+        let data_size = reader.read_u24::<BigEndian>()?; // number of payload bytes extracted below
+        // The timestamp is split in two: the low 24 bits are read first, followed by
+        // an extended 8-bit value that becomes the top byte of the 32-bit number.
+        let timestamp_ms = reader.read_u24::<BigEndian>()? | ((reader.read_u8()? as u32) << 24);
+
+        // The stream id according to the spec is ALWAYS 0. (likely not true)
+        let stream_id = reader.read_u24::<BigEndian>()?;
+
+        // We then extract the data from the reader. (advancing the cursor to the end of
+        // the tag)
+        let data = reader.extract_bytes(data_size as usize)?;
+
+        // Finally we demux the data.
+        let data = FlvTagData::demux(tag_type, &mut std::io::Cursor::new(data))?;
+
+        Ok(FlvTag {
+            timestamp_ms,
+            stream_id,
+            data,
+        })
+    }
+}
+
+nutype_enum! {
+    /// FLV Tag Type
+    ///
+    /// This is the type of the tag.
+    ///
+    /// Defined by:
+    /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - FLV tags)
+    /// - video_file_format_spec_v10_1.pdf (Annex E.4.1 - FLV Tag)
+    ///
+    /// The 3 types that are supported are Audio(8), Video(9) and
+    /// ScriptData(18).
+    ///
+    /// Any other value is kept as-is; demuxing a tag with such a type yields
+    /// [`FlvTagData::Unknown`].
+    pub enum FlvTagType(u8) {
+        Audio = 8,
+        Video = 9,
+        ScriptData = 18,
+    }
+}
+
+/// FLV Tag Data
+///
+/// This is a container for the actual media data.
+/// This enum contains the data for the different types of tags.
+///
+/// Defined by:
+/// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - FLV tags)
+/// - video_file_format_spec_v10_1.pdf (Annex E.4.1 - FLV Tag)
+#[derive(Debug, Clone, PartialEq)]
+pub enum FlvTagData {
+    /// AudioData when the FlvTagType is Audio(8)
+    /// Defined by:
+    /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Audio tags)
+    /// - video_file_format_spec_v10_1.pdf (Annex E.4.2.1 - AUDIODATA)
+    Audio(AudioData),
+    /// VideoData when the FlvTagType is Video(9)
+    /// Defined by:
+    /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Video tags)
+    /// - video_file_format_spec_v10_1.pdf (Annex E.4.3.1 - VIDEODATA)
+    Video(VideoTagHeader),
+    /// ScriptData when the FlvTagType is ScriptData(18)
+    /// Defined by:
+    /// - video_file_format_spec_v10.pdf (Chapter 1 - The FLV File Format - Data tags)
+    /// - video_file_format_spec_v10_1.pdf (Annex E.4.4.1 - SCRIPTDATA)
+    ScriptData(ScriptData),
+    /// Any tag type that we don't know how to parse, with the corresponding data
+    /// being the raw bytes of the tag
+    Unknown { tag_type: FlvTagType, data: Bytes },
+}
+
+impl FlvTagData {
+    /// Demux a FLV tag data from the given reader.
+    ///
+    /// The reader will be entirely consumed.
+    ///
+    /// The reader needs to be a [`std::io::Cursor`] with a [`Bytes`] buffer because we
+    /// take advantage of zero-copy reading.
+    pub fn demux(tag_type: FlvTagType, reader: &mut std::io::Cursor<Bytes>) -> std::io::Result<Self> {
+        match tag_type {
+            FlvTagType::Audio => Ok(FlvTagData::Audio(AudioData::demux(reader)?)),
+            FlvTagType::Video => Ok(FlvTagData::Video(VideoTagHeader::demux(reader)?)),
+            FlvTagType::ScriptData => Ok(FlvTagData::ScriptData(ScriptData::demux(reader)?)),
+            _ => Ok(FlvTagData::Unknown { // unsupported tag type: keep the remaining bytes untouched
+                tag_type,
+                data: reader.extract_remaining(),
+            }),
+        }
+    }
+}
diff --git a/crates/flv/src/tests/demuxer.rs b/crates/flv/src/tests/demuxer.rs
diff --git a/crates/flv/src/tests/error.rs b/crates/flv/src/tests/error.rs
diff --git a/crates/flv/src/tests/mod.rs b/crates/flv/src/tests/mod.rs
diff --git a/crates/flv/src/video.rs b/crates/flv/src/video.rs
diff --git a/crates/mp4/src/tests/demux.rs b/crates/mp4/src/tests/demux.rs
@@ -1211,6 +1211,7 @@ fn test_demux_av1_aac() {
                                 chroma_subsampling_y: true,
                                 chroma_sample_position: 1,
                                 initial_presentation_delay_minus_one: None,
+                                hdr_wcg_idc: 0,
                                 config_obu: b"\n\x0e\0\0\0$O\x7fS\0\xbe\x04\x04\x04\x04\x90".to_vec().into(),
                             },
                         },

diff --git a/crates/transmuxer/Cargo.toml b/crates/transmuxer/Cargo.toml
@@ -10,11 +10,11 @@ bytes = "1.5"
 
 h264 = { path = "../h264" }
 h265 = { path = "../h265" }
-scuffle-av1.workspace = true
 scuffle-aac = { path = "../aac" }
-scuffle-amf0.workspace = true
-flv = { path = "../flv" }
 mp4 = { path = "../mp4" }
+scuffle-av1.workspace = true
+scuffle-flv.workspace = true
+scuffle-amf0.workspace = true
 scuffle-bytes-util.workspace = true
 scuffle-workspace-hack.workspace = true
 

diff --git a/crates/transmuxer/src/codecs/aac.rs b/crates/transmuxer/src/codecs/aac.rs
@@ -1,5 +1,4 @@
 use bytes::Bytes;
-use flv::{SoundSize, SoundType};
 use mp4::types::esds::descriptor::header::DescriptorHeader;
 use mp4::types::esds::descriptor::traits::DescriptorType;
 use mp4::types::esds::descriptor::types::decoder_config::DecoderConfigDescriptor;
@@ -11,6 +10,7 @@ use mp4::types::stsd::{AudioSampleEntry, SampleEntry};
 use mp4::types::trun::{TrunSample, TrunSampleFlag};
 use mp4::DynBox;
 use scuffle_aac::PartialAudioSpecificConfig;
+use scuffle_flv::audio::{SoundSize, SoundType};
 
 use crate::TransmuxError;
 
@@ -27,10 +27,12 @@ pub fn stsd_entry(
                 match sound_type {
                     SoundType::Mono => 1,
                     SoundType::Stereo => 2,
+                    _ => return Err(TransmuxError::InvalidAudioChannels),
                 },
                 match sound_size {
                     SoundSize::Bit8 => 8,
                     SoundSize::Bit16 => 16,
+                    _ => return Err(TransmuxError::InvalidAudioSampleSize),
                 },
                 aac_config.sampling_frequency,
             )),

diff --git a/crates/transmuxer/src/codecs/av1.rs b/crates/transmuxer/src/codecs/av1.rs
@@ -1,5 +1,4 @@
 use bytes::{Buf, Bytes};
-use flv::FrameType;
 use mp4::types::av01::Av01;
 use mp4::types::av1c::Av1C;
 use mp4::types::colr::{ColorType, Colr};
@@ -9,6 +8,7 @@ use mp4::DynBox;
 use scuffle_av1::seq::SequenceHeaderObu;
 use scuffle_av1::{AV1CodecConfigurationRecord, ObuHeader, ObuType};
 use scuffle_bytes_util::BytesCursorExt;
+use scuffle_flv::video::FrameType;
 
 use crate::TransmuxError;
 

diff --git a/crates/transmuxer/src/codecs/avc.rs b/crates/transmuxer/src/codecs/avc.rs
@@ -1,12 +1,12 @@
 use bytes::Bytes;
-use flv::FrameType;
 use h264::{AVCDecoderConfigurationRecord, Sps};
 use mp4::types::avc1::Avc1;
 use mp4::types::avcc::AvcC;
 use mp4::types::colr::{ColorType, Colr};
 use mp4::types::stsd::{SampleEntry, VisualSampleEntry};
 use mp4::types::trun::{TrunSample, TrunSampleFlag};
 use mp4::DynBox;
+use scuffle_flv::video::FrameType;
 
 use crate::TransmuxError;
 

diff --git a/crates/transmuxer/src/codecs/hevc.rs b/crates/transmuxer/src/codecs/hevc.rs
@@ -1,12 +1,12 @@
 use bytes::Bytes;
-use flv::FrameType;
 use h265::{HEVCDecoderConfigurationRecord, Sps};
 use mp4::types::colr::{ColorType, Colr};
 use mp4::types::hev1::Hev1;
 use mp4::types::hvcc::HvcC;
 use mp4::types::stsd::{SampleEntry, VisualSampleEntry};
 use mp4::types::trun::{TrunSample, TrunSampleFlag};
 use mp4::DynBox;
+use scuffle_flv::video::FrameType;
 
 use crate::TransmuxError;
 

diff --git a/crates/transmuxer/src/define.rs b/crates/transmuxer/src/define.rs
@@ -1,9 +1,9 @@
 use bytes::Bytes;
-use flv::{SoundSize, SoundType};
 use h264::AVCDecoderConfigurationRecord;
 use h265::HEVCDecoderConfigurationRecord;
 use mp4::codec::{AudioCodec, VideoCodec};
 use scuffle_av1::AV1CodecConfigurationRecord;
+use scuffle_flv::audio::{SoundSize, SoundType};
 
 pub(crate) enum VideoSequenceHeader {
     Avc(AVCDecoderConfigurationRecord),

diff --git a/crates/transmuxer/src/errors.rs b/crates/transmuxer/src/errors.rs
@@ -5,18 +5,13 @@ pub enum TransmuxError {
     InvalidVideoDimensions,
     InvalidVideoFrameRate,
     InvalidAudioSampleRate,
+    InvalidAudioChannels,
+    InvalidAudioSampleSize,
     InvalidHEVCDecoderConfigurationRecord,
     InvalidAv1DecoderConfigurationRecord,
     InvalidAVCDecoderConfigurationRecord,
     NoSequenceHeaders,
     IO(io::Error),
-    FlvDemuxer(flv::FlvDemuxerError),
-}
-
-impl From<flv::FlvDemuxerError> for TransmuxError {
-    fn from(err: flv::FlvDemuxerError) -> Self {
-        Self::FlvDemuxer(err)
-    }
 }
 
 impl From<io::Error> for TransmuxError {
@@ -31,6 +26,8 @@ impl std::fmt::Display for TransmuxError {
             Self::InvalidVideoDimensions => write!(f, "invalid video dimensions"),
             Self::InvalidVideoFrameRate => write!(f, "invalid video frame rate"),
             Self::InvalidAudioSampleRate => write!(f, "invalid audio sample rate"),
+            Self::InvalidAudioChannels => write!(f, "invalid audio channels"),
+            Self::InvalidAudioSampleSize => write!(f, "invalid audio sample size"),
             Self::InvalidHEVCDecoderConfigurationRecord => {
                 write!(f, "invalid hevc decoder configuration record")
             }
@@ -42,7 +39,6 @@ impl std::fmt::Display for TransmuxError {
             }
             Self::NoSequenceHeaders => write!(f, "no sequence headers"),
             Self::IO(err) => write!(f, "io error: {}", err),
-            Self::FlvDemuxer(err) => write!(f, "flv demuxer error: {}", err),
         }
     }
 }
diff --git a/crates/transmuxer/src/lib.rs b/crates/transmuxer/src/lib.rs
@@ -7,10 +7,6 @@ use std::io;
 
 use byteorder::{BigEndian, ReadBytesExt};
 use bytes::{Buf, Bytes};
-use flv::{
-    AacPacket, Av1Packet, AvcPacket, EnhancedPacket, FlvTag, FlvTagAudioData, FlvTagData, FlvTagVideoData, FrameType,
-    HevcPacket, SoundType,
-};
 use mp4::codec::{AudioCodec, VideoCodec};
 use mp4::types::ftyp::{FourCC, Ftyp};
 use mp4::types::hdlr::{HandlerType, Hdlr};
@@ -40,6 +36,14 @@ use mp4::types::trun::Trun;
 use mp4::types::vmhd::Vmhd;
 use mp4::BoxType;
 use scuffle_amf0::Amf0Value;
+use scuffle_flv::aac::AacPacket;
+use scuffle_flv::audio::{AudioData, AudioDataBody, SoundType};
+use scuffle_flv::av1::Av1Packet;
+use scuffle_flv::avc::AvcPacket;
+use scuffle_flv::hevc::HevcPacket;
+use scuffle_flv::script::ScriptData;
+use scuffle_flv::tag::{FlvTag, FlvTagData};
+use scuffle_flv::video::{EnhancedPacket, FrameType, VideoTagBody, VideoTagHeader};
 
 mod codecs;
 mod define;
@@ -94,7 +98,7 @@ impl Transmuxer {
                 break;
             }
 
-            let tag = flv::FlvTag::demux(&mut cursor)?;
+            let tag = FlvTag::demux(&mut cursor)?;
             self.tags.push_back(tag);
         }
 
@@ -143,7 +147,7 @@ impl Transmuxer {
             let mut is_keyframe = false;
 
             let duration =
-                if self.last_video_timestamp == 0 || tag.timestamp == 0 || tag.timestamp < self.last_video_timestamp {
+                if self.last_video_timestamp == 0 || tag.timestamp_ms == 0 || tag.timestamp_ms < self.last_video_timestamp {
                     1000 // the first frame is always 1000 ticks where the
                          // timescale is 1000 * fps.
                 } else {
@@ -156,7 +160,7 @@ impl Transmuxer {
                     // The reason we use a timescale which is 1000 * fps is because then we can
                     // always represent the delta as an integer. If we use a timescale of 1000, we
                     // would run into the same rounding errors.
-                    let delta = tag.timestamp as f64 - self.last_video_timestamp as f64;
+                    let delta = tag.timestamp_ms as f64 - self.last_video_timestamp as f64;
                     let expected_delta = 1000.0 / video_settings.framerate;
                     if (delta - expected_delta).abs() <= 1.0 {
                         1000
@@ -166,21 +170,22 @@ impl Transmuxer {
                 };
 
             match tag.data {
-                FlvTagData::Audio {
-                    data: FlvTagAudioData::Aac(AacPacket::Raw(data)),
+                FlvTagData::Audio(AudioData {
+                    body: AudioDataBody::Aac(AacPacket::Raw(data)),
                     ..
-                } => {
+                }) => {
                     let (sample, duration) = codecs::aac::trun_sample(&data)?;
 
                     trun_sample = sample;
                     mdat_data = data;
                     total_duration = duration;
                     is_audio = true;
                 }
-                FlvTagData::Video {
+                FlvTagData::Video(VideoTagHeader {
                     frame_type,
-                    data: FlvTagVideoData::Avc(AvcPacket::Nalu { composition_time, data }),
-                } => {
+                    body: VideoTagBody::Avc(AvcPacket::Nalu { composition_time, data }),
+                    ..
+                }) => {
                     let composition_time = ((composition_time as f64 * video_settings.framerate) / 1000.0).floor() * 1000.0;
 
                     let sample = codecs::avc::trun_sample(frame_type, composition_time as u32, duration, &data)?;
@@ -191,10 +196,11 @@ impl Transmuxer {
 
                     is_keyframe = frame_type == FrameType::Keyframe;
                 }
-                FlvTagData::Video {
+                FlvTagData::Video(VideoTagHeader {
                     frame_type,
-                    data: FlvTagVideoData::Enhanced(EnhancedPacket::Av1(Av1Packet::Raw(data))),
-                } => {
+                    body: VideoTagBody::Enhanced(EnhancedPacket::Av1(Av1Packet::Raw(data))),
+                    ..
+                }) => {
                     let sample = codecs::av1::trun_sample(frame_type, duration, &data)?;
 
                     trun_sample = sample;
@@ -203,10 +209,11 @@ impl Transmuxer {
 
                     is_keyframe = frame_type == FrameType::Keyframe;
                 }
-                FlvTagData::Video {
+                FlvTagData::Video(VideoTagHeader {
                     frame_type,
-                    data: FlvTagVideoData::Enhanced(EnhancedPacket::Hevc(HevcPacket::Nalu { composition_time, data })),
-                } => {
+                    body: VideoTagBody::Enhanced(EnhancedPacket::Hevc(HevcPacket::Nalu { composition_time, data })),
+                    ..
+                }) => {
                     let composition_time =
                         ((composition_time.unwrap_or_default() as f64 * video_settings.framerate) / 1000.0).floor() * 1000.0;
 
@@ -278,7 +285,7 @@ impl Transmuxer {
                 })));
             } else {
                 self.video_duration += total_duration as u64;
-                self.last_video_timestamp = tag.timestamp;
+                self.last_video_timestamp = tag.timestamp_ms;
                 return Ok(Some(TransmuxResult::MediaSegment(MediaSegment {
                     data: Bytes::from(writer),
                     ty: MediaType::Video,
@@ -302,37 +309,40 @@ impl Transmuxer {
             }
 
             match &tag.data {
-                FlvTagData::Video {
+                FlvTagData::Video(VideoTagHeader {
                     frame_type: _,
-                    data: FlvTagVideoData::Avc(AvcPacket::SequenceHeader(data)),
-                } => {
+                    body: VideoTagBody::Avc(AvcPacket::SequenceHeader(data)),
+                    ..
+                }) => {
                     video_sequence_header = Some(VideoSequenceHeader::Avc(data.clone()));
                 }
-                FlvTagData::Video {
+                FlvTagData::Video(VideoTagHeader {
                     frame_type: _,
-                    data: FlvTagVideoData::Enhanced(EnhancedPacket::Av1(Av1Packet::SequenceStart(config))),
-                } => {
+                    body: VideoTagBody::Enhanced(EnhancedPacket::Av1(Av1Packet::SequenceStart(config))),
+                    ..
+                }) => {
                     video_sequence_header = Some(VideoSequenceHeader::Av1(config.clone()));
                 }
-                FlvTagData::Video {
+                FlvTagData::Video(VideoTagHeader {
                     frame_type: _,
-                    data: FlvTagVideoData::Enhanced(EnhancedPacket::Hevc(HevcPacket::SequenceStart(config))),
-                } => {
+                    body: VideoTagBody::Enhanced(EnhancedPacket::Hevc(HevcPacket::SequenceStart(config))),
+                    ..
+                }) => {
                     video_sequence_header = Some(VideoSequenceHeader::Hevc(config.clone()));
                 }
-                FlvTagData::Audio {
+                FlvTagData::Audio(AudioData {
+                    body: AudioDataBody::Aac(AacPacket::SequenceHeader(data)),
                     sound_size,
                     sound_type,
-                    sound_rate: _,
-                    data: FlvTagAudioData::Aac(AacPacket::SequenceHeader(data)),
-                } => {
+                    ..
+                }) => {
                     audio_sequence_header = Some(AudioSequenceHeader {
                         data: AudioSequenceHeaderData::Aac(data.clone()),
                         sound_size: *sound_size,
                         sound_type: *sound_type,
                     });
                 }
-                FlvTagData::ScriptData { data, name } => {
+                FlvTagData::ScriptData(ScriptData { data, name }) => {
                     if name == "@setDataFrame" || name == "onMetaData" {
                         let meta_object = data.iter().find(|v| matches!(v, Amf0Value::Object(_)));
 
@@ -492,6 +502,7 @@ impl Transmuxer {
                 audio_channels = match audio_sequence_header.sound_type {
                     SoundType::Mono => 1,
                     SoundType::Stereo => 2,
+                    _ => return Err(TransmuxError::InvalidAudioChannels),
                 };
 
                 entry

diff --git a/crates/transmuxer/src/tests/mod.rs b/crates/transmuxer/src/tests/mod.rs
@@ -4,9 +4,9 @@ use std::io::{
 use std::path::PathBuf;
 use std::process::{Command, Stdio};
 
-use flv::FlvHeader;
 use mp4::codec::{AudioCodec, VideoCodec};
 use scuffle_aac::AudioObjectType;
+use scuffle_flv::header::FlvHeader;
 
 use crate::define::{AudioSettings, VideoSettings};
 use crate::{TransmuxResult, Transmuxer};