Skip to content

Commit

Permalink
Merge commit 'bd2126e954a85806526338c75c8084a52b50f7dd'
Browse files Browse the repository at this point in the history
  • Loading branch information
toots committed Mar 24, 2024
2 parents 2046f18 + bd2126e commit 2c1a44b
Show file tree
Hide file tree
Showing 16 changed files with 200 additions and 77 deletions.
6 changes: 6 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
0.3.0 (unreleased)
=====

- Add basic example.
- Add optional custom parser argument to override the default parsing mechanism.
- Add binary format for encoding frames.
- Update default metadata mappings to follow the tag mapping used by MusicBrainz Picard.
- Add `MIME` module to guess MIME type of files (#4).
- Generic RIFF format parser, adds support for wave files (#6).

0.2.0 (2023-07-01)
=====

- Add support for FLAC.
- id3v2: use "bpm" instead of "tempo".
- id3v2: convert "tlen" to "duration".
Expand All @@ -15,4 +20,5 @@

0.1.0 (2023-02-08)
=====

- Initial release.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ The metadata library
A pure OCaml library to read metadata from various formats. The following
formats are currently supported:

- audio formats: ID3v1 and ID3v2 (for mp3), ogg/vorbis, ogg/opus and flac
- audio formats: ID3v1 and ID3v2 (for mp3), ogg/vorbis, ogg/opus, flac and wav
- image formats: jpeg and png
- video formats: mp4 and avi

Expand Down
9 changes: 6 additions & 3 deletions examples/dune
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
(modules dump)
(libraries metadata))

; Example program built from mimetype.ml and linked against the metadata
; library.
(executable
(name mimetype)
(modules mimetype)
(libraries metadata))

(executable
(name test)
(modules test)
Expand Down Expand Up @@ -68,9 +73,7 @@
-i
"sine=frequency=440:sample_rate=44100:duration=10"
-f
mp3
-write_id3v1
true
wav
-metadata
"title=The title"
-metadata
Expand Down
2 changes: 2 additions & 0 deletions examples/mimetype
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/sh
# Run the mimetype example through dune, forwarding all arguments unchanged.
# "$@" is quoted so that file names containing spaces or glob characters reach
# the executable as single, unexpanded arguments.
dune exec --no-print-directory ./mimetype.exe -- "$@"
4 changes: 4 additions & 0 deletions examples/mimetype.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
(* Print the MIME type guessed for the file named by the first command-line
   argument, or "unknown" when no known signature matches. *)
let () =
  (* Without this check, a missing argument ends in an uncaught
     [Invalid_argument "index out of bounds"] from [Sys.argv.(1)]. *)
  if Array.length Sys.argv < 2 then (
    prerr_endline ("Usage: " ^ Sys.executable_name ^ " FILE");
    exit 2);
  let fname = Sys.argv.(1) in
  let mime = try Metadata.MIME.of_file fname with Not_found -> "unknown" in
  print_endline mime
2 changes: 1 addition & 1 deletion examples/test.ml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ let () =
p "mp3v2" (Metadata.ID3v2.parse_file "test.mp3");
p "mp3v1" (Metadata.ID3v1.parse_file "test.mp3");
p "mp3" (Metadata.ID3.parse_file "test.mp3");
p "wav" (Metadata.ID3.parse_file "test.wav");
p "wav" (Metadata.WAV.parse_file "test.wav");
p "png" (Metadata.PNG.parse_file "test.png");
p "jpg" (Metadata.JPEG.parse_file "test.jpg");
p "avi" (Metadata.AVI.parse_file "test.avi");
Expand Down
8 changes: 6 additions & 2 deletions src/metadata.ml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
module CharEncoding = MetadataCharEncoding

module MIME = MetadataMIME

module Make (E : CharEncoding.T) = struct
include MetadataBase
module ID3v1 = MetadataID3v1
Expand All @@ -10,6 +12,8 @@ module Make (E : CharEncoding.T) = struct
module PNG = MetadataPNG
module AVI = MetadataAVI
module MP4 = MetadataMP4
module WAV = MetadataWAV
module RIFF = MetadataRIFF

(** Charset conversion function. *)
let recode = E.convert
Expand Down Expand Up @@ -41,7 +45,7 @@ module Make (E : CharEncoding.T) = struct
| [] -> raise Invalid

module Audio = struct
let parsers = [ID3.parse; OGG.parse; FLAC.parse]
let parsers = [ID3.parse; OGG.parse; FLAC.parse; WAV.parse]
let parse = first_valid parsers

let parse_file ?custom_parser file =
Expand All @@ -65,7 +69,7 @@ module Make (E : CharEncoding.T) = struct
end

module Any = struct
let parsers = Audio.parsers @ Image.parsers @ Video.parsers
let parsers = Audio.parsers @ Image.parsers @ Video.parsers @ [RIFF.parse]

(** Generic parsing of metadata. *)
let parse = first_valid parsers
Expand Down
16 changes: 16 additions & 0 deletions src/metadata.mli
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,16 @@
(** Functions for handling charset conversion. *)
module CharEncoding = MetadataCharEncoding

(** Guess the MIME type of a file from the signature found at the beginning
of its contents. *)
module MIME : sig
(** [of_string s] guesses the MIME type of data whose contents start with
[s], by looking at the signature at the beginning of [s].
@raise Not_found if no known signature matches. *)
val of_string : string -> string

(** [of_file fname] is the same as [of_string] but takes a file name as
argument. Only the first 16 bytes of the file are inspected.
@raise Not_found if no known signature matches. *)
val of_file : string -> string
end

(** Generate metadata parsers given functions for converting charsets. *)
module Make : functor (_ : CharEncoding.T) -> sig
(** Raised when the metadata is not valid. *)
Expand Down Expand Up @@ -84,6 +94,12 @@ module Make : functor (_ : CharEncoding.T) -> sig
(** MP4 metadata. *)
module MP4 = MetadataMP4

(** WAV metadata. *)
module WAV = MetadataWAV

(** RIFF metadata. *)
module RIFF = MetadataRIFF

(** Convert the charset encoding of a string. *)
val recode :
?source:[ `ISO_8859_1 | `UTF_16 | `UTF_16BE | `UTF_16LE | `UTF_8 ] ->
Expand Down
62 changes: 2 additions & 60 deletions src/metadataAVI.ml
Original file line number Diff line number Diff line change
@@ -1,61 +1,3 @@
open MetadataBase
module R = Reader
(* AVI metadata: the generic RIFF parser restricted to the "AVI " form type. *)
let parse f = MetadataRIFF.parse ~format:"AVI " f

(* Tag normalization. *)
let tagn =
[
("IART", "artist");
("ICMT", "comment");
("ICOP", "copyright");
("ICRD", "date");
("ICRD", "date");
("IGNR", "genre");
("INAM", "title");
("IPRD", "album");
("IPRT", "track");
("ISFT", "encoder");
]

let parse f : metadata =
if R.read f 4 <> "RIFF" then raise Invalid;
let _ (* file size *) = R.int32_le f in
if R.read f 4 <> "AVI " then raise Invalid;
let ans = ref [] in
let chunk () =
let tag = R.read f 4 in
let size = R.int32_le f in
if tag <> "LIST" then R.drop f size
else (
let subtag = R.read f 4 in
match subtag with
| "INFO" ->
let remaining = ref (size - 4) in
while !remaining > 0 do
let tag = R.read f 4 in
let size = R.int32_le f in
match R.read_tag ~length:(size - 1) ~label:tag f with
| None -> ()
| Some s ->
R.drop f 1;
(* null-terminated *)
let padding = size mod 2 in
R.drop f padding;
remaining := !remaining - (8 + size + padding);
let tag =
match List.assoc_opt tag tagn with
| Some tag -> tag
| None -> tag
in
ans := (tag, s) :: !ans
done
| "movi" -> raise Exit (* stop parsing there *)
| _ -> R.drop f (size - 4))
in
try
while true do
chunk ()
done;
assert false
with _ -> List.rev !ans

let parse_file ?custom_parser file = R.with_file ?custom_parser parse file
(* Same as [parse], reading from the file with the given name. *)
let parse_file ?custom_parser file =
  MetadataRIFF.parse_file ~format:"AVI " ?custom_parser file
4 changes: 0 additions & 4 deletions src/metadataAVI.mli

This file was deleted.

27 changes: 21 additions & 6 deletions src/metadataID3v2.ml
Original file line number Diff line number Diff line change
Expand Up @@ -58,23 +58,38 @@ let normalize_id = function
| "TALB" -> "album"
| "TBPM" -> "bpm"
| "TCOM" -> "composer"
| "TCON" -> "content"
| "TCON" -> "genre"
| "TCOP" -> "copyright"
| "TDAT" -> "date"
| "TDOR" -> "original release time"
| "TDRC" -> "recording time"
| "TENC" -> "encoder"
| "TENC" -> "encodedby"
| "TEXT" -> "lyricist"
| "TIT1" -> "grouping"
| "TIT2" -> "title"
| "TIT3" -> "subtitle"
| "TKEY" -> "key"
| "TLAN" -> "language"
| "TLEN" -> "length"
| "TMED" -> "media type"
| "TOPE" -> "performer"
| "TMED" -> "media"
| "TOAL" -> "originalalbum"
| "TOFN" -> "originalfilename"
| "TOPE" -> "originalartist"
| "TPOS" -> "discnumber"
| "TPE1" -> "artist"
| "TPE2" -> "band"
| "TPUB" -> "publisher"
| "TPE2" -> "albumartist"
| "TPE3" -> "conductor"
| "TPE4" -> "remixer"
| "TPUB" -> "label"
| "TRCK" -> "tracknumber"
| "TSOA" -> "albumsort"
| "TSO2" -> "albumartistsort"
| "TSOT" -> "titlesort"
| "TSRC" -> "isrc"
| "TSSE" -> "encoder"
| "TSST" -> "discsubtitle"
| "TYER" -> "year"
| "WOAR" -> "website"
| "WXXX" -> "url"
| id -> id

Expand Down
64 changes: 64 additions & 0 deletions src/metadataMIME.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
(** Guess the mime-type of a file. *)

(* Standard [String] module extended with a positional substring test. *)
module String = struct
  include String

  (** [contains_at offset ~substring s] tells whether [substring] occurs in
      [s] starting exactly at index [offset]. It is [false] when [s] is too
      short to hold [substring] at that position. *)
  let contains_at offset ~substring s =
    let sub_len = length substring in
    offset + sub_len <= length s && equal (sub s offset sub_len) substring
end

(* Formats recognized by a fixed signature at the very beginning of the data:
   each entry pairs the signature with the corresponding MIME type. *)
let prefixes =
  [
    (* ID3v2 tag header *)
    ("ID3", "audio/mpeg");
    (* Ogg page capture pattern *)
    ("OggS", "audio/ogg");
    ("%PDF-", "application/pdf");
    (* 8-byte PNG signature *)
    ("\137PNG\013\010\026\010", "image/png");
  ]

(* Formats which cannot be recognized by a plain prefix: each entry pairs a
   predicate on the beginning of the data with the corresponding MIME type. *)
let advanced =
  (* [riff form s] holds when [s] starts with a RIFF header whose form type
     (the four bytes at offset 8, right after the 32-bit size) is [form]. *)
  let riff form s =
    String.starts_with ~prefix:"RIFF" s
    && String.contains_at 8 ~substring:form s
  in
  [
    (* Only the form type is tested: the "fmt " chunk is not necessarily the
       first chunk of a WAVE file (e.g. "bext" or "JUNK" may precede it), so
       requiring "WAVEfmt " would miss valid files. *)
    (riff "WAVE", "audio/wav");
    (riff "AVI ", "video/x-msvideo");
  ]

(** [of_string s] guesses a MIME type from the beginning [s] of some data. The
    predicates of [advanced] are tried first, in order, then the signatures of
    [prefixes]; the first match wins.
    @raise Not_found if nothing matches. *)
let of_string s =
  let by_predicate =
    List.find_map
      (fun (accepts, mime) -> if accepts s then Some mime else None)
      advanced
  in
  match by_predicate with
    | Some mime -> mime
    | None ->
        (* [List.find] raises [Not_found] when no signature matches. *)
        let _, mime =
          List.find (fun (prefix, _) -> String.starts_with ~prefix s) prefixes
        in
        mime

(** [of_file fname] guesses the MIME type of the file named [fname] by passing
    its first bytes (at most 16) to [of_string].
    @raise Not_found if no known signature matches.
    @raise Sys_error if the file cannot be opened or read. *)
let of_file fname =
  let len = 16 in
  (* Signatures are binary data: open in binary mode so that no newline
     translation is applied on platforms which distinguish text mode. *)
  let ic = open_in_bin fname in
  (* Always release the channel, including when reading fails or [of_string]
     raises [Not_found]; [Fun.protect] re-raises the original exception. *)
  Fun.protect
    ~finally:(fun () -> close_in_noerr ic)
    (fun () ->
      let buf = Bytes.create len in
      (* [input] may return fewer bytes than requested before the end of the
         file is reached: keep reading until [len] bytes or end of file. *)
      let rec fill pos =
        if pos >= len then pos
        else (
          match input ic buf pos (len - pos) with
            | 0 -> pos
            | k -> fill (pos + k))
      in
      let n = fill 0 in
      of_string (Bytes.sub_string buf 0 n))
3 changes: 3 additions & 0 deletions src/metadataMP4.ml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@ let tagn =
[
("\xa9nam", "title");
("\xa9ART", "artist");
("cprt", "copyright");
("\xa9too", "encoder");
("\xa9day", "date");
("\xa9cpy", "copyright");
("\xa9gen", "genre");
("\xa9wrt", "composer");
("\xa9alb", "album");
("\xa9des", "description");
("\xa9cmt", "comment");
Expand Down
62 changes: 62 additions & 0 deletions src/metadataRIFF.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
open MetadataBase
module R = Reader

(* Tag normalization: maps the four-character codes of INFO sub-chunks to the
   metadata labels returned by the parser. Codes absent from this list are
   kept as is by [parse]. Each code appears only once. *)
let tagn =
  [
    ("IART", "artist");
    ("ICMT", "comment");
    ("ICOP", "copyright");
    ("ICRD", "date");
    ("IGNR", "genre");
    ("INAM", "title");
    ("IPRD", "album");
    ("IPRT", "track");
    ("ISFT", "encoder");
    ("ITRK", "track");
  ]

(** [parse ?format f] reads the metadata stored in the [LIST]/[INFO] chunks of
    the RIFF stream [f] and returns it as a list of [(label, value)] pairs, in
    order of appearance. Labels listed in [tagn] are normalized, the others
    are kept as their raw four-character code.

    When [format] is given, the form type of the stream (e.g. ["WAVE"] or
    ["AVI "]) must be equal to it.

    Parsing is best-effort: it stops at a [movi] list or as soon as reading
    fails, and whatever was collected so far is returned.

    @raise Invalid if the stream does not start with ["RIFF"] or if its form
    type differs from [format]. *)
let parse ?format f : metadata =
  if R.read f 4 <> "RIFF" then raise Invalid;
  let _ (* file size *) = R.int32_le f in
  (* The form type is always consumed, even when no particular format is
     requested: otherwise it would be mistaken for the tag of the first chunk
     and all subsequent chunks would be misaligned. *)
  let form = R.read f 4 in
  (match format with
    | Some expected when expected <> form -> raise Invalid
    | Some _ | None -> ());
  let ans = ref [] in
  (* Chunk data is padded to an even number of bytes; the pad byte is not
     counted in the chunk size. *)
  let padded size = size + (size mod 2) in
  let chunk () =
    let tag = R.read f 4 in
    let size = R.int32_le f in
    if tag <> "LIST" then R.drop f (padded size)
    else (
      let subtag = R.read f 4 in
      match subtag with
        | "INFO" ->
            (* The list type read above is counted in the size of the list. *)
            let remaining = ref (size - 4) in
            while !remaining > 0 do
              let tag = R.read f 4 in
              let size = R.int32_le f in
              let value = R.read_tag ~length:(size - 1) ~label:tag f in
              (* The terminator, the padding and the accounting are handled
                 whatever [read_tag] returned, so that the stream stays
                 aligned on the next sub-chunk.
                 NOTE(review): this assumes [read_tag] has consumed the
                 [size - 1] bytes of the value when it returns [None] --
                 confirm against [MetadataBase.Reader]. *)
              R.drop f 1;
              (* null-terminated *)
              let padding = size mod 2 in
              R.drop f padding;
              remaining := !remaining - (8 + size + padding);
              (match value with
                | None -> ()
                | Some s ->
                    let tag =
                      match List.assoc_opt tag tagn with
                        | Some tag -> tag
                        | None -> tag
                    in
                    ans := (tag, s) :: !ans)
            done
        | "movi" -> raise Exit (* stop parsing there *)
        | _ -> R.drop f (padded size - 4))
  in
  try
    while true do
      chunk ()
    done;
    assert false
  with _ ->
    (* Deliberately catches everything: the loop only ends through an
       exception ([Exit] above, or a failure of the reader, presumably at the
       end of the input). *)
    List.rev !ans

(** [parse_file ?format ?custom_parser file] runs [parse ?format] on the
    contents of the file named [file], through [R.with_file]. *)
let parse_file ?format ?custom_parser file =
  let parser = parse ?format in
  R.with_file ?custom_parser parser file
3 changes: 3 additions & 0 deletions src/metadataRIFF.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
(** [parse ?format r] reads the metadata found in the [LIST]/[INFO] chunks of
the RIFF data read from [r]. When [format] is given, the form type of the
data (e.g. ["WAVE"] or ["AVI "]) must be equal to it.
@raise MetadataBase.Invalid if the data does not start with ["RIFF"] or if
its form type differs from [format]. *)
val parse : ?format:string -> MetadataBase.Reader.t -> MetadataBase.metadata

(** [parse_file ?format ?custom_parser fname] is the same as [parse], but
reads from the file named [fname]. *)
val parse_file : ?format:string -> ?custom_parser:MetadataBase.custom_parser -> string -> MetadataBase.metadata
3 changes: 3 additions & 0 deletions src/metadataWAV.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
(* WAV metadata: the generic RIFF parser restricted to the "WAVE" form type. *)
let parse f = MetadataRIFF.parse ~format:"WAVE" f

(* Same as [parse], reading from the file with the given name. *)
let parse_file ?custom_parser file =
  MetadataRIFF.parse_file ~format:"WAVE" ?custom_parser file

0 comments on commit 2c1a44b

Please sign in to comment.