Skip to content

Commit

Permalink
default ftyp detection to mp4; fix #562
Browse files Browse the repository at this point in the history
There are too many ftyp codes to look for (about 100 registered, plus the
unregistered ones). Previously all ftyps were on the same level in the
detection tree. Now mp4 is the parent of all other ftyps.
  • Loading branch information
gabriel-vasile committed Aug 6, 2024
1 parent b36b70f commit 71a146e
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 33 deletions.
32 changes: 21 additions & 11 deletions internal/magic/ftyp.go
Original file line number Diff line number Diff line change
@@ -1,22 +1,14 @@
package magic

import "bytes"
import (
"bytes"
)

var (
// AVIF matches an AV1 Image File Format still or animated.
// Wikipedia page seems outdated listing image/avif-sequence for animations.
// https://github.com/AOMediaCodec/av1-avif/issues/59
AVIF = ftyp([]byte("avif"), []byte("avis"))
// Mp4 matches an MP4 file.
Mp4 = ftyp(
[]byte("avc1"), []byte("dash"), []byte("iso2"), []byte("iso3"),
[]byte("iso4"), []byte("iso5"), []byte("iso6"), []byte("isom"),
[]byte("mmp4"), []byte("mp41"), []byte("mp42"), []byte("mp4v"),
[]byte("mp71"), []byte("MSNV"), []byte("NDAS"), []byte("NDSC"),
[]byte("NSDC"), []byte("NSDH"), []byte("NDSM"), []byte("NDSP"),
[]byte("NDSS"), []byte("NDXC"), []byte("NDXH"), []byte("NDXM"),
[]byte("NDXP"), []byte("NDXS"), []byte("F4V "), []byte("F4P "),
)
// ThreeGP matches a 3GPP file.
ThreeGP = ftyp(
[]byte("3gp1"), []byte("3gp2"), []byte("3gp3"), []byte("3gp4"),
Expand Down Expand Up @@ -86,3 +78,21 @@ func QuickTime(raw []byte, _ uint32) bool {
}
return bytes.Equal(raw[:8], []byte("\x00\x00\x00\x08wide"))
}

// Mp4 reports whether raw looks like an .mp4 file. Only a basic ftyp check is
// performed: there are many registered and unregistered ftyp code points, so
// tracking all of them is impractical. Detection therefore defaults to
// video/mp4 for every ftyp file.
// ISO_IEC_14496-12 is the specification for the iso container.
func Mp4(raw []byte, _ uint32) bool {
	// Inputs shorter than this are rejected outright.
	const minLen = 12
	if len(raw) < minLen {
		return false
	}
	// The file is a sequence of boxes, each starting with its size as a
	// big-endian uint32. The leading ftyp box is small, so the most significant
	// size byte must be 0. This rejects text files that merely happen to have
	// the string "ftyp" at index 4.
	smallFirstBox := raw[0] == 0
	return smallFirstBox && bytes.HasPrefix(raw[4:], []byte("ftyp"))
}
3 changes: 0 additions & 3 deletions internal/magic/magic.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,6 @@ func ftyp(sigs ...[]byte) Detector {
if len(raw) < 12 {
return false
}
if !bytes.Equal(raw[4:8], []byte("ftyp")) {
return false
}
for _, s := range sigs {
if bytes.Equal(raw[8:12], s) {
return true
Expand Down
1 change: 1 addition & 0 deletions mimetype_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,7 @@ func BenchmarkCommon(b *testing.B) {
"xls.xls",
"webm.webm",
"csv.csv",
"mp4.mp4",
}
for _, file := range commonFiles {
f, err := os.ReadFile(filepath.Join(testDataDir, file))
Expand Down
20 changes: 10 additions & 10 deletions supported_mimes.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,21 +75,26 @@ Extension | MIME type | Aliases
**.au** | audio/basic | -
**.mpeg** | video/mpeg | -
**.mov** | video/quicktime | -
**.mqv** | video/quicktime | -
**.mp4** | video/mp4 | -
**.webm** | video/webm | audio/webm
**.avif** | image/avif | -
**.3gp** | video/3gpp | video/3gp, audio/3gpp
**.3g2** | video/3gpp2 | video/3g2, audio/3gpp2
**.mp4** | audio/mp4 | audio/x-m4a, audio/x-mp4a
**.mqv** | video/quicktime | -
**.m4a** | audio/x-m4a | -
**.m4v** | video/x-m4v | -
**.heic** | image/heic | -
**.heic** | image/heic-sequence | -
**.heif** | image/heif | -
**.heif** | image/heif-sequence | -
**.webm** | video/webm | audio/webm
**.avi** | video/x-msvideo | video/avi, video/msvideo
**.flv** | video/x-flv | -
**.mkv** | video/x-matroska | -
**.asf** | video/x-ms-asf | video/asf, video/x-ms-wmv
**.aac** | audio/aac | -
**.voc** | audio/x-unknown | -
**.mp4** | audio/mp4 | audio/x-m4a, audio/x-mp4a
**.m4a** | audio/x-m4a | -
**.m3u** | application/vnd.apple.mpegurl | audio/mpegurl
**.m4v** | video/x-m4v | -
**.rmvb** | application/vnd.rn-realmedia-vbr | -
**.gz** | application/gzip | application/x-gzip, application/x-gunzip, application/gzipped, application/gzip-compressed, application/x-gzip-compressed, gzip/document
**.class** | application/x-java-applet | -
Expand Down Expand Up @@ -118,10 +123,6 @@ Extension | MIME type | Aliases
**.macho** | application/x-mach-binary | -
**.qcp** | audio/qcelp | -
**.icns** | image/x-icns | -
**.heic** | image/heic | -
**.heic** | image/heic-sequence | -
**.heif** | image/heif | -
**.heif** | image/heif-sequence | -
**.hdr** | image/vnd.radiance | -
**.mrc** | application/marc | -
**.mdb** | application/x-msaccess | -
Expand All @@ -138,7 +139,6 @@ Extension | MIME type | Aliases
**.pat** | image/x-gimp-pat | -
**.gbr** | image/x-gimp-gbr | -
**.glb** | model/gltf-binary | -
**.avif** | image/avif | -
**.cab** | application/x-installshield | -
**.jxr** | image/jxr | image/vnd.ms-photo
**.txt** | text/plain | -
Expand Down
17 changes: 8 additions & 9 deletions tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,13 @@ import (
var root = newMIME("application/octet-stream", "",
func([]byte, uint32) bool { return true },
xpm, sevenZ, zip, pdf, fdf, ole, ps, psd, p7s, ogg, png, jpg, jxl, jp2, jpx,
jpm, jxs, gif, webp, exe, elf, ar, tar, xar, bz2, fits, tiff, bmp, ico, mp3, flac,
midi, ape, musePack, amr, wav, aiff, au, mpeg, quickTime, mqv, mp4, webM,
threeGP, threeG2, avi, flv, mkv, asf, aac, voc, aMp4, m4a, m3u, m4v, rmvb,
gzip, class, swf, crx, ttf, woff, woff2, otf, ttc, eot, wasm, shx, dbf, dcm, rar,
djvu, mobi, lit, bpg, sqlite3, dwg, nes, lnk, macho, qcp, icns, heic,
heicSeq, heif, heifSeq, hdr, mrc, mdb, accdb, zstd, cab, rpm, xz, lzip,
torrent, cpio, tzif, xcf, pat, gbr, glb, avif, cabIS, jxr,
// Keep text last because it is the slowest check
jpm, jxs, gif, webp, exe, elf, ar, tar, xar, bz2, fits, tiff, bmp, ico, mp3,
flac, midi, ape, musePack, amr, wav, aiff, au, mpeg, quickTime, mp4, webM,
avi, flv, mkv, asf, aac, voc, m3u, rmvb, gzip, class, swf, crx, ttf, woff,
woff2, otf, ttc, eot, wasm, shx, dbf, dcm, rar, djvu, mobi, lit, bpg,
sqlite3, dwg, nes, lnk, macho, qcp, icns, hdr, mrc, mdb, accdb, zstd, cab,
rpm, xz, lzip, torrent, cpio, tzif, xcf, pat, gbr, glb, cabIS, jxr,
// Keep text last because it is the slowest check.
text,
)

Expand Down Expand Up @@ -161,7 +160,7 @@ var (
m3u = newMIME("application/vnd.apple.mpegurl", ".m3u", magic.M3u).
alias("audio/mpegurl")
m4v = newMIME("video/x-m4v", ".m4v", magic.M4v)
mp4 = newMIME("video/mp4", ".mp4", magic.Mp4)
mp4 = newMIME("video/mp4", ".mp4", magic.Mp4, avif, threeGP, threeG2, aMp4, mqv, m4a, m4v, heic, heicSeq, heif, heifSeq)
webM = newMIME("video/webm", ".webm", magic.WebM).
alias("audio/webm")
mpeg = newMIME("video/mpeg", ".mpeg", magic.Mpeg)
Expand Down

0 comments on commit 71a146e

Please sign in to comment.