Commit 04bb8c8c authored by jvoisin's avatar jvoisin

Add mp4 support

parent 3a070b0a
Pipeline #20550 passed with stages
in 4 minutes and 37 seconds
......@@ -2,10 +2,37 @@ import os
import subprocess
import logging
from typing import Dict, Union
from . import exiftool
class AVIParser(exiftool.ExiftoolParser):
class AbstractFFmpegParser(exiftool.ExiftoolParser):
""" Abstract parser for all FFmpeg-based ones, mainly for video. """
def remove_all(self) -> bool:
cmd = [_get_ffmpeg_path(),
'-i', self.filename, # input file
'-y', # overwrite existing output file
'-map', '0', # copy everything all streams from input to output
'-codec', 'copy', # don't decode anything, just copy (speed!)
'-loglevel', 'panic', # Don't show log
'-hide_banner', # hide the banner
'-map_metadata', '-1', # remove supperficial metadata
'-map_chapters', '-1', # remove chapters
'-disposition', '0', # Remove dispositions (check ffmpeg's manpage)
'-fflags', '+bitexact', # don't add any metadata
'-flags:v', '+bitexact', # don't add any metadata
'-flags:a', '+bitexact', # don't add any metadata
self.output_filename]
try:
subprocess.check_call(cmd)
except subprocess.CalledProcessError as e:
logging.error("Something went wrong during the processing of %s: %s", self.filename, e)
return False
return True
class AVIParser(AbstractFFmpegParser):
mimetypes = {'video/x-msvideo', }
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
'FileSize', 'FileModifyDate', 'FileAccessDate',
......@@ -24,25 +51,55 @@ class AVIParser(exiftool.ExiftoolParser):
'SampleRate', 'AvgBytesPerSec', 'BitsPerSample',
'Duration', 'ImageSize', 'Megapixels'}
class MP4Parser(AbstractFFmpegParser):
mimetypes = {'video/mp4', }
meta_whitelist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration',
'XResolution', 'YResolution', 'ExifToolVersion',
'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate',
'FileName', 'FilePermissions', 'MIMEType', 'FileType',
'FileTypeExtension', 'Directory', 'ImageWidth',
'ImageSize', 'ImageHeight', 'FileSize', 'SourceFile',
'BitDepth', 'Duration', 'AudioChannels',
'AudioBitsPerSample', 'AudioSampleRate', 'Megapixels',
'MovieDataSize', 'VideoFrameRate', 'MediaTimeScale',
'SourceImageHeight', 'SourceImageWidth',
'MatrixStructure', 'MediaDuration'}
meta_key_value_whitelist = { # some metadata are mandatory :/
'CreateDate': '0000:00:00 00:00:00',
'CurrentTime': '0 s',
'MediaCreateDate': '0000:00:00 00:00:00',
'MediaLanguageCode': 'und',
'MediaModifyDate': '0000:00:00 00:00:00',
'ModifyDate': '0000:00:00 00:00:00',
'OpColor': '0 0 0',
'PosterTime': '0 s',
'PreferredRate': '1',
'PreferredVolume': '100.00%',
'PreviewDuration': '0 s',
'PreviewTime': '0 s',
'SelectionDuration': '0 s',
'SelectionTime': '0 s',
'TrackCreateDate': '0000:00:00 00:00:00',
'TrackModifyDate': '0000:00:00 00:00:00',
'TrackVolume': '0.00%',
}
def remove_all(self) -> bool:
cmd = [_get_ffmpeg_path(),
'-i', self.filename, # input file
'-y', # overwrite existing output file
'-loglevel', 'panic', # Don't show log
'-hide_banner', # hide the banner
'-codec', 'copy', # don't decode anything, just copy (speed!)
'-map_metadata', '-1', # remove supperficial metadata
'-map_chapters', '-1', # remove chapters
'-fflags', '+bitexact', # don't add any metadata
'-flags:v', '+bitexact', # don't add any metadata
'-flags:a', '+bitexact', # don't add any metadata
self.output_filename]
try:
subprocess.check_call(cmd)
except subprocess.CalledProcessError as e:
logging.error("Something went wrong during the processing of %s: %s", self.filename, e)
return False
return True
logging.warning('The format of "%s" (video/mp4) has some mandatory '
'metadata fields; mat2 filled them with standard data.',
self.filename)
return super().remove_all()
def get_meta(self) -> Dict[str, Union[str, dict]]:
meta = super().get_meta()
ret = dict() # type: Dict[str, Union[str, dict]]
for key, value in meta.items():
if key in self.meta_key_value_whitelist.keys():
if value == self.meta_key_value_whitelist[key]:
continue
ret[key] = value
return ret
def _get_ffmpeg_path() -> str: # pragma: no cover
......
......@@ -20,6 +20,8 @@ __version__ = '0.5.0'
assert Tuple
assert Union
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING)
def __check_file(filename: str, mode: int=os.R_OK) -> bool:
if not os.path.exists(filename):
......
......@@ -521,3 +521,26 @@ class TestCleaning(unittest.TestCase):
os.remove('./tests/data/dirty.cleaned.zip')
os.remove('./tests/data/dirty.cleaned.cleaned.zip')
def test_mp4(self):
try:
video._get_ffmpeg_path()
except RuntimeError:
raise unittest.SkipTest
shutil.copy('./tests/data/dirty.mp4', './tests/data/clean.mp4')
p = video.MP4Parser('./tests/data/clean.mp4')
meta = p.get_meta()
self.assertEqual(meta['Encoder'], 'HandBrake 0.9.4 2009112300')
ret = p.remove_all()
self.assertTrue(ret)
p = video.MP4Parser('./tests/data/clean.cleaned.mp4')
self.assertNotIn('Encoder', p.get_meta())
self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.mp4')
os.remove('./tests/data/clean.cleaned.mp4')
os.remove('./tests/data/clean.cleaned.cleaned.mp4')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment