diff --git a/libmat2/__init__.py b/libmat2/__init__.py
index 91a51d8f28aaf2976595e4a0e3c0fbbae2cc079c..8ac5b094e41913693ced79939d9189b3fdfc3cd6 100644
--- a/libmat2/__init__.py
+++ b/libmat2/__init__.py
@@ -1,4 +1,11 @@
-#!/bin/env python3
+import re
+import tempfile
+import os
+import json
+import subprocess
+import shutil
+
+from . import abstract
 
 # A set of extension that aren't supported, despite matching a supported mimetype
 unsupported_extensions = {
@@ -19,3 +26,50 @@ unsupported_extensions = {
     '.xsd',
     '.xsl',
     }
+
+class _ExiftoolGetMetaParser(abstract.AbstractParser):
+    """ Base parser reading the metadata of a file with `exiftool`.
+
+    `meta_whitelist` holds the keys of the `exiftool` output that `get_meta`
+    leaves out of its result; subclasses override it.
+    """
+    meta_whitelist = set()
+
+    @staticmethod
+    def _handle_problematic_filename(filename: str, callback) -> bytes:
+        """ Apply `callback` to a copy of `filename` stored under a safe name.
+
+        The file is copied as `temp_file` into a fresh temporary directory,
+        `callback` is called with the path of that copy and its result is
+        returned. The temporary directory is removed even if the copy or
+        `callback` raises, so a failure never leaves a copy of the file behind.
+        """
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            fname = os.path.join(tmpdirname, "temp_file")
+            shutil.copy(filename, fname)
+            return callback(fname)
+
+    def get_meta(self):
+        """ Return the metadata `exiftool` reports for `self.filename`,
+        without the keys listed in `self.meta_whitelist`.
+
+        There is no way to escape the leading dash(es) of `self.filename` to
+        prevent parameter injections, so a file whose name doesn't start with
+        a lowercase letter, a digit or a slash is copied under a safe name
+        before being handed to `exiftool`.
+
+        Raises `subprocess.CalledProcessError` if `exiftool` exits with a
+        non-zero status.
+        """
+        def run_exiftool(path: str) -> bytes:
+            return subprocess.check_output(['/usr/bin/exiftool', '-json', path])
+
+        if re.search('^[a-z0-9/]', self.filename) is None:
+            out = self._handle_problematic_filename(self.filename, run_exiftool)
+        else:
+            out = run_exiftool(self.filename)
+        meta = json.loads(out.decode('utf-8'))[0]
+        for key in self.meta_whitelist:
+            meta.pop(key, None)
+        return meta
+
diff --git a/libmat2/images.py b/libmat2/images.py
index 03718e6c5eddf35dba0c12f9f57bcb16b129de8e..052ea4b09e358a6e20ac2d845eacfb62d4e7947b 100644
--- a/libmat2/images.py
+++ b/libmat2/images.py
@@ -11,37 +11,9 @@ import gi
 gi.require_version('GdkPixbuf', '2.0')
 from gi.repository import GdkPixbuf
 
-from . import abstract
+from . import abstract, _ExiftoolGetMetaParser
 
-
-class __ImageParser(abstract.AbstractParser):
-    @staticmethod
-    def __handle_problematic_filename(filename:str, callback) -> str:
-        """ This method takes a filename with a problematic name,
-        and safely applies it a `callback`."""
-        tmpdirname = tempfile.mkdtemp()
-        fname = os.path.join(tmpdirname, "temp_file")
-        shutil.copy(filename, fname)
-        out = callback(fname)
-        shutil.rmtree(tmpdirname)
-        return out
-
-    def get_meta(self):
-        """ There is no way to escape the leading(s) dash(es) of the current
-        self.filename to prevent parameter injections, so we need to take care
-        of this.
-        """
-        fun = lambda f: subprocess.check_output(['/usr/bin/exiftool', '-json', f])
-        if re.search('^[a-z0-9/]', self.filename) is None:
-            out = self.__handle_problematic_filename(self.filename, fun)
-        else:
-            out = fun(self.filename)
-        meta = json.loads(out.decode('utf-8'))[0]
-        for key in self.meta_whitelist:
-            meta.pop(key, None)
-        return meta
-
-class PNGParser(__ImageParser):
+class PNGParser(_ExiftoolGetMetaParser):
     mimetypes = {'image/png', }
     meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
                       'Directory', 'FileSize', 'FileModifyDate',
@@ -64,7 +36,7 @@ class PNGParser(__ImageParser):
         return True
 
 
-class GdkPixbufAbstractParser(__ImageParser):
+class GdkPixbufAbstractParser(_ExiftoolGetMetaParser):
     """ GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
         this has the side-effect of removing metadata completely.
     """
diff --git a/tests/data/dirty.mov b/tests/data/dirty.mov
new file mode 100644
index 0000000000000000000000000000000000000000..5e9d1788982473b16d2f8b92e79f5d53cba95093
Binary files /dev/null and b/tests/data/dirty.mov differ
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 4b312dedd3cf43c4e35a16e819922bb42b950d5f..34e3cb3f89ed7a8f0a55e10e54d2188f63b16794 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -6,7 +6,7 @@ import os
 import zipfile
 import tempfile
 
-from libmat2 import pdf, images, audio, office, parser_factory, torrent
+from libmat2 import pdf, images, audio, office, parser_factory, torrent, video
 
 
 class TestParserFactory(unittest.TestCase):
@@ -153,6 +153,11 @@ class TestGetMeta(unittest.TestCase):
         self.assertEqual(meta['meta:creation-date'], '2011-07-26T03:27:48')
         self.assertEqual(meta['meta:generator'], 'LibreOffice/3.3$Unix LibreOffice_project/330m19$Build-202')
 
+    def test_quicktime(self):
+        p = video.QuicktimeParser('./tests/data/dirty.mov')
+        meta = p.get_meta()
+        self.assertEqual(meta['SoftwareVersion'], 'Lavf55.2.100')
+
 
 class TestDeepCleaning(unittest.TestCase):
     def __check_deep_meta(self, p):