Skip to content
Snippets Groups Projects
Commit a9a0ee94 authored by Julien (jvoisin) Voisin's avatar Julien (jvoisin) Voisin
Browse files

Preliminary implementation, it's not working

parent cce5de82
No related branches found
No related tags found
No related merge requests found
#!/bin/env python3
import re
import tempfile
import os
import json
import subprocess
import shutil
from . import abstract
# A set of extensions that aren't supported, despite matching a supported mimetype
unsupported_extensions = {
......@@ -19,3 +26,33 @@ unsupported_extensions = {
'.xsd',
'.xsl',
}
class _ExiftoolGetMetaParser(abstract.AbstractParser):
    """ Parser base class whose `get_meta` is implemented by calling
    the `/usr/bin/exiftool` binary on `self.filename`.
    """
    # Keys that `get_meta` drops from exiftool's output before returning it.
    meta_whitelist = {}

    @staticmethod
    def _handle_problematic_filename(filename: str, callback) -> str:
        """ This method takes a filename with a problematic name,
        and safely applies a `callback` to it.

        The file is copied to `temp_file` inside a fresh temporary
        directory, and `callback` is called with the path of that copy.

        :param filename: path of the file whose name is problematic
        :param callback: callable taking a file path as its only argument
        :return: whatever `callback` returned (bytes when called from
            `get_meta`, despite the `str` annotation)
        """
        # The context manager removes the directory and the copy even when
        # the copy or the callback raises, so nothing is left behind.
        with tempfile.TemporaryDirectory() as tmpdirname:
            fname = os.path.join(tmpdirname, "temp_file")
            shutil.copy(filename, fname)
            return callback(fname)

    def get_meta(self):
        """ Return the metadata that exiftool reports for `self.filename`.

        There is no way to escape the leading dash(es) of the current
        self.filename to prevent parameter injections, so any filename that
        doesn't start with a lowercase letter, a digit or a slash is handed
        to exiftool through a safely-named temporary copy instead.

        :return: the first object of exiftool's JSON output, without the
            keys listed in `meta_whitelist`
        :raises subprocess.CalledProcessError: if exiftool exits with a
            non-zero status
        """
        def run_exiftool(path):
            return subprocess.check_output(['/usr/bin/exiftool', '-json', path])

        if re.search('^[a-z0-9/]', self.filename) is None:
            out = self._handle_problematic_filename(self.filename, run_exiftool)
        else:
            out = run_exiftool(self.filename)
        # exiftool emits a JSON list with one object per file given.
        meta = json.loads(out.decode('utf-8'))[0]
        for key in self.meta_whitelist:
            meta.pop(key, None)
        return meta
......@@ -11,37 +11,9 @@ import gi
gi.require_version('GdkPixbuf', '2.0')
from gi.repository import GdkPixbuf
from . import abstract
from . import abstract, _ExiftoolGetMetaParser
class __ImageParser(abstract.AbstractParser):
    """ Parser base class for images, whose `get_meta` is implemented by
    calling the `/usr/bin/exiftool` binary on `self.filename`.

    `get_meta` reads `self.meta_whitelist`, which this class does not define.
    """

    @staticmethod
    def __handle_problematic_filename(filename: str, callback) -> str:
        """ This method takes a filename with a problematic name,
        and safely applies a `callback` to it.

        The file is copied to `temp_file` inside a fresh temporary
        directory, and `callback` is called with the path of that copy.

        :param filename: path of the file whose name is problematic
        :param callback: callable taking a file path as its only argument
        :return: whatever `callback` returned (bytes when called from
            `get_meta`, despite the `str` annotation)
        """
        # The context manager removes the directory and the copy even when
        # the copy or the callback raises, so nothing is left behind.
        with tempfile.TemporaryDirectory() as tmpdirname:
            fname = os.path.join(tmpdirname, "temp_file")
            shutil.copy(filename, fname)
            return callback(fname)

    def get_meta(self):
        """ Return the metadata that exiftool reports for `self.filename`.

        There is no way to escape the leading dash(es) of the current
        self.filename to prevent parameter injections, so any filename that
        doesn't start with a lowercase letter, a digit or a slash is handed
        to exiftool through a safely-named temporary copy instead.

        :return: the first object of exiftool's JSON output, without the
            keys listed in `meta_whitelist`
        :raises subprocess.CalledProcessError: if exiftool exits with a
            non-zero status
        """
        def run_exiftool(path):
            return subprocess.check_output(['/usr/bin/exiftool', '-json', path])

        if re.search('^[a-z0-9/]', self.filename) is None:
            out = self.__handle_problematic_filename(self.filename, run_exiftool)
        else:
            out = run_exiftool(self.filename)
        # exiftool emits a JSON list with one object per file given.
        meta = json.loads(out.decode('utf-8'))[0]
        for key in self.meta_whitelist:
            meta.pop(key, None)
        return meta
class PNGParser(__ImageParser):
class PNGParser(_ExiftoolGetMetaParser):
mimetypes = {'image/png', }
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
'Directory', 'FileSize', 'FileModifyDate',
......@@ -64,7 +36,7 @@ class PNGParser(__ImageParser):
return True
class GdkPixbufAbstractParser(__ImageParser):
class GdkPixbufAbstractParser(_ExiftoolGetMetaParser):
""" GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
this has the side-effect of removing metadata completely.
"""
......
File added
......@@ -6,7 +6,7 @@ import os
import zipfile
import tempfile
from libmat2 import pdf, images, audio, office, parser_factory, torrent
from libmat2 import pdf, images, audio, office, parser_factory, torrent, video
class TestParserFactory(unittest.TestCase):
......@@ -153,6 +153,11 @@ class TestGetMeta(unittest.TestCase):
self.assertEqual(meta['meta:creation-date'], '2011-07-26T03:27:48')
self.assertEqual(meta['meta:generator'], 'LibreOffice/3.3$Unix LibreOffice_project/330m19$Build-202')
def test_quicktime(self):
    # The dirty sample file must expose its SoftwareVersion tag via get_meta().
    parser = video.QuicktimeParser('./tests/data/dirty.mov')
    self.assertEqual(parser.get_meta()['SoftwareVersion'], 'Lavf55.2.100')
class TestDeepCleaning(unittest.TestCase):
def __check_deep_meta(self, p):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment