Commit a9a0ee94 authored by jvoisin

Preliminary implementation, it's not working

parent cce5de82
#!/bin/env python3
import re
import tempfile
import os
import json
import subprocess
import shutil
from . import abstract
# A set of extensions that aren't supported, despite matching a supported mimetype
unsupported_extensions = {
......@@ -19,3 +26,33 @@ unsupported_extensions = {
'.xsd',
'.xsl',
}
class _ExiftoolGetMetaParser(abstract.AbstractParser):
    """ Parser base class whose `get_meta` runs exiftool on the file and
    returns the decoded JSON record, minus the keys in `meta_whitelist`.
    """
    # Keys removed from exiftool's output before it is returned.
    # An empty set (not a dict), matching the set literals used by
    # subclasses that override it.
    meta_whitelist = set()

    @staticmethod
    def _handle_problematic_filename(filename: str, callback) -> bytes:
        """ Apply `callback` to a copy of `filename` that has a safe name.

        The file is copied to `temp_file` inside a fresh temporary
        directory, `callback` is called with that path, and its result is
        returned. The temporary directory is removed even when the copy or
        the callback raises.
        """
        with tempfile.TemporaryDirectory() as tmpdirname:
            fname = os.path.join(tmpdirname, "temp_file")
            shutil.copy(filename, fname)
            return callback(fname)

    def get_meta(self):
        """ Return the metadata reported by exiftool for `self.filename`.

        There is no way to escape the leading dash(es) of the current
        self.filename to prevent parameter injections, so any filename that
        doesn't start with a lowercase letter, a digit or a slash is copied
        to a safely-named temporary file before being handed to exiftool.

        Raises `subprocess.CalledProcessError` if exiftool exits with a
        non-zero status.
        """
        def run_exiftool(path):
            return subprocess.check_output(['/usr/bin/exiftool', '-json', path])

        if re.search('^[a-z0-9/]', self.filename) is None:
            out = self._handle_problematic_filename(self.filename, run_exiftool)
        else:
            out = run_exiftool(self.filename)

        # exiftool emits a JSON list with one record per file given.
        meta = json.loads(out.decode('utf-8'))[0]
        for key in self.meta_whitelist:
            meta.pop(key, None)
        return meta
......@@ -11,37 +11,9 @@ import gi
gi.require_version('GdkPixbuf', '2.0')
from gi.repository import GdkPixbuf
from . import abstract
from . import abstract, _ExiftoolGetMetaParser
class __ImageParser(abstract.AbstractParser):
    """ Parser base class whose `get_meta` runs exiftool on the file and
    returns the decoded JSON record, minus the keys in
    `self.meta_whitelist` (not defined here; it has to come from a
    subclass or a parent class).
    """

    @staticmethod
    def __handle_problematic_filename(filename: str, callback) -> bytes:
        """ Apply `callback` to a copy of `filename` that has a safe name.

        The file is copied to `temp_file` inside a fresh temporary
        directory, `callback` is called with that path, and its result is
        returned. The temporary directory is removed even when the copy or
        the callback raises.
        """
        with tempfile.TemporaryDirectory() as tmpdirname:
            fname = os.path.join(tmpdirname, "temp_file")
            shutil.copy(filename, fname)
            return callback(fname)

    def get_meta(self):
        """ Return the metadata reported by exiftool for `self.filename`.

        There is no way to escape the leading dash(es) of the current
        self.filename to prevent parameter injections, so any filename that
        doesn't start with a lowercase letter, a digit or a slash is copied
        to a safely-named temporary file before being handed to exiftool.

        Raises `subprocess.CalledProcessError` if exiftool exits with a
        non-zero status.
        """
        def run_exiftool(path):
            return subprocess.check_output(['/usr/bin/exiftool', '-json', path])

        if re.search('^[a-z0-9/]', self.filename) is None:
            out = self.__handle_problematic_filename(self.filename, run_exiftool)
        else:
            out = run_exiftool(self.filename)

        # exiftool emits a JSON list with one record per file given.
        meta = json.loads(out.decode('utf-8'))[0]
        for key in self.meta_whitelist:
            meta.pop(key, None)
        return meta
class PNGParser(__ImageParser):
class PNGParser(_ExiftoolGetMetaParser):
mimetypes = {'image/png', }
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
'Directory', 'FileSize', 'FileModifyDate',
......@@ -64,7 +36,7 @@ class PNGParser(__ImageParser):
return True
class GdkPixbufAbstractParser(__ImageParser):
class GdkPixbufAbstractParser(_ExiftoolGetMetaParser):
""" GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
this has the side-effect of removing metadata completely.
"""
......
......@@ -6,7 +6,7 @@ import os
import zipfile
import tempfile
from libmat2 import pdf, images, audio, office, parser_factory, torrent
from libmat2 import pdf, images, audio, office, parser_factory, torrent, video
class TestParserFactory(unittest.TestCase):
......@@ -153,6 +153,11 @@ class TestGetMeta(unittest.TestCase):
self.assertEqual(meta['meta:creation-date'], '2011-07-26T03:27:48')
self.assertEqual(meta['meta:generator'], 'LibreOffice/3.3$Unix LibreOffice_project/330m19$Build-202')
def test_quicktime(self):
    """ The metadata read from the dirty.mov fixture must report the
    expected `SoftwareVersion`. """
    parser = video.QuicktimeParser('./tests/data/dirty.mov')
    software_version = parser.get_meta()['SoftwareVersion']
    self.assertEqual(software_version, 'Lavf55.2.100')
class TestDeepCleaning(unittest.TestCase):
def __check_deep_meta(self, p):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment