It's Riseup's twentieth year, help keep us going! If you can afford it, please contribute to our winter fundraising drive.

Commit a9a0ee94 authored by jvoisin's avatar jvoisin

Preliminary implementation, it's not working

parent cce5de82
#!/bin/env python3
import re
import tempfile
import os
import json
import subprocess
import shutil
from . import abstract
# A set of extension that aren't supported, despite matching a supported mimetype
unsupported_extensions = {
......@@ -19,3 +26,33 @@ unsupported_extensions = {
'.xsd',
'.xsl',
}
class _ExiftoolGetMetaParser(abstract.AbstractParser):
meta_whitelist = {}
@staticmethod
def _handle_problematic_filename(filename:str, callback) -> str:
""" This method takes a filename with a problematic name,
and safely applies it a `callback`."""
tmpdirname = tempfile.mkdtemp()
fname = os.path.join(tmpdirname, "temp_file")
shutil.copy(filename, fname)
out = callback(fname)
shutil.rmtree(tmpdirname)
return out
def get_meta(self):
""" There is no way to escape the leading(s) dash(es) of the current
self.filename to prevent parameter injections, so we need to take care
of this.
"""
fun = lambda f: subprocess.check_output(['/usr/bin/exiftool', '-json', f])
if re.search('^[a-z0-9/]', self.filename) is None:
out = self._handle_problematic_filename(self.filename, fun)
else:
out = fun(self.filename)
meta = json.loads(out.decode('utf-8'))[0]
for key in self.meta_whitelist:
meta.pop(key, None)
return meta
......@@ -11,37 +11,9 @@ import gi
gi.require_version('GdkPixbuf', '2.0')
from gi.repository import GdkPixbuf
from . import abstract
from . import abstract, _ExiftoolGetMetaParser
class __ImageParser(abstract.AbstractParser):
@staticmethod
def __handle_problematic_filename(filename:str, callback) -> str:
""" This method takes a filename with a problematic name,
and safely applies it a `callback`."""
tmpdirname = tempfile.mkdtemp()
fname = os.path.join(tmpdirname, "temp_file")
shutil.copy(filename, fname)
out = callback(fname)
shutil.rmtree(tmpdirname)
return out
def get_meta(self):
""" There is no way to escape the leading(s) dash(es) of the current
self.filename to prevent parameter injections, so we need to take care
of this.
"""
fun = lambda f: subprocess.check_output(['/usr/bin/exiftool', '-json', f])
if re.search('^[a-z0-9/]', self.filename) is None:
out = self.__handle_problematic_filename(self.filename, fun)
else:
out = fun(self.filename)
meta = json.loads(out.decode('utf-8'))[0]
for key in self.meta_whitelist:
meta.pop(key, None)
return meta
class PNGParser(__ImageParser):
class PNGParser(_ExiftoolGetMetaParser):
mimetypes = {'image/png', }
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
'Directory', 'FileSize', 'FileModifyDate',
......@@ -64,7 +36,7 @@ class PNGParser(__ImageParser):
return True
class GdkPixbufAbstractParser(__ImageParser):
class GdkPixbufAbstractParser(_ExiftoolGetMetaParser):
""" GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
this has the side-effect of removing metadata completely.
"""
......
......@@ -6,7 +6,7 @@ import os
import zipfile
import tempfile
from libmat2 import pdf, images, audio, office, parser_factory, torrent
from libmat2 import pdf, images, audio, office, parser_factory, torrent, video
class TestParserFactory(unittest.TestCase):
......@@ -153,6 +153,11 @@ class TestGetMeta(unittest.TestCase):
self.assertEqual(meta['meta:creation-date'], '2011-07-26T03:27:48')
self.assertEqual(meta['meta:generator'], 'LibreOffice/3.3$Unix LibreOffice_project/330m19$Build-202')
def test_quicktime(self):
p = video.QuicktimeParser('./tests/data/dirty.mov')
meta = p.get_meta()
self.assertEqual(meta['SoftwareVersion'], 'Lavf55.2.100')
class TestDeepCleaning(unittest.TestCase):
def __check_deep_meta(self, p):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment