Skip to content
Snippets Groups Projects
Commit 0239ab3b authored by Julien (jvoisin) Voisin
Browse files

Add some blank lines to make the code more PEP 8 compliant

parent 9fa76c4c
No related branches found
No related tags found
No related merge requests found
......@@ -16,6 +16,7 @@ def __check_file(filename:str, mode:int = os.R_OK) -> bool:
return False
return True
def create_arg_parser():
parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
parser.add_argument('files', nargs='*')
......@@ -29,6 +30,7 @@ def create_arg_parser():
help='list all the harmful metadata of a file without removing them')
return parser
def show_meta(filename:str):
if not __check_file(filename):
return
......@@ -44,6 +46,7 @@ def show_meta(filename:str):
except UnicodeEncodeError:
print(" %s: harmful content" % k)
def clean_meta(filename:str):
if not __check_file(filename, os.R_OK|os.W_OK):
return
......@@ -54,6 +57,7 @@ def clean_meta(filename:str):
return
p.remove_all()
def show_parsers():
print('[+] Supported formats:')
for parser in parser_factory._get_parsers():
......@@ -61,6 +65,7 @@ def show_parsers():
extensions = ', '.join(mimetypes.guess_all_extensions(mtype))
print(' - %s (%s)' % (mtype, extensions))
def __get_files_recursively(files):
for f in files:
if os.path.isfile(f):
......
import abc
class AbstractParser(abc.ABC):
meta_list = set()
mimetypes = set()
......
......@@ -4,6 +4,7 @@ import mutagen
from . import abstract
class MutagenParser(abstract.AbstractParser):
def get_meta(self):
f = mutagen.File(self.filename)
......@@ -18,6 +19,7 @@ class MutagenParser(abstract.AbstractParser):
f.save()
return True
class MP3Parser(MutagenParser):
mimetypes = {'audio/mpeg', }
......@@ -28,8 +30,10 @@ class MP3Parser(MutagenParser):
metadata[key.rstrip(' \t\r\n\0')] = ', '.join(map(str, meta[key].text))
return metadata
class OGGParser(MutagenParser):
    """ Parser for Ogg audio files.

    All behaviour is inherited from MutagenParser; this subclass only
    declares the MIME type it is registered for.
    """
    mimetypes = {'audio/ogg'}
class FLACParser(MutagenParser):
mimetypes = {'audio/flac', }
from . import abstract
class HarmlessParser(abstract.AbstractParser):
""" This is the parser for filetypes that do not contain metadata. """
mimetypes = {'application/xml', 'text/plain', 'application/rdf+xml'}
......
......@@ -10,6 +10,7 @@ from gi.repository import GdkPixbuf
from . import abstract
class PNGParser(abstract.AbstractParser):
mimetypes = {'image/png', }
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
......@@ -31,6 +32,7 @@ class PNGParser(abstract.AbstractParser):
surface.write_to_png(self.output_filename)
return True
class GdkPixbufAbstractParser(abstract.AbstractParser):
""" GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
this has the side-effect of removing metadata completely.
......
......@@ -7,6 +7,7 @@ import zipfile
from . import abstract, parser_factory
class ArchiveBasedAbstractParser(abstract.AbstractParser):
def _clean_zipinfo(self, zipinfo:zipfile.ZipInfo) -> zipfile.ZipInfo:
zipinfo.compress_type = zipfile.ZIP_DEFLATED
......@@ -46,6 +47,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
with open(tmp_parser.output_filename, 'rb') as f:
zout.writestr(clean_zinfo, f.read())
class MSOfficeParser(ArchiveBasedAbstractParser):
mimetypes = {
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
......
......@@ -16,6 +16,7 @@ for module_loader, name, ispkg in pkgutil.walk_packages('.src'):
continue
importlib.import_module(name)
def _get_parsers() -> list:
""" Get all our parsers!"""
def __get_parsers(cls):
......@@ -23,6 +24,7 @@ def _get_parsers() -> list:
[g for s in cls.__subclasses__() for g in __get_parsers(s)]
return __get_parsers(abstract.AbstractParser)
def get_parser(filename: str) -> (T, str):
mtype, _ = mimetypes.guess_type(filename)
......
......@@ -13,6 +13,7 @@ class TestHelp(unittest.TestCase):
stdout, _ = proc.communicate()
self.assertIn(b'usage: main.py [-h] [-c] [-l] [-s] [files [files ...]]', stdout)
class TestGetMeta(unittest.TestCase):
def test_pdf(self):
proc = subprocess.Popen(['./main.py', '--show', './tests/data/dirty.pdf'],
......
......@@ -8,6 +8,7 @@ import tempfile
from src import pdf, images, audio, office, parser_factory
class TestParserFactory(unittest.TestCase):
def test_subsubcalss(self):
""" Test that our module auto-detection is handling sub-sub-classes """
......@@ -15,6 +16,7 @@ class TestParserFactory(unittest.TestCase):
self.assertEqual(mimetype, 'audio/mpeg')
self.assertEqual(parser.__class__, audio.MP3Parser)
class TestGetMeta(unittest.TestCase):
def test_pdf(self):
p = pdf.PDFParser('./tests/data/dirty.pdf')
......@@ -132,6 +134,7 @@ class TestDeepCleaning(unittest.TestCase):
os.remove('./tests/data/clean.odt')
class TestCleaning(unittest.TestCase):
def test_pdf(self):
shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment