From fa7d18784ca18c5c70bbb36cde9d5915843456c3 Mon Sep 17 00:00:00 2001
From: jvoisin <julien.voisin@dustri.org>
Date: Wed, 16 May 2018 22:36:59 +0200
Subject: [PATCH] Do a pylint pass

---
 main.py               | 20 +++++++--------
 src/__init__.py       |  3 ++-
 src/audio.py          |  2 +-
 src/harmless.py       |  1 +
 src/images.py         | 59 +++++++++++++++++++++++--------------------
 src/office.py         | 27 ++++++++++----------
 src/parser_factory.py |  2 +-
 src/pdf.py            | 12 +++++----
 src/torrent.py        | 42 +++++++++++++++---------------
 9 files changed, 89 insertions(+), 79 deletions(-)

diff --git a/main.py b/main.py
index ab07641..a31adaa 100755
--- a/main.py
+++ b/main.py
@@ -12,7 +12,7 @@ from src import parser_factory, unsupported_extensions
 
 __version__ = '0.1.0'
 
-def __check_file(filename:str, mode:int = os.R_OK) -> bool:
+def __check_file(filename: str, mode: int = os.R_OK) -> bool:
     if not os.path.isfile(filename):
         print("[-] %s is not a regular file." % filename)
         return False
@@ -26,9 +26,9 @@ def create_arg_parser():
     parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
     parser.add_argument('files', nargs='*')
     parser.add_argument('-v', '--version', action='version',
-            version='MAT2 %s' % __version__)
+                        version='MAT2 %s' % __version__)
     parser.add_argument('-l', '--list', action='store_true',
-                      help='list all supported fileformats')
+                        help='list all supported fileformats')
 
     info = parser.add_mutually_exclusive_group()
     info.add_argument('-c', '--check', action='store_true',
@@ -40,7 +40,7 @@ def create_arg_parser():
     return parser
 
 
-def show_meta(filename:str):
+def show_meta(filename: str):
     if not __check_file(filename):
         return
 
@@ -48,18 +48,18 @@ def show_meta(filename:str):
     if p is None:
         print("[-] %s's format (%s) is not supported" % (filename, mtype))
         return
+
     print("[+] Metadata for %s:" % filename)
-    for k,v in p.get_meta().items():
+    for k, v in p.get_meta().items():
         try:  # FIXME this is ugly.
             print("  %s: %s" % (k, v))
         except UnicodeEncodeError:
             print("  %s: harmful content" % k)
 
-
-def clean_meta(params:Tuple[str, bool]) -> bool:
+def clean_meta(params: Tuple[str, bool]) -> bool:
     filename, is_lightweigth = params
     if not __check_file(filename, os.R_OK|os.W_OK):
-        return
+        return False
 
     p, mtype = parser_factory.get_parser(filename)
     if p is None:
@@ -102,12 +102,12 @@ def main():
         if not args.list:
             return arg_parser.print_help()
         show_parsers()
-        return
+        return 0
 
     elif args.show:
         for f in __get_files_recursively(args.files):
             show_meta(f)
-        return
+        return 0
 
     else:
         p = multiprocessing.Pool()
diff --git a/src/__init__.py b/src/__init__.py
index 3f5c478..07d3036 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -2,4 +2,5 @@
 
 # A set of extension that aren't supported, despite matching a supported mimetype
 unsupported_extensions = set(['bat', 'c', 'h', 'ksh', 'pl', 'txt', 'asc',
-    'text', 'pot', 'brf', 'srt', 'rdf', 'wsdl', 'xpdl', 'xsl', 'xsd'])
+                              'text', 'pot', 'brf', 'srt', 'rdf', 'wsdl',
+                              'xpdl', 'xsl', 'xsd'])
diff --git a/src/audio.py b/src/audio.py
index 4a385b2..3a6aa79 100644
--- a/src/audio.py
+++ b/src/audio.py
@@ -9,7 +9,7 @@ class MutagenParser(abstract.AbstractParser):
     def get_meta(self):
         f = mutagen.File(self.filename)
         if f.tags:
-            return {k:', '.join(v) for k,v in f.tags.items()}
+            return {k: ', '.join(v) for k, v in f.tags.items()}
         return {}
 
     def remove_all(self):
diff --git a/src/harmless.py b/src/harmless.py
index fbc2897..aa00582 100644
--- a/src/harmless.py
+++ b/src/harmless.py
@@ -6,6 +6,7 @@ class HarmlessParser(abstract.AbstractParser):
     mimetypes = {'application/xml', 'text/plain'}
 
     def __init__(self, filename: str):
+        super().__init__(filename)
         self.filename = filename
         self.output_filename = filename
 
diff --git a/src/images.py b/src/images.py
index 6cc3dfe..c84952a 100644
--- a/src/images.py
+++ b/src/images.py
@@ -14,11 +14,12 @@ from . import abstract
 class PNGParser(abstract.AbstractParser):
     mimetypes = {'image/png', }
     meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
-            'Directory', 'FileSize', 'FileModifyDate', 'FileAccessDate',
-            "FileInodeChangeDate", 'FilePermissions', 'FileType',
-            'FileTypeExtension', 'MIMEType', 'ImageWidth', 'BitDepth', 'ColorType',
-            'Compression', 'Filter', 'Interlace', 'BackgroundColor', 'ImageSize',
-            'Megapixels', 'ImageHeight'}
+                      'Directory', 'FileSize', 'FileModifyDate',
+                      'FileAccessDate', 'FileInodeChangeDate',
+                      'FilePermissions', 'FileType', 'FileTypeExtension',
+                      'MIMEType', 'ImageWidth', 'BitDepth', 'ColorType',
+                      'Compression', 'Filter', 'Interlace', 'BackgroundColor',
+                      'ImageSize', 'Megapixels', 'ImageHeight'}
 
     def __init__(self, filename):
         super().__init__(filename)
@@ -63,36 +64,38 @@ class GdkPixbufAbstractParser(abstract.AbstractParser):
 class JPGParser(GdkPixbufAbstractParser):
     mimetypes = {'image/jpeg'}
     meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
-            'Directory', 'FileSize', 'FileModifyDate', 'FileAccessDate',
-            "FileInodeChangeDate", 'FilePermissions', 'FileType',
-            'FileTypeExtension', 'MIMEType', 'ImageWidth',
-            'ImageSize', 'BitsPerSample', 'ColorComponents', 'EncodingProcess',
-            'JFIFVersion', 'ResolutionUnit', 'XResolution', 'YCbCrSubSampling',
-            'YResolution', 'Megapixels', 'ImageHeight'}
+                      'Directory', 'FileSize', 'FileModifyDate',
+                      'FileAccessDate', "FileInodeChangeDate",
+                      'FilePermissions', 'FileType', 'FileTypeExtension',
+                      'MIMEType', 'ImageWidth', 'ImageSize', 'BitsPerSample',
+                      'ColorComponents', 'EncodingProcess', 'JFIFVersion',
+                      'ResolutionUnit', 'XResolution', 'YCbCrSubSampling',
+                      'YResolution', 'Megapixels', 'ImageHeight'}
 
 
 class TiffParser(GdkPixbufAbstractParser):
     mimetypes = {'image/tiff'}
     meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
-            'FillOrder', 'PhotometricInterpretation', 'PlanarConfiguration',
-            'RowsPerStrip', 'SamplesPerPixel', 'StripByteCounts',
-            'StripOffsets', 'BitsPerSample', 'Directory', 'ExifToolVersion',
-            'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate',
-            'FileName', 'FilePermissions', 'FileSize', 'FileType',
-            'FileTypeExtension', 'ImageHeight', 'ImageSize', 'ImageWidth',
-            'MIMEType', 'Megapixels', 'SourceFile'}
+                      'FillOrder', 'PhotometricInterpretation',
+                      'PlanarConfiguration', 'RowsPerStrip', 'SamplesPerPixel',
+                      'StripByteCounts', 'StripOffsets', 'BitsPerSample',
+                      'Directory', 'ExifToolVersion', 'FileAccessDate',
+                      'FileInodeChangeDate', 'FileModifyDate', 'FileName',
+                      'FilePermissions', 'FileSize', 'FileType',
+                      'FileTypeExtension', 'ImageHeight', 'ImageSize',
+                      'ImageWidth', 'MIMEType', 'Megapixels', 'SourceFile'}
 
 
 class BMPParser(GdkPixbufAbstractParser):
     mimetypes = {'image/x-ms-bmp'}
     meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
-            'FileSize', 'FileModifyDate', 'FileAccessDate',
-            'FileInodeChangeDate', 'FilePermissions', 'FileType',
-            'FileTypeExtension', 'MIMEType', 'BMPVersion', 'ImageWidth',
-            'ImageHeight', 'Planes', 'BitDepth', 'Compression', 'ImageLength',
-            'PixelsPerMeterX', 'PixelsPerMeterY', 'NumColors',
-            'NumImportantColors', 'RedMask', 'GreenMask', 'BlueMask',
-            'AlphaMask', 'ColorSpace', 'RedEndpoint', 'GreenEndpoint',
-            'BlueEndpoint', 'GammaRed', 'GammaGreen', 'GammaBlue', 'ImageSize',
-            'Megapixels'}
-
+                      'FileSize', 'FileModifyDate', 'FileAccessDate',
+                      'FileInodeChangeDate', 'FilePermissions', 'FileType',
+                      'FileTypeExtension', 'MIMEType', 'BMPVersion',
+                      'ImageWidth', 'ImageHeight', 'Planes', 'BitDepth',
+                      'Compression', 'ImageLength', 'PixelsPerMeterX',
+                      'PixelsPerMeterY', 'NumColors', 'NumImportantColors',
+                      'RedMask', 'GreenMask', 'BlueMask', 'AlphaMask',
+                      'ColorSpace', 'RedEndpoint', 'GreenEndpoint',
+                      'BlueEndpoint', 'GammaRed', 'GammaGreen', 'GammaBlue',
+                      'ImageSize', 'Megapixels'}
diff --git a/src/office.py b/src/office.py
index da6168e..749fc7d 100644
--- a/src/office.py
+++ b/src/office.py
@@ -9,14 +9,14 @@ from . import abstract, parser_factory
 
 
 class ArchiveBasedAbstractParser(abstract.AbstractParser):
-    def _clean_zipinfo(self, zipinfo:zipfile.ZipInfo) -> zipfile.ZipInfo:
+    def _clean_zipinfo(self, zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo:
         zipinfo.compress_type = zipfile.ZIP_DEFLATED
         zipinfo.create_system = 3  # Linux
         zipinfo.comment = b''
         zipinfo.date_time = (1980, 1, 1, 0, 0, 0)
         return zipinfo
 
-    def _get_zipinfo_meta(self, zipinfo:zipfile.ZipInfo) -> dict:
+    def _get_zipinfo_meta(self, zipinfo: zipfile.ZipInfo) -> dict:
         metadata = {}
         if zipinfo.create_system == 3:
             #metadata['create_system'] = 'Linux'
@@ -35,7 +35,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
         return metadata
 
 
-    def _clean_internal_file(self, item:zipfile.ZipInfo, temp_folder:str, zin:zipfile.ZipFile, zout:zipfile.ZipFile):
+    def _clean_internal_file(self, item: zipfile.ZipInfo, temp_folder: str,
+                             zin: zipfile.ZipFile, zout: zipfile.ZipFile):
         zin.extract(member=item, path=temp_folder)
         tmp_parser, mtype = parser_factory.get_parser(os.path.join(temp_folder, item.filename))
         if not tmp_parser:
@@ -50,9 +51,9 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
 
 class MSOfficeParser(ArchiveBasedAbstractParser):
     mimetypes = {
-            'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
-            'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
-            'application/vnd.openxmlformats-officedocument.presentationml.presentation'
+        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+        'application/vnd.openxmlformats-officedocument.presentationml.presentation'
     }
     files_to_keep = {'_rels/.rels', 'word/_rels/document.xml.rels'}
 
@@ -103,13 +104,13 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
 
 class LibreOfficeParser(ArchiveBasedAbstractParser):
     mimetypes = {
-            'application/vnd.oasis.opendocument.text',
-            'application/vnd.oasis.opendocument.spreadsheet',
-            'application/vnd.oasis.opendocument.presentation',
-            'application/vnd.oasis.opendocument.graphics',
-            'application/vnd.oasis.opendocument.chart',
-            'application/vnd.oasis.opendocument.formula',
-            'application/vnd.oasis.opendocument.image',
+        'application/vnd.oasis.opendocument.text',
+        'application/vnd.oasis.opendocument.spreadsheet',
+        'application/vnd.oasis.opendocument.presentation',
+        'application/vnd.oasis.opendocument.graphics',
+        'application/vnd.oasis.opendocument.chart',
+        'application/vnd.oasis.opendocument.formula',
+        'application/vnd.oasis.opendocument.image',
     }
 
     def get_meta(self):
diff --git a/src/parser_factory.py b/src/parser_factory.py
index 2c30659..48616b0 100644
--- a/src/parser_factory.py
+++ b/src/parser_factory.py
@@ -2,10 +2,10 @@ import os
 import mimetypes
 import importlib
 import pkgutil
+from typing import TypeVar
 
 from . import abstract, unsupported_extensions
 
-from typing import TypeVar
 
 T = TypeVar('T', bound='abstract.AbstractParser')
 
diff --git a/src/pdf.py b/src/pdf.py
index fbc5175..5b99192 100644
--- a/src/pdf.py
+++ b/src/pdf.py
@@ -21,8 +21,8 @@ logging.basicConfig(level=logging.DEBUG)
 class PDFParser(abstract.AbstractParser):
     mimetypes = {'application/pdf', }
     meta_list = {'author', 'creation-date', 'creator', 'format', 'keywords',
-            'metadata', 'mod-date', 'producer', 'subject', 'title',
-            'viewer-preferences'}
+                 'metadata', 'mod-date', 'producer', 'subject', 'title',
+                 'viewer-preferences'}
 
     def __init__(self, filename):
         super().__init__(filename)
@@ -103,7 +103,8 @@ class PDFParser(abstract.AbstractParser):
 
         return True
 
-    def __remove_superficial_meta(self, in_file:str, out_file: str) -> bool:
+    @staticmethod
+    def __remove_superficial_meta(in_file: str, out_file: str) -> bool:
         document = Poppler.Document.new_from_file('file://' + in_file)
         document.set_producer('')
         document.set_creator('')
@@ -112,7 +113,8 @@ class PDFParser(abstract.AbstractParser):
         return True
 
 
-    def __parse_metadata_field(self, data:str) -> dict:
+    @staticmethod
+    def __parse_metadata_field(data: str) -> dict:
         metadata = {}
         for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I):
             metadata[key] = value
@@ -128,6 +130,6 @@ class PDFParser(abstract.AbstractParser):
             if document.get_property(key):
                 metadata[key] = document.get_property(key)
         if 'metadata' in metadata:
-            parsed_meta =  self.__parse_metadata_field(metadata['metadata'])
+            parsed_meta = self.__parse_metadata_field(metadata['metadata'])
             return {**metadata, **parsed_meta}
         return metadata
diff --git a/src/torrent.py b/src/torrent.py
index bdf83ce..cb4b5e3 100644
--- a/src/torrent.py
+++ b/src/torrent.py
@@ -11,7 +11,7 @@ class TorrentParser(abstract.AbstractParser):
             d = _BencodeHandler().bdecode(f.read())
         if d is None:
             return {'Unknown meta': 'Unable to parse torrent file "%s".' % self.filename}
-        for k,v in d.items():
+        for k, v in d.items():
             if k not in self.whitelist:
                 metadata[k.decode('utf-8')] = v
         return metadata
@@ -23,7 +23,7 @@ class TorrentParser(abstract.AbstractParser):
             d = _BencodeHandler().bdecode(f.read())
         if d is None:
             return False
-        for k,v in d.items():
+        for k, v in d.items():
             if k in self.whitelist:
                 cleaned[k] = v
         with open(self.output_filename, 'wb') as f:
@@ -39,21 +39,22 @@ class _BencodeHandler(object):
     """
     def __init__(self):
         self.__decode_func = {
-                    ord('d'): self.__decode_dict,
-                    ord('i'): self.__decode_int,
-                    ord('l'): self.__decode_list,
-            }
+            ord('d'): self.__decode_dict,
+            ord('i'): self.__decode_int,
+            ord('l'): self.__decode_list,
+        }
         for i in range(0, 10):
             self.__decode_func[ord(str(i))] = self.__decode_string
 
         self.__encode_func = {
-                bytes: self.__encode_string,
-                dict: self.__encode_dict,
-                int: self.__encode_int,
-                list: self.__encode_list,
+            bytes: self.__encode_string,
+            dict: self.__encode_dict,
+            int: self.__encode_int,
+            list: self.__encode_list,
         }
 
-    def __decode_int(self, s:str) -> (int, str):
+    @staticmethod
+    def __decode_int(s: str) -> (int, str):
         s = s[1:]
         next_idx = s.index(b'e')
         if s.startswith(b'-0'):
@@ -62,7 +63,8 @@ class _BencodeHandler(object):
             raise ValueError  # no leading zero except for zero itself
         return int(s[:next_idx]), s[next_idx+1:]
 
-    def __decode_string(self, s:str) -> (str, str):
+    @staticmethod
+    def __decode_string(s: str) -> (str, str):
         sep = s.index(b':')
         str_len = int(s[:sep])
         if str_len < 0:
@@ -72,7 +74,7 @@ class _BencodeHandler(object):
         s = s[1:]
         return s[sep:sep+str_len], s[sep+str_len:]
 
-    def __decode_list(self, s:str) -> (list, str):
+    def __decode_list(self, s: str) -> (list, str):
         r = list()
         s = s[1:]  # skip leading `l`
         while s[0] != ord('e'):
@@ -80,7 +82,7 @@ class _BencodeHandler(object):
             r.append(v)
         return r, s[1:]
 
-    def __decode_dict(self, s:str) -> (dict, str):
+    def __decode_dict(self, s: str) -> (dict, str):
         r = dict()
         s = s[1:]  # skip leading `d`
         while s[0] != ord(b'e'):
@@ -89,30 +91,30 @@ class _BencodeHandler(object):
         return r, s[1:]
 
     @staticmethod
-    def __encode_int(x:str) -> bytes:
+    def __encode_int(x: int) -> bytes:
         return b'i' + bytes(str(x), 'utf-8') + b'e'
 
     @staticmethod
-    def __encode_string(x:str) -> bytes:
+    def __encode_string(x: bytes) -> bytes:
         return bytes((str(len(x))), 'utf-8') + b':' + x
 
-    def __encode_list(self, x:str) -> bytes:
+    def __encode_list(self, x: list) -> bytes:
         ret = b''
         for i in x:
             ret += self.__encode_func[type(i)](i)
         return b'l' + ret + b'e'
 
-    def __encode_dict(self, x:str) -> bytes:
+    def __encode_dict(self, x: dict) -> bytes:
         ret = b''
         for k, v in sorted(x.items()):
             ret += self.__encode_func[type(k)](k)
             ret += self.__encode_func[type(v)](v)
         return b'd' + ret + b'e'
 
-    def bencode(self, s:str) -> bytes:
+    def bencode(self, s: str) -> bytes:  # NOTE(review): no str entry in __encode_func
         return self.__encode_func[type(s)](s)
 
-    def bdecode(self, s:str):
+    def bdecode(self, s: str):
         try:
             r, l = self.__decode_func[s[0]](s)
         except (IndexError, KeyError, ValueError) as e:
-- 
GitLab