Verified commit 5ac91cd4, authored by Brolf, committed by georg
Browse files

Refactor {black,white}list into {block,allow}list

Closes #96
parent c3f097a8
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -15,14 +15,14 @@ class ExiftoolParser(abstract.AbstractParser):
    from an import file, hence why several parsers are re-using its `get_meta`
    method.
    """
    meta_whitelist = set()  # type: Set[str]
    meta_allowlist = set()  # type: Set[str]

    def get_meta(self) -> Dict[str, Union[str, dict]]:
        out = subprocess.run([_get_exiftool_path(), '-json', self.filename],
                             input_filename=self.filename,
                             check=True, stdout=subprocess.PIPE).stdout
        meta = json.loads(out.decode('utf-8'))[0]
        for key in self.meta_whitelist:
        for key in self.meta_allowlist:
            meta.pop(key, None)
        return meta

+4 −4
Original line number Diff line number Diff line
@@ -15,7 +15,7 @@ assert Set

class PNGParser(exiftool.ExiftoolParser):
    mimetypes = {'image/png', }
    meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
    meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName',
                      'Directory', 'FileSize', 'FileModifyDate',
                      'FileAccessDate', 'FileInodeChangeDate',
                      'FilePermissions', 'FileType', 'FileTypeExtension',
@@ -44,7 +44,7 @@ class PNGParser(exiftool.ExiftoolParser):

class GIFParser(exiftool.ExiftoolParser):
    mimetypes = {'image/gif'}
    meta_whitelist = {'AnimationIterations', 'BackgroundColor', 'BitsPerPixel',
    meta_allowlist = {'AnimationIterations', 'BackgroundColor', 'BitsPerPixel',
                      'ColorResolutionDepth', 'Directory', 'Duration',
                      'ExifToolVersion', 'FileAccessDate',
                      'FileInodeChangeDate', 'FileModifyDate', 'FileName',
@@ -86,7 +86,7 @@ class GdkPixbufAbstractParser(exiftool.ExiftoolParser):
class JPGParser(GdkPixbufAbstractParser):
    _type = 'jpeg'
    mimetypes = {'image/jpeg'}
    meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
    meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName',
                      'Directory', 'FileSize', 'FileModifyDate',
                      'FileAccessDate', "FileInodeChangeDate",
                      'FilePermissions', 'FileType', 'FileTypeExtension',
@@ -99,7 +99,7 @@ class JPGParser(GdkPixbufAbstractParser):
class TiffParser(GdkPixbufAbstractParser):
    _type = 'tiff'
    mimetypes = {'image/tiff'}
    meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
    meta_allowlist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
                      'FillOrder', 'PhotometricInterpretation',
                      'PlanarConfiguration', 'RowsPerStrip', 'SamplesPerPixel',
                      'StripByteCounts', 'StripOffsets', 'BitsPerSample',
+3 −3
Original line number Diff line number Diff line
@@ -89,7 +89,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
            r'^word/theme',
            r'^word/people\.xml$',

            # we have a whitelist in self.files_to_keep,
            # we have an allowlist in self.files_to_keep,
            # so we can trash everything else
            r'^word/_rels/',
        }))
@@ -100,7 +100,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
    def __fill_files_to_keep_via_content_types(self) -> bool:
        """ There is a super-handy `[Content_Types].xml` file
        in MS Office archives, describing what each other file contains.
        The self.content_types_to_keep member contains a type whitelist,
        The self.content_types_to_keep member contains a type allowlist,
        so we're using it to fill the self.files_to_keep one.
        """
        with zipfile.ZipFile(self.filename) as zin:
@@ -220,7 +220,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
                for file_to_omit in self.files_to_omit:
                    if file_to_omit.search(fname):
                        matches = map(lambda r: r.search(fname), self.files_to_keep)
                        if any(matches):  # the file is whitelisted
                        if any(matches):  # the file is in the allowlist
                            continue
                        removed_fnames.add(fname)
                        break
+3 −3
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@ from . import abstract

class TorrentParser(abstract.AbstractParser):
    mimetypes = {'application/x-bittorrent', }
    whitelist = {b'announce', b'announce-list', b'info'}
    allowlist = {b'announce', b'announce-list', b'info'}

    def __init__(self, filename):
        super().__init__(filename)
@@ -18,14 +18,14 @@ class TorrentParser(abstract.AbstractParser):
    def get_meta(self) -> Dict[str, Union[str, dict]]:
        metadata = {}
        for key, value in self.dict_repr.items():
            if key not in self.whitelist:
            if key not in self.allowlist:
                metadata[key.decode('utf-8')] = value
        return metadata

    def remove_all(self) -> bool:
        cleaned = dict()
        for key, value in self.dict_repr.items():
            if key in self.whitelist:
            if key in self.allowlist:
                cleaned[key] = value
        with open(self.output_filename, 'wb') as f:
            f.write(_BencodeHandler().bencode(cleaned))
+9 −9
Original line number Diff line number Diff line
@@ -10,10 +10,10 @@ from . import subprocess
class AbstractFFmpegParser(exiftool.ExiftoolParser):
    """ Abstract parser for all FFmpeg-based ones, mainly for video. """
    # Some fileformats have mandatory metadata fields
    meta_key_value_whitelist = {}  # type: Dict[str, Union[str, int]]
    meta_key_value_allowlist = {}  # type: Dict[str, Union[str, int]]

    def remove_all(self) -> bool:
        if self.meta_key_value_whitelist:
        if self.meta_key_value_allowlist:
            logging.warning('The format of "%s" (%s) has some mandatory '
                            'metadata fields; mat2 filled them with standard '
                            'data.', self.filename, ', '.join(self.mimetypes))
@@ -45,8 +45,8 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):

        ret = dict()  # type: Dict[str, Union[str, dict]]
        for key, value in meta.items():
            if key in self.meta_key_value_whitelist.keys():
                if value == self.meta_key_value_whitelist[key]:
            if key in self.meta_key_value_allowlist.keys():
                if value == self.meta_key_value_allowlist[key]:
                    continue
            ret[key] = value
        return ret
@@ -54,7 +54,7 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):

class WMVParser(AbstractFFmpegParser):
    mimetypes = {'video/x-ms-wmv', }
    meta_whitelist = {'AudioChannels', 'AudioCodecID', 'AudioCodecName',
    meta_allowlist = {'AudioChannels', 'AudioCodecID', 'AudioCodecName',
                      'ErrorCorrectionType', 'AudioSampleRate', 'DataPackets',
                      'Directory', 'Duration', 'ExifToolVersion',
                      'FileAccessDate', 'FileInodeChangeDate', 'FileLength',
@@ -64,7 +64,7 @@ class WMVParser(AbstractFFmpegParser):
                      'ImageWidth', 'MIMEType', 'MaxBitrate', 'MaxPacketSize',
                      'Megapixels', 'MinPacketSize', 'Preroll', 'SendDuration',
                      'SourceFile', 'StreamNumber', 'VideoCodecName', }
    meta_key_value_whitelist = {  # some metadata are mandatory :/
    meta_key_value_allowlist = {  # some metadata are mandatory :/
        'AudioCodecDescription': '',
        'CreationDate': '0000:00:00 00:00:00Z',
        'FileID': '00000000-0000-0000-0000-000000000000',
@@ -78,7 +78,7 @@ class WMVParser(AbstractFFmpegParser):

class AVIParser(AbstractFFmpegParser):
    mimetypes = {'video/x-msvideo', }
    meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
    meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
                      'FileSize', 'FileModifyDate', 'FileAccessDate',
                      'FileInodeChangeDate', 'FilePermissions', 'FileType',
                      'FileTypeExtension', 'MIMEType', 'FrameRate', 'MaxDataRate',
@@ -98,7 +98,7 @@ class AVIParser(AbstractFFmpegParser):

class MP4Parser(AbstractFFmpegParser):
    mimetypes = {'video/mp4', }
    meta_whitelist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration',
    meta_allowlist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration',
                      'XResolution', 'YResolution', 'ExifToolVersion',
                      'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate',
                      'FileName', 'FilePermissions', 'MIMEType', 'FileType',
@@ -109,7 +109,7 @@ class MP4Parser(AbstractFFmpegParser):
                      'MovieDataSize', 'VideoFrameRate', 'MediaTimeScale',
                      'SourceImageHeight', 'SourceImageWidth',
                      'MatrixStructure', 'MediaDuration'}
    meta_key_value_whitelist = {  # some metadata are mandatory :/
    meta_key_value_allowlist = {  # some metadata are mandatory :/
        'CreateDate': '0000:00:00 00:00:00',
        'CurrentTime': '0 s',
        'MediaCreateDate': '0000:00:00 00:00:00',
Loading