Loading libmat2/exiftool.py +2 −2 Original line number Diff line number Diff line Loading @@ -15,14 +15,14 @@ class ExiftoolParser(abstract.AbstractParser): from a import file, hence why several parsers are re-using its `get_meta` method. """ meta_whitelist = set() # type: Set[str] meta_allowlist = set() # type: Set[str] def get_meta(self) -> Dict[str, Union[str, dict]]: out = subprocess.run([_get_exiftool_path(), '-json', self.filename], input_filename=self.filename, check=True, stdout=subprocess.PIPE).stdout meta = json.loads(out.decode('utf-8'))[0] for key in self.meta_whitelist: for key in self.meta_allowlist: meta.pop(key, None) return meta Loading libmat2/images.py +4 −4 Original line number Diff line number Diff line Loading @@ -15,7 +15,7 @@ assert Set class PNGParser(exiftool.ExiftoolParser): mimetypes = {'image/png', } meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory', 'FileSize', 'FileModifyDate', 'FileAccessDate', 'FileInodeChangeDate', 'FilePermissions', 'FileType', 'FileTypeExtension', Loading Loading @@ -44,7 +44,7 @@ class PNGParser(exiftool.ExiftoolParser): class GIFParser(exiftool.ExiftoolParser): mimetypes = {'image/gif'} meta_whitelist = {'AnimationIterations', 'BackgroundColor', 'BitsPerPixel', meta_allowlist = {'AnimationIterations', 'BackgroundColor', 'BitsPerPixel', 'ColorResolutionDepth', 'Directory', 'Duration', 'ExifToolVersion', 'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate', 'FileName', Loading Loading @@ -86,7 +86,7 @@ class GdkPixbufAbstractParser(exiftool.ExiftoolParser): class JPGParser(GdkPixbufAbstractParser): _type = 'jpeg' mimetypes = {'image/jpeg'} meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory', 'FileSize', 'FileModifyDate', 'FileAccessDate', "FileInodeChangeDate", 'FilePermissions', 'FileType', 'FileTypeExtension', Loading @@ -99,7 +99,7 @@ 
class JPGParser(GdkPixbufAbstractParser): class TiffParser(GdkPixbufAbstractParser): _type = 'tiff' mimetypes = {'image/tiff'} meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples', meta_allowlist = {'Compression', 'ExifByteOrder', 'ExtraSamples', 'FillOrder', 'PhotometricInterpretation', 'PlanarConfiguration', 'RowsPerStrip', 'SamplesPerPixel', 'StripByteCounts', 'StripOffsets', 'BitsPerSample', Loading libmat2/office.py +3 −3 Original line number Diff line number Diff line Loading @@ -89,7 +89,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser): r'^word/theme', r'^word/people\.xml$', # we have a whitelist in self.files_to_keep, # we have an allowlist in self.files_to_keep, # so we can trash everything else r'^word/_rels/', })) Loading @@ -100,7 +100,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser): def __fill_files_to_keep_via_content_types(self) -> bool: """ There is a super-handy `[Content_Types].xml` file in MS Office archives, describing what each other file contains. The self.content_types_to_keep member contains a type whitelist, The self.content_types_to_keep member contains a type allowlist, so we're using it to fill the self.files_to_keep one. """ with zipfile.ZipFile(self.filename) as zin: Loading Loading @@ -220,7 +220,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser): for file_to_omit in self.files_to_omit: if file_to_omit.search(fname): matches = map(lambda r: r.search(fname), self.files_to_keep) if any(matches): # the file is whitelisted if any(matches): # the file is in the allowlist continue removed_fnames.add(fname) break Loading libmat2/torrent.py +3 −3 Original line number Diff line number Diff line Loading @@ -6,7 +6,7 @@ from . 
import abstract class TorrentParser(abstract.AbstractParser): mimetypes = {'application/x-bittorrent', } whitelist = {b'announce', b'announce-list', b'info'} allowlist = {b'announce', b'announce-list', b'info'} def __init__(self, filename): super().__init__(filename) Loading @@ -18,14 +18,14 @@ class TorrentParser(abstract.AbstractParser): def get_meta(self) -> Dict[str, Union[str, dict]]: metadata = {} for key, value in self.dict_repr.items(): if key not in self.whitelist: if key not in self.allowlist: metadata[key.decode('utf-8')] = value return metadata def remove_all(self) -> bool: cleaned = dict() for key, value in self.dict_repr.items(): if key in self.whitelist: if key in self.allowlist: cleaned[key] = value with open(self.output_filename, 'wb') as f: f.write(_BencodeHandler().bencode(cleaned)) Loading libmat2/video.py +9 −9 Original line number Diff line number Diff line Loading @@ -10,10 +10,10 @@ from . import subprocess class AbstractFFmpegParser(exiftool.ExiftoolParser): """ Abstract parser for all FFmpeg-based ones, mainly for video. 
""" # Some fileformats have mandatory metadata fields meta_key_value_whitelist = {} # type: Dict[str, Union[str, int]] meta_key_value_allowlist = {} # type: Dict[str, Union[str, int]] def remove_all(self) -> bool: if self.meta_key_value_whitelist: if self.meta_key_value_allowlist: logging.warning('The format of "%s" (%s) has some mandatory ' 'metadata fields; mat2 filled them with standard ' 'data.', self.filename, ', '.join(self.mimetypes)) Loading Loading @@ -45,8 +45,8 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser): ret = dict() # type: Dict[str, Union[str, dict]] for key, value in meta.items(): if key in self.meta_key_value_whitelist.keys(): if value == self.meta_key_value_whitelist[key]: if key in self.meta_key_value_allowlist.keys(): if value == self.meta_key_value_allowlist[key]: continue ret[key] = value return ret Loading @@ -54,7 +54,7 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser): class WMVParser(AbstractFFmpegParser): mimetypes = {'video/x-ms-wmv', } meta_whitelist = {'AudioChannels', 'AudioCodecID', 'AudioCodecName', meta_allowlist = {'AudioChannels', 'AudioCodecID', 'AudioCodecName', 'ErrorCorrectionType', 'AudioSampleRate', 'DataPackets', 'Directory', 'Duration', 'ExifToolVersion', 'FileAccessDate', 'FileInodeChangeDate', 'FileLength', Loading @@ -64,7 +64,7 @@ class WMVParser(AbstractFFmpegParser): 'ImageWidth', 'MIMEType', 'MaxBitrate', 'MaxPacketSize', 'Megapixels', 'MinPacketSize', 'Preroll', 'SendDuration', 'SourceFile', 'StreamNumber', 'VideoCodecName', } meta_key_value_whitelist = { # some metadata are mandatory :/ meta_key_value_allowlist = { # some metadata are mandatory :/ 'AudioCodecDescription': '', 'CreationDate': '0000:00:00 00:00:00Z', 'FileID': '00000000-0000-0000-0000-000000000000', Loading @@ -78,7 +78,7 @@ class WMVParser(AbstractFFmpegParser): class AVIParser(AbstractFFmpegParser): mimetypes = {'video/x-msvideo', } meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory', meta_allowlist = 
{'SourceFile', 'ExifToolVersion', 'FileName', 'Directory', 'FileSize', 'FileModifyDate', 'FileAccessDate', 'FileInodeChangeDate', 'FilePermissions', 'FileType', 'FileTypeExtension', 'MIMEType', 'FrameRate', 'MaxDataRate', Loading @@ -98,7 +98,7 @@ class AVIParser(AbstractFFmpegParser): class MP4Parser(AbstractFFmpegParser): mimetypes = {'video/mp4', } meta_whitelist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration', meta_allowlist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration', 'XResolution', 'YResolution', 'ExifToolVersion', 'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate', 'FileName', 'FilePermissions', 'MIMEType', 'FileType', Loading @@ -109,7 +109,7 @@ class MP4Parser(AbstractFFmpegParser): 'MovieDataSize', 'VideoFrameRate', 'MediaTimeScale', 'SourceImageHeight', 'SourceImageWidth', 'MatrixStructure', 'MediaDuration'} meta_key_value_whitelist = { # some metadata are mandatory :/ meta_key_value_allowlist = { # some metadata are mandatory :/ 'CreateDate': '0000:00:00 00:00:00', 'CurrentTime': '0 s', 'MediaCreateDate': '0000:00:00 00:00:00', Loading Loading
libmat2/exiftool.py +2 −2 Original line number Diff line number Diff line Loading @@ -15,14 +15,14 @@ class ExiftoolParser(abstract.AbstractParser): from a import file, hence why several parsers are re-using its `get_meta` method. """ meta_whitelist = set() # type: Set[str] meta_allowlist = set() # type: Set[str] def get_meta(self) -> Dict[str, Union[str, dict]]: out = subprocess.run([_get_exiftool_path(), '-json', self.filename], input_filename=self.filename, check=True, stdout=subprocess.PIPE).stdout meta = json.loads(out.decode('utf-8'))[0] for key in self.meta_whitelist: for key in self.meta_allowlist: meta.pop(key, None) return meta Loading
libmat2/images.py +4 −4 Original line number Diff line number Diff line Loading @@ -15,7 +15,7 @@ assert Set class PNGParser(exiftool.ExiftoolParser): mimetypes = {'image/png', } meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory', 'FileSize', 'FileModifyDate', 'FileAccessDate', 'FileInodeChangeDate', 'FilePermissions', 'FileType', 'FileTypeExtension', Loading Loading @@ -44,7 +44,7 @@ class PNGParser(exiftool.ExiftoolParser): class GIFParser(exiftool.ExiftoolParser): mimetypes = {'image/gif'} meta_whitelist = {'AnimationIterations', 'BackgroundColor', 'BitsPerPixel', meta_allowlist = {'AnimationIterations', 'BackgroundColor', 'BitsPerPixel', 'ColorResolutionDepth', 'Directory', 'Duration', 'ExifToolVersion', 'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate', 'FileName', Loading Loading @@ -86,7 +86,7 @@ class GdkPixbufAbstractParser(exiftool.ExiftoolParser): class JPGParser(GdkPixbufAbstractParser): _type = 'jpeg' mimetypes = {'image/jpeg'} meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory', 'FileSize', 'FileModifyDate', 'FileAccessDate', "FileInodeChangeDate", 'FilePermissions', 'FileType', 'FileTypeExtension', Loading @@ -99,7 +99,7 @@ class JPGParser(GdkPixbufAbstractParser): class TiffParser(GdkPixbufAbstractParser): _type = 'tiff' mimetypes = {'image/tiff'} meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples', meta_allowlist = {'Compression', 'ExifByteOrder', 'ExtraSamples', 'FillOrder', 'PhotometricInterpretation', 'PlanarConfiguration', 'RowsPerStrip', 'SamplesPerPixel', 'StripByteCounts', 'StripOffsets', 'BitsPerSample', Loading
libmat2/office.py +3 −3 Original line number Diff line number Diff line Loading @@ -89,7 +89,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser): r'^word/theme', r'^word/people\.xml$', # we have a whitelist in self.files_to_keep, # we have an allowlist in self.files_to_keep, # so we can trash everything else r'^word/_rels/', })) Loading @@ -100,7 +100,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser): def __fill_files_to_keep_via_content_types(self) -> bool: """ There is a super-handy `[Content_Types].xml` file in MS Office archives, describing what each other file contains. The self.content_types_to_keep member contains a type whitelist, The self.content_types_to_keep member contains a type allowlist, so we're using it to fill the self.files_to_keep one. """ with zipfile.ZipFile(self.filename) as zin: Loading Loading @@ -220,7 +220,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser): for file_to_omit in self.files_to_omit: if file_to_omit.search(fname): matches = map(lambda r: r.search(fname), self.files_to_keep) if any(matches): # the file is whitelisted if any(matches): # the file is in the allowlist continue removed_fnames.add(fname) break Loading
libmat2/torrent.py +3 −3 Original line number Diff line number Diff line Loading @@ -6,7 +6,7 @@ from . import abstract class TorrentParser(abstract.AbstractParser): mimetypes = {'application/x-bittorrent', } whitelist = {b'announce', b'announce-list', b'info'} allowlist = {b'announce', b'announce-list', b'info'} def __init__(self, filename): super().__init__(filename) Loading @@ -18,14 +18,14 @@ class TorrentParser(abstract.AbstractParser): def get_meta(self) -> Dict[str, Union[str, dict]]: metadata = {} for key, value in self.dict_repr.items(): if key not in self.whitelist: if key not in self.allowlist: metadata[key.decode('utf-8')] = value return metadata def remove_all(self) -> bool: cleaned = dict() for key, value in self.dict_repr.items(): if key in self.whitelist: if key in self.allowlist: cleaned[key] = value with open(self.output_filename, 'wb') as f: f.write(_BencodeHandler().bencode(cleaned)) Loading
libmat2/video.py +9 −9 Original line number Diff line number Diff line Loading @@ -10,10 +10,10 @@ from . import subprocess class AbstractFFmpegParser(exiftool.ExiftoolParser): """ Abstract parser for all FFmpeg-based ones, mainly for video. """ # Some fileformats have mandatory metadata fields meta_key_value_whitelist = {} # type: Dict[str, Union[str, int]] meta_key_value_allowlist = {} # type: Dict[str, Union[str, int]] def remove_all(self) -> bool: if self.meta_key_value_whitelist: if self.meta_key_value_allowlist: logging.warning('The format of "%s" (%s) has some mandatory ' 'metadata fields; mat2 filled them with standard ' 'data.', self.filename, ', '.join(self.mimetypes)) Loading Loading @@ -45,8 +45,8 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser): ret = dict() # type: Dict[str, Union[str, dict]] for key, value in meta.items(): if key in self.meta_key_value_whitelist.keys(): if value == self.meta_key_value_whitelist[key]: if key in self.meta_key_value_allowlist.keys(): if value == self.meta_key_value_allowlist[key]: continue ret[key] = value return ret Loading @@ -54,7 +54,7 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser): class WMVParser(AbstractFFmpegParser): mimetypes = {'video/x-ms-wmv', } meta_whitelist = {'AudioChannels', 'AudioCodecID', 'AudioCodecName', meta_allowlist = {'AudioChannels', 'AudioCodecID', 'AudioCodecName', 'ErrorCorrectionType', 'AudioSampleRate', 'DataPackets', 'Directory', 'Duration', 'ExifToolVersion', 'FileAccessDate', 'FileInodeChangeDate', 'FileLength', Loading @@ -64,7 +64,7 @@ class WMVParser(AbstractFFmpegParser): 'ImageWidth', 'MIMEType', 'MaxBitrate', 'MaxPacketSize', 'Megapixels', 'MinPacketSize', 'Preroll', 'SendDuration', 'SourceFile', 'StreamNumber', 'VideoCodecName', } meta_key_value_whitelist = { # some metadata are mandatory :/ meta_key_value_allowlist = { # some metadata are mandatory :/ 'AudioCodecDescription': '', 'CreationDate': '0000:00:00 00:00:00Z', 'FileID': 
'00000000-0000-0000-0000-000000000000', Loading @@ -78,7 +78,7 @@ class WMVParser(AbstractFFmpegParser): class AVIParser(AbstractFFmpegParser): mimetypes = {'video/x-msvideo', } meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory', meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory', 'FileSize', 'FileModifyDate', 'FileAccessDate', 'FileInodeChangeDate', 'FilePermissions', 'FileType', 'FileTypeExtension', 'MIMEType', 'FrameRate', 'MaxDataRate', Loading @@ -98,7 +98,7 @@ class AVIParser(AbstractFFmpegParser): class MP4Parser(AbstractFFmpegParser): mimetypes = {'video/mp4', } meta_whitelist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration', meta_allowlist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration', 'XResolution', 'YResolution', 'ExifToolVersion', 'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate', 'FileName', 'FilePermissions', 'MIMEType', 'FileType', Loading @@ -109,7 +109,7 @@ class MP4Parser(AbstractFFmpegParser): 'MovieDataSize', 'VideoFrameRate', 'MediaTimeScale', 'SourceImageHeight', 'SourceImageWidth', 'MatrixStructure', 'MediaDuration'} meta_key_value_whitelist = { # some metadata are mandatory :/ meta_key_value_allowlist = { # some metadata are mandatory :/ 'CreateDate': '0000:00:00 00:00:00', 'CurrentTime': '0 s', 'MediaCreateDate': '0000:00:00 00:00:00', Loading