From 5ac91cd4f94a822c81bd0bc55a2f7034b31eee7a Mon Sep 17 00:00:00 2001
From: Brolf <0xacab.brolf@magheute.net>
Date: Wed, 20 Feb 2019 00:45:27 +0100
Subject: [PATCH] Refactor {black,white}list into {block,allow}list

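Rename the class attributes `meta_whitelist`,
`meta_key_value_whitelist`, `whitelist`, `tags_blacklist` and
`tags_required_blacklist`, the `_HTMLParser` constructor parameters
and attributes, and the related comments.

This is a pure rename. No aliases are kept for the old names, so any
code that overrides or reads the old attribute names must be updated.

Closes #96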
---
 libmat2/exiftool.py |  4 ++--
 libmat2/images.py   |  8 ++++----
 libmat2/office.py   |  6 +++---
 libmat2/torrent.py  |  6 +++---
 libmat2/video.py    | 18 +++++++++---------
 libmat2/web.py      | 36 ++++++++++++++++++------------------
 6 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/libmat2/exiftool.py b/libmat2/exiftool.py
index db92f60..d18b1fb 100644
--- a/libmat2/exiftool.py
+++ b/libmat2/exiftool.py
@@ -15,14 +15,14 @@ class ExiftoolParser(abstract.AbstractParser):
     from a import file, hence why several parsers are re-using its `get_meta`
     method.
     """
-    meta_whitelist = set()  # type: Set[str]
+    meta_allowlist = set()  # type: Set[str]
 
     def get_meta(self) -> Dict[str, Union[str, dict]]:
         out = subprocess.run([_get_exiftool_path(), '-json', self.filename],
                              input_filename=self.filename,
                              check=True, stdout=subprocess.PIPE).stdout
         meta = json.loads(out.decode('utf-8'))[0]
-        for key in self.meta_whitelist:
+        for key in self.meta_allowlist:
             meta.pop(key, None)
         return meta
 
diff --git a/libmat2/images.py b/libmat2/images.py
index dd3be53..32a329f 100644
--- a/libmat2/images.py
+++ b/libmat2/images.py
@@ -15,7 +15,7 @@ assert Set
 
 class PNGParser(exiftool.ExiftoolParser):
     mimetypes = {'image/png', }
-    meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
+    meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName',
                       'Directory', 'FileSize', 'FileModifyDate',
                       'FileAccessDate', 'FileInodeChangeDate',
                       'FilePermissions', 'FileType', 'FileTypeExtension',
@@ -44,7 +44,7 @@ class PNGParser(exiftool.ExiftoolParser):
 
 class GIFParser(exiftool.ExiftoolParser):
     mimetypes = {'image/gif'}
-    meta_whitelist = {'AnimationIterations', 'BackgroundColor', 'BitsPerPixel',
+    meta_allowlist = {'AnimationIterations', 'BackgroundColor', 'BitsPerPixel',
                       'ColorResolutionDepth', 'Directory', 'Duration',
                       'ExifToolVersion', 'FileAccessDate',
                       'FileInodeChangeDate', 'FileModifyDate', 'FileName',
@@ -86,7 +86,7 @@ class GdkPixbufAbstractParser(exiftool.ExiftoolParser):
 class JPGParser(GdkPixbufAbstractParser):
     _type = 'jpeg'
     mimetypes = {'image/jpeg'}
-    meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
+    meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName',
                       'Directory', 'FileSize', 'FileModifyDate',
                       'FileAccessDate', "FileInodeChangeDate",
                       'FilePermissions', 'FileType', 'FileTypeExtension',
@@ -99,7 +99,7 @@ class JPGParser(GdkPixbufAbstractParser):
 class TiffParser(GdkPixbufAbstractParser):
     _type = 'tiff'
     mimetypes = {'image/tiff'}
-    meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
+    meta_allowlist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
                       'FillOrder', 'PhotometricInterpretation',
                       'PlanarConfiguration', 'RowsPerStrip', 'SamplesPerPixel',
                       'StripByteCounts', 'StripOffsets', 'BitsPerSample',
diff --git a/libmat2/office.py b/libmat2/office.py
index f3a5b22..2c9cbff 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -89,7 +89,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
             r'^word/theme',
             r'^word/people\.xml$',
 
-            # we have a whitelist in self.files_to_keep,
+            # we have an allowlist in self.files_to_keep,
             # so we can trash everything else
             r'^word/_rels/',
         }))
@@ -100,7 +100,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
     def __fill_files_to_keep_via_content_types(self) -> bool:
         """ There is a suer-handy `[Content_Types].xml` file
         in MS Office archives, describing what each other file contains.
-        The self.content_types_to_keep member contains a type whitelist,
+        The self.content_types_to_keep member contains a type allowlist,
         so we're using it to fill the self.files_to_keep one.
         """
         with zipfile.ZipFile(self.filename) as zin:
@@ -220,7 +220,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
                 for file_to_omit in self.files_to_omit:
                     if file_to_omit.search(fname):
                         matches = map(lambda r: r.search(fname), self.files_to_keep)
-                        if any(matches):  # the file is whitelisted
+                        if any(matches):  # the file is in the allowlist
                             continue
                         removed_fnames.add(fname)
                         break
diff --git a/libmat2/torrent.py b/libmat2/torrent.py
index c006f9c..6021d75 100644
--- a/libmat2/torrent.py
+++ b/libmat2/torrent.py
@@ -6,7 +6,7 @@ from . import abstract
 
 class TorrentParser(abstract.AbstractParser):
     mimetypes = {'application/x-bittorrent', }
-    whitelist = {b'announce', b'announce-list', b'info'}
+    allowlist = {b'announce', b'announce-list', b'info'}
 
     def __init__(self, filename):
         super().__init__(filename)
@@ -18,14 +18,14 @@ class TorrentParser(abstract.AbstractParser):
     def get_meta(self) -> Dict[str, Union[str, dict]]:
         metadata = {}
         for key, value in self.dict_repr.items():
-            if key not in self.whitelist:
+            if key not in self.allowlist:
                 metadata[key.decode('utf-8')] = value
         return metadata
 
     def remove_all(self) -> bool:
         cleaned = dict()
         for key, value in self.dict_repr.items():
-            if key in self.whitelist:
+            if key in self.allowlist:
                 cleaned[key] = value
         with open(self.output_filename, 'wb') as f:
             f.write(_BencodeHandler().bencode(cleaned))
diff --git a/libmat2/video.py b/libmat2/video.py
index 4f15b19..0060f78 100644
--- a/libmat2/video.py
+++ b/libmat2/video.py
@@ -10,10 +10,10 @@ from . import subprocess
 class AbstractFFmpegParser(exiftool.ExiftoolParser):
     """ Abstract parser for all FFmpeg-based ones, mainly for video. """
     # Some fileformats have mandatory metadata fields
-    meta_key_value_whitelist = {}  # type: Dict[str, Union[str, int]]
+    meta_key_value_allowlist = {}  # type: Dict[str, Union[str, int]]
 
     def remove_all(self) -> bool:
-        if self.meta_key_value_whitelist:
+        if self.meta_key_value_allowlist:
             logging.warning('The format of "%s" (%s) has some mandatory '
                             'metadata fields; mat2 filled them with standard '
                             'data.', self.filename, ', '.join(self.mimetypes))
@@ -45,8 +45,8 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):
 
         ret = dict()  # type: Dict[str, Union[str, dict]]
         for key, value in meta.items():
-            if key in self.meta_key_value_whitelist.keys():
-                if value == self.meta_key_value_whitelist[key]:
+            if key in self.meta_key_value_allowlist.keys():
+                if value == self.meta_key_value_allowlist[key]:
                     continue
             ret[key] = value
         return ret
@@ -54,7 +54,7 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):
 
 class WMVParser(AbstractFFmpegParser):
     mimetypes = {'video/x-ms-wmv', }
-    meta_whitelist = {'AudioChannels', 'AudioCodecID', 'AudioCodecName',
+    meta_allowlist = {'AudioChannels', 'AudioCodecID', 'AudioCodecName',
                       'ErrorCorrectionType', 'AudioSampleRate', 'DataPackets',
                       'Directory', 'Duration', 'ExifToolVersion',
                       'FileAccessDate', 'FileInodeChangeDate', 'FileLength',
@@ -64,7 +64,7 @@ class WMVParser(AbstractFFmpegParser):
                       'ImageWidth', 'MIMEType', 'MaxBitrate', 'MaxPacketSize',
                       'Megapixels', 'MinPacketSize', 'Preroll', 'SendDuration',
                       'SourceFile', 'StreamNumber', 'VideoCodecName', }
-    meta_key_value_whitelist = {  # some metadata are mandatory :/
+    meta_key_value_allowlist = {  # some metadata are mandatory :/
         'AudioCodecDescription': '',
         'CreationDate': '0000:00:00 00:00:00Z',
         'FileID': '00000000-0000-0000-0000-000000000000',
@@ -78,7 +78,7 @@ class WMVParser(AbstractFFmpegParser):
 
 class AVIParser(AbstractFFmpegParser):
     mimetypes = {'video/x-msvideo', }
-    meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
+    meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
                       'FileSize', 'FileModifyDate', 'FileAccessDate',
                       'FileInodeChangeDate', 'FilePermissions', 'FileType',
                       'FileTypeExtension', 'MIMEType', 'FrameRate', 'MaxDataRate',
@@ -98,7 +98,7 @@ class AVIParser(AbstractFFmpegParser):
 
 class MP4Parser(AbstractFFmpegParser):
     mimetypes = {'video/mp4', }
-    meta_whitelist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration',
+    meta_allowlist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration',
                       'XResolution', 'YResolution', 'ExifToolVersion',
                       'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate',
                       'FileName', 'FilePermissions', 'MIMEType', 'FileType',
@@ -109,7 +109,7 @@ class MP4Parser(AbstractFFmpegParser):
                       'MovieDataSize', 'VideoFrameRate', 'MediaTimeScale',
                       'SourceImageHeight', 'SourceImageWidth',
                       'MatrixStructure', 'MediaDuration'}
-    meta_key_value_whitelist = {  # some metadata are mandatory :/
+    meta_key_value_allowlist = {  # some metadata are mandatory :/
         'CreateDate': '0000:00:00 00:00:00',
         'CurrentTime': '0 s',
         'MediaCreateDate': '0000:00:00 00:00:00',
diff --git a/libmat2/web.py b/libmat2/web.py
index 34426b8..0a61908 100644
--- a/libmat2/web.py
+++ b/libmat2/web.py
@@ -37,15 +37,15 @@ class CSSParser(abstract.AbstractParser):
 
 
 class AbstractHTMLParser(abstract.AbstractParser):
-    tags_blacklist = set()  # type: Set[str]
+    tags_blocklist = set()  # type: Set[str]
     # In some html/xml-based formats some tags are mandatory,
     # so we're keeping them, but are discarding their content
-    tags_required_blacklist = set()  # type: Set[str]
+    tags_required_blocklist = set()  # type: Set[str]
 
     def __init__(self, filename):
         super().__init__(filename)
-        self.__parser = _HTMLParser(self.filename, self.tags_blacklist,
-                                    self.tags_required_blacklist)
+        self.__parser = _HTMLParser(self.filename, self.tags_blocklist,
+                                    self.tags_required_blocklist)
         with open(filename, encoding='utf-8') as f:
             self.__parser.feed(f.read())
         self.__parser.close()
@@ -59,13 +59,13 @@ class AbstractHTMLParser(abstract.AbstractParser):
 
 class HTMLParser(AbstractHTMLParser):
     mimetypes = {'text/html', }
-    tags_blacklist = {'meta', }
-    tags_required_blacklist = {'title', }
+    tags_blocklist = {'meta', }
+    tags_required_blocklist = {'title', }
 
 
 class DTBNCXParser(AbstractHTMLParser):
     mimetypes = {'application/x-dtbncx+xml', }
-    tags_required_blacklist = {'title', 'doctitle', 'meta'}
+    tags_required_blocklist = {'title', 'doctitle', 'meta'}
 
 
 class _HTMLParser(parser.HTMLParser):
@@ -79,7 +79,7 @@ class _HTMLParser(parser.HTMLParser):
 
     Also, gotcha: the `tag` parameters are always in lowercase.
     """
-    def __init__(self, filename, blacklisted_tags, required_blacklisted_tags):
+    def __init__(self, filename, blocklisted_tags, required_blocklisted_tags):
         super().__init__()
         self.filename = filename
         self.__textrepr = ''
@@ -90,24 +90,24 @@ class _HTMLParser(parser.HTMLParser):
         self.__in_dangerous_but_required_tag = 0
         self.__in_dangerous_tag = 0
 
-        if required_blacklisted_tags & blacklisted_tags:  # pragma: nocover
+        if required_blocklisted_tags & blocklisted_tags:  # pragma: nocover
             raise ValueError("There is an overlap between %s and %s" % (
-                required_blacklisted_tags, blacklisted_tags))
-        self.tag_required_blacklist = required_blacklisted_tags
-        self.tag_blacklist = blacklisted_tags
+                required_blocklisted_tags, blocklisted_tags))
+        self.tag_required_blocklist = required_blocklisted_tags
+        self.tag_blocklist = blocklisted_tags
 
     def handle_starttag(self, tag: str, attrs: List[Tuple[str, str]]):
         original_tag = self.get_starttag_text()
         self.__validation_queue.append(original_tag)
 
-        if tag in self.tag_blacklist:
+        if tag in self.tag_blocklist:
             self.__in_dangerous_tag += 1
 
         if self.__in_dangerous_tag == 0:
             if self.__in_dangerous_but_required_tag == 0:
                 self.__textrepr += original_tag
 
-        if tag in self.tag_required_blacklist:
+        if tag in self.tag_required_blocklist:
             self.__in_dangerous_but_required_tag += 1
 
     def handle_endtag(self, tag: str):
@@ -123,7 +123,7 @@ class _HTMLParser(parser.HTMLParser):
                              "tag %s in %s" %
                              (tag, previous_tag, self.filename))
 
-        if tag in self.tag_required_blacklist:
+        if tag in self.tag_required_blocklist:
             self.__in_dangerous_but_required_tag -= 1
 
         if self.__in_dangerous_tag == 0:
@@ -131,7 +131,7 @@ class _HTMLParser(parser.HTMLParser):
                 # There is no `get_endtag_text()` method :/
                 self.__textrepr += '</' + previous_tag + '>'
 
-        if tag in self.tag_blacklist:
+        if tag in self.tag_blocklist:
             self.__in_dangerous_tag -= 1
 
     def handle_data(self, data: str):
@@ -141,14 +141,14 @@ class _HTMLParser(parser.HTMLParser):
                     self.__textrepr += escape(data)
 
     def handle_startendtag(self, tag: str, attrs: List[Tuple[str, str]]):
-        if tag in self.tag_required_blacklist | self.tag_blacklist:
+        if tag in self.tag_required_blocklist | self.tag_blocklist:
             meta = {k:v for k, v in attrs}
             name = meta.get('name', 'harmful metadata')
             content = meta.get('content', 'harmful data')
             self.__meta[name] = content
 
             if self.__in_dangerous_tag == 0:
-                if tag in self.tag_required_blacklist:
+                if tag in self.tag_required_blocklist:
                     self.__textrepr += '<' + tag + ' />'
                 return
 
-- 
GitLab