From f67cd9d7dcf465bb83597cf9dd64fb8b6bc053db Mon Sep 17 00:00:00 2001
From: jvoisin <julien.voisin@dustri.org>
Date: Sun, 15 Dec 2019 06:44:21 -0800
Subject: [PATCH] Improve the robustness of the CSS parser

---
 libmat2/web.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/libmat2/web.py b/libmat2/web.py
index b770200..2864d60 100644
--- a/libmat2/web.py
+++ b/libmat2/web.py
@@ -17,7 +17,11 @@ class CSSParser(abstract.AbstractParser):
 
     def remove_all(self) -> bool:
         with open(self.filename, encoding='utf-8') as f:
-            cleaned = re.sub(r'/\*.*?\*/', '', f.read(), 0, self.flags)
+            try:
+                content = f.read()
+            except UnicodeDecodeError as e:  # pragma: no cover
+                raise ValueError("%s is not valid UTF-8" % self.filename) from e
+            cleaned = re.sub(r'/\*.*?\*/', '', content, 0, self.flags)
         with open(self.output_filename, 'w', encoding='utf-8') as f:
             f.write(cleaned)
         return True
@@ -25,7 +29,11 @@ class CSSParser(abstract.AbstractParser):
     def get_meta(self) -> Dict[str, Any]:
         metadata = {}
         with open(self.filename, encoding='utf-8') as f:
-            cssdoc = re.findall(r'/\*(.*?)\*/', f.read(), self.flags)
+            try:
+                content = f.read()
+            except UnicodeDecodeError as e:  # pragma: no cover
+                raise ValueError("%s is not valid UTF-8" % self.filename) from e
+        cssdoc = re.findall(r'/\*(.*?)\*/', content, self.flags)
         for match in cssdoc:
             for line in match.splitlines():
                 try:
-- 
GitLab