From b6497f65de36d1fdca8734c4af14141dc455a54d Mon Sep 17 00:00:00 2001
From: jvoisin <julien.voisin@dustri.org>
Date: Sun, 15 Dec 2019 14:18:50 +0100
Subject: [PATCH] Improve the robustness of the HTML parser

---
 libmat2/web.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/libmat2/web.py b/libmat2/web.py
index b770200..75633a3 100644
--- a/libmat2/web.py
+++ b/libmat2/web.py
@@ -96,6 +96,15 @@ class _HTMLParser(parser.HTMLParser):
         self.tag_required_blocklist = required_blocklisted_tags
         self.tag_blocklist = blocklisted_tags
 
+    def error(self, msg):
+        """ Raise a ValueError carrying `msg`. Python's documentation doesn't
+        mention that subclasses of ParserBase (the parent class of
+        parser.HTMLParser) must implement this method. This was found by
+        fuzzing, which triggered the following exception:
+            NotImplementedError: subclasses of ParserBase must override error()
+        """
+        raise ValueError(msg)
+
     def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
         # Ignore the type, because mypy is too stupid to infer
         # that get_starttag_text() can't return None.
-- 
GitLab