From b6497f65de36d1fdca8734c4af14141dc455a54d Mon Sep 17 00:00:00 2001
From: jvoisin <julien.voisin@dustri.org>
Date: Sun, 15 Dec 2019 14:18:50 +0100
Subject: [PATCH] Improve the robustness of the HTML parser

---
 libmat2/web.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/libmat2/web.py b/libmat2/web.py
index b770200..75633a3 100644
--- a/libmat2/web.py
+++ b/libmat2/web.py
@@ -96,6 +96,15 @@ class _HTMLParser(parser.HTMLParser):
         self.tag_required_blocklist = required_blocklisted_tags
         self.tag_blocklist = blocklisted_tags
 
+    def error(self, msg):
+        """ Amusingly, Python's documentation doesn't mention that this
+        function needs to be implemented in subclasses of the parent class
+        of parser.HTMLParser. This was found by fuzzing,
+        triggering the following exception:
+            NotImplementedError: subclasses of ParserBase must override error()
+        """
+        raise ValueError(msg)
+
     def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
         # Ignore the type, because mypy is too stupid to infer
         # that get_starttag_text() can't return None.
-- 
GitLab