Commit efa525c1 authored by Julien (jvoisin) Voisin's avatar Julien (jvoisin) Voisin
Browse files

Improve the robustness of the HTML parser

parent f67cd9d7
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -104,6 +104,15 @@ class _HTMLParser(parser.HTMLParser):
        self.tag_required_blocklist = required_blocklisted_tags
        self.tag_blocklist = blocklisted_tags

    def error(self, message):  # pragma: no cover
        """ Amusingly, Python's documentation doesn't mention that this
        function needs to be implemented in subclasses of the parent class
        of parser.HTMLParser. This was found by fuzzing,
        triggering the following exception:
            NotImplementedError: subclasses of ParserBase must override error()
        """
        raise ValueError(message)

    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
        # Ignore the type, because mypy is too stupid to infer
        # that get_starttag_text() can't return None.