From 072ee1814d2d40788a93622fe6e753a9f434d515 Mon Sep 17 00:00:00 2001
From: jvoisin <julien.voisin@dustri.org>
Date: Wed, 5 Sep 2018 18:41:08 +0200
Subject: [PATCH] Remove defusedxml support and document why

---
 doc/implementation_notes.md | 8 ++++++++
 libmat2/office.py           | 6 +-----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/doc/implementation_notes.md b/doc/implementation_notes.md
index b763835..3b8e49d 100644
--- a/doc/implementation_notes.md
+++ b/doc/implementation_notes.md
@@ -61,3 +61,11 @@ Images handling
 When possible, images are handled like PDF: rendered on a surface, then saved
 to the filesystem. This ensures that every metadata is removed.
 
+XML attacks
+-----------
+
+Since our threat model conveniently excludes files crafted to specifically
+bypass MAT2, file formats containing harmful XML are out of our scope.
+But since MAT2 is using [etree](https://docs.python.org/3/library/xml.html#xml-vulnerabilities)
+to process XML, it's "only" vulnerable to DoS, and not memory corruption:
+odds are that the user will notice that the cleaning didn't succeed.
diff --git a/libmat2/office.py b/libmat2/office.py
index 224067c..29100df 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -7,11 +7,7 @@ import zipfile
 import logging
 from typing import Dict, Set, Pattern
 
-try:  # protect against DoS
-    from defusedxml import ElementTree as ET  # type: ignore
-except ImportError:
-    import xml.etree.ElementTree as ET  # type: ignore
-
+import xml.etree.ElementTree as ET  # type: ignore
 
 from . import abstract, parser_factory
 
-- 
GitLab