diff --git a/libmat2/archive.py b/libmat2/archive.py
index d81253134be85f8f561f7cb1eeed02a3e9c34c13..b29d690166b58d58e4bd2cd8bc8bcd7c3650e477 100644
--- a/libmat2/archive.py
+++ b/libmat2/archive.py
@@ -17,7 +17,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
     """ Office files (.docx, .odt, …) are zipped files. """
     # Those are the files that have a format that _isn't_
     # supported by MAT2, but that we want to keep anyway.
-    files_to_keep = set()  # type: Set[str]
+    files_to_keep = set()  # type: Set[Pattern]
 
     # Those are the files that we _do not_ want to keep,
     # no matter if they are supported or not.
@@ -89,7 +89,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
                     abort = True
                     continue
 
-                if item.filename in self.files_to_keep:
+                if any(map(lambda r: r.search(item.filename), self.files_to_keep)):
                     # those files aren't supported, but we want to add them anyway
                     pass
                 elif any(map(lambda r: r.search(item.filename), self.files_to_omit)):
diff --git a/libmat2/office.py b/libmat2/office.py
index 91bf2a64f6dd0316bcdc89e6bcc7a94516f10034..3abf10887def046df9b283c39ee867f9c978ca61 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -50,25 +50,75 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
         'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
         'application/vnd.openxmlformats-officedocument.presentationml.presentation'
     }
-    files_to_keep = {
-        '[Content_Types].xml',
-        '_rels/.rels',
-        'word/_rels/document.xml.rels',
-        'word/document.xml',
-        'word/fontTable.xml',
-        'word/settings.xml',
-        'word/styles.xml',
-        'docProps/app.xml',
-        'docProps/core.xml',
+    content_types_to_keep = {
+        'application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml',  # /word/endnotes.xml
+        'application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml',  # /word/footnotes.xml
+        'application/vnd.openxmlformats-officedocument.extended-properties+xml',  # /docProps/app.xml
+        'application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml',  # /word/document.xml
+        'application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml',  # /word/fontTable.xml
+        'application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml',  # /word/footer.xml
+        'application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml',  # /word/header.xml
+        'application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml',  # /word/styles.xml
+        'application/vnd.openxmlformats-package.core-properties+xml',  # /docProps/core.xml
+
+        # Do we want to keep the following ones?
+        'application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml',
+
+        # See https://0xacab.org/jvoisin/mat2/issues/71
+        'application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml',  # /word/numbering.xml
+    }
+    files_to_keep = set(map(re.compile, {  # type: ignore
+        r'^\[Content_Types\]\.xml$',
+        r'^_rels/\.rels$',
+        r'^word/_rels/document\.xml\.rels$',
+        r'^word/_rels/footer[0-9]*\.xml\.rels$',
+        r'^word/_rels/header[0-9]*\.xml\.rels$',
 
         # https://msdn.microsoft.com/en-us/library/dd908153(v=office.12).aspx
-        'word/stylesWithEffects.xml',
-    }
+        r'^word/stylesWithEffects\.xml$',
+    }))
     files_to_omit = set(map(re.compile, {  # type: ignore
-        'word/webSettings.xml',
-        'word/theme',
+        r'^customXml/',
+        r'webSettings\.xml$',
+        r'^docProps/custom\.xml$',
+        r'^word/printerSettings/',
+        r'^word/theme',
+
+        # we have a whitelist in self.files_to_keep,
+        # so we can trash everything else
+        r'^word/_rels/',
     }))
 
+    def __init__(self, filename):
+        super().__init__(filename)  # base-class setup first, then the whitelist
+        if not self.__fill_files_to_keep_via_content_types():
+            raise ValueError  # `[Content_Types].xml` is absent or isn't valid XML
+
+    def __fill_files_to_keep_via_content_types(self) -> bool:
+        """ MS Office archives ship a super-handy `[Content_Types].xml` file
+        giving the content type of each part. Every part whose type is in
+        self.content_types_to_keep gets an exact-match pattern in this instance's
+        files_to_keep. Return False if that file is absent or isn't valid XML.
+        """
+        with zipfile.ZipFile(self.filename) as zin:
+            if '[Content_Types].xml' not in zin.namelist():
+                return False
+            xml_data = zin.read('[Content_Types].xml')
+
+        self.content_types = dict()  # type: Dict[str, str]
+        try:
+            tree = ET.fromstring(xml_data)
+        except ET.ParseError:
+            return False
+        self.files_to_keep = set(self.files_to_keep)  # own copy: the class-level set is shared
+        for c in tree:
+            if 'PartName' not in c.attrib or 'ContentType' not in c.attrib:
+                continue
+            elif c.attrib['ContentType'] in self.content_types_to_keep:
+                fname = c.attrib['PartName'][1:]  # remove leading `/`
+                self.files_to_keep.add(re.compile('^' + re.escape(fname) + '$'))  # type: ignore
+        return True
+
     @staticmethod
     def __remove_rsid(full_path: str) -> bool:
         """ The method will remove "revision session ID".  We're '}rsid'
@@ -270,18 +320,18 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
         'application/vnd.oasis.opendocument.formula',
         'application/vnd.oasis.opendocument.image',
     }
-    files_to_keep = {
-        'META-INF/manifest.xml',
-        'content.xml',
-        'manifest.rdf',
-        'mimetype',
-        'settings.xml',
-        'styles.xml',
-    }
+    files_to_keep = set(map(re.compile, {  # type: ignore
+        r'^META-INF/manifest\.xml$',
+        r'^content\.xml$',
+        r'^manifest\.rdf$',
+        r'^mimetype$',
+        r'^settings\.xml$',
+        r'^styles\.xml$',
+    }))
     files_to_omit = set(map(re.compile, {  # type: ignore
         r'^meta\.xml$',
-        '^Configurations2/',
-        '^Thumbnails/',
+        r'^Configurations2/',
+        r'^Thumbnails/',
     }))
 
     @staticmethod
diff --git a/tests/data/broken_xml_content_types.docx b/tests/data/broken_xml_content_types.docx
new file mode 100644
index 0000000000000000000000000000000000000000..41e0e49e9fbb843ce24f4b4538adf6e8991b3e3c
Binary files /dev/null and b/tests/data/broken_xml_content_types.docx differ
diff --git a/tests/data/malformed_content_types.docx b/tests/data/malformed_content_types.docx
index 43ac7437618f8f49e52c2006526efa087cb0c011..cc5caf3515b228391273bd7f0ec615ab0bad915a 100644
Binary files a/tests/data/malformed_content_types.docx and b/tests/data/malformed_content_types.docx differ
diff --git a/tests/data/no_content_types.docx b/tests/data/no_content_types.docx
new file mode 100644
index 0000000000000000000000000000000000000000..d0e0330fd236d7752b4c3660f70c5c1e33b601ab
Binary files /dev/null and b/tests/data/no_content_types.docx differ
diff --git a/tests/test_corrupted_files.py b/tests/test_corrupted_files.py
index 4ac26783c0ed27da34f02a8286be68504828ced7..8d7c252529e6b905cc49706f81c14a49ccc2f0d5 100644
--- a/tests/test_corrupted_files.py
+++ b/tests/test_corrupted_files.py
@@ -86,14 +86,26 @@ class TestExplicitelyUnsupportedFiles(unittest.TestCase):
         os.remove('./tests/data/clean.py')
 
 
-class TestCorruptedContentTypesOffice(unittest.TestCase):
-    def test_office(self):
+class TestWrongContentTypesFileOffice(unittest.TestCase):  # MSOfficeParser fed unusual `[Content_Types].xml` fixtures
+    def test_office_incomplete(self):  # the parser is built, but remove_all() must return False
         shutil.copy('./tests/data/malformed_content_types.docx', './tests/data/clean.docx')
         p = office.MSOfficeParser('./tests/data/clean.docx')
         self.assertIsNotNone(p)
         self.assertFalse(p.remove_all())
         os.remove('./tests/data/clean.docx')
 
+    def test_office_broken(self):  # constructing the parser on this fixture must raise ValueError
+        shutil.copy('./tests/data/broken_xml_content_types.docx', './tests/data/clean.docx')
+        with self.assertRaises(ValueError):
+            office.MSOfficeParser('./tests/data/clean.docx')
+        os.remove('./tests/data/clean.docx')  # drop the working copy (not reached if the assertion fails)
+
+    def test_office_absent(self):  # constructing the parser on this fixture must raise ValueError
+        shutil.copy('./tests/data/no_content_types.docx', './tests/data/clean.docx')
+        with self.assertRaises(ValueError):
+            office.MSOfficeParser('./tests/data/clean.docx')
+        os.remove('./tests/data/clean.docx')  # drop the working copy (not reached if the assertion fails)
+
 class TestCorruptedFiles(unittest.TestCase):
     def test_pdf(self):
         shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')