Commit 5a5c642a authored by jvoisin

Don't break office files for MS Office

We didn't take the whitelist into account while
removing dangling file references from [Content_Types].xml.
parent 84e302ac
......@@ -217,8 +217,13 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
removed_fnames = set()
with zipfile.ZipFile(self.filename) as zin:
for fname in [item.filename for item in zin.infolist()]:
if any(map(lambda r:, self.files_to_omit)): # type: ignore
for file_to_omit in self.files_to_omit:
matches = map(lambda r:, self.files_to_keep)
if any(matches): # the file is whitelisted
root = tree.getroot()
for item in root.findall('{%s}Override' % namespace['']):
