From 5a5c642a463523bf8cc56ad13817b82900661bd4 Mon Sep 17 00:00:00 2001
From: jvoisin <julien.voisin@dustri.org>
Date: Wed, 3 Oct 2018 16:35:36 +0200
Subject: [PATCH] Don't break office files for MS Office

We didn't take the whitelist into account while
removing dangling files from [Content_Types].xml.
---
 libmat2/office.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/libmat2/office.py b/libmat2/office.py
index 997a247..372d69a 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -217,8 +217,13 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
         removed_fnames = set()
         with zipfile.ZipFile(self.filename) as zin:
             for fname in [item.filename for item in zin.infolist()]:
-                if any(map(lambda r: r.search(fname), self.files_to_omit)):  # type: ignore
-                    removed_fnames.add(fname)
+                for file_to_omit in self.files_to_omit:
+                    if file_to_omit.search(fname):
+                        matches = map(lambda r: r.search(fname), self.files_to_keep)
+                        if any(matches):  # the file is whitelisted
+                            continue
+                        removed_fnames.add(fname)
+                        break
 
         root = tree.getroot()
         for item in root.findall('{%s}Override' % namespace['']):
-- 
GitLab