From 0b094b594bd1db017ed3d063a10714f6b2a7b9f3 Mon Sep 17 00:00:00 2001 From: jvoisin <julien.voisin@dustri.org> Date: Wed, 14 Jul 2021 23:34:02 +0200 Subject: [PATCH] Improve xlsx support This should close #156 --- libmat2/office.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libmat2/office.py b/libmat2/office.py index 19e2ce5..f77c0f5 100644 --- a/libmat2/office.py +++ b/libmat2/office.py @@ -88,6 +88,7 @@ class MSOfficeParser(ZipParser): r'^\[Content_Types\]\.xml$', r'^_rels/\.rels$', r'^xl/sharedStrings\.xml$', # https://docs.microsoft.com/en-us/office/open-xml/working-with-the-shared-string-table + r'^xl/calcChain\.xml$', r'^(?:word|ppt|xl)/_rels/document\.xml\.rels$', r'^(?:word|ppt|xl)/_rels/footer[0-9]*\.xml\.rels$', r'^(?:word|ppt|xl)/_rels/header[0-9]*\.xml\.rels$', @@ -108,6 +109,8 @@ class MSOfficeParser(ZipParser): r'^ppt/slideMasters/slideMaster[0-9]+\.xml', r'^ppt/slideMasters/_rels/slideMaster[0-9]+\.xml\.rels', r'^xl/worksheets/_rels/sheet[0-9]+\.xml\.rels', + r'^xl/drawings/vmlDrawing[0-9]+\.vml', + r'^xl/drawings/drawing[0-9]+\.xml', })) self.files_to_omit = set(map(re.compile, { # type: ignore r'^\[trash\]/', @@ -124,6 +127,7 @@ class MSOfficeParser(ZipParser): # Additional presentation-wide properties like printing properties, # presentation show properties etc. r'^(?:word|ppt|xl)/presProps\.xml$', + r'^(?:word|ppt|xl)/comments[0-9]+\.xml$', # we have an allowlist in self.files_to_keep, # so we can trash everything else -- GitLab