diff --git a/README.md b/README.md
index 5f902feaf056214ea971b21637f16fbf813b48b1..c81daffb50ba23fa5891509c22d9e4e00d60b45b 100644
--- a/README.md
+++ b/README.md
@@ -152,6 +152,8 @@ Copyright 2016 Marie-Rose for mat2's logo
 The `tests/data/dirty_with_nsid.docx` file is licensed under GPLv3,
 and was borrowed from the Calibre project: https://calibre-ebook.com/downloads/demos/demo.docx
 
+The `tests/data/narrated_powerpoint_presentation.pptx` file is in the public domain.
+
 # Thanks
 
 mat2 wouldn't exist without:
diff --git a/libmat2/office.py b/libmat2/office.py
index 369ae9e8a1fe3461cca4a268e64f03f943ed64fc..2da37cdab9b64256ec06aaa3a1afddb71d6e3757 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -160,7 +160,7 @@ class MSOfficeParser(ZipParser):
         """
         try:
             tree, namespace = _parse_xml(full_path)
-        except ET.ParseError as e:
+        except ET.ParseError as e:  # pragma: no cover
             logging.error("Unable to parse %s: %s", full_path, e)
             return False
 
@@ -220,7 +220,7 @@ class MSOfficeParser(ZipParser):
     def __remove_revisions(full_path: str) -> bool:
         try:
             tree, namespace = _parse_xml(full_path)
-        except ET.ParseError as e:
+        except ET.ParseError as e:  # pragma: no cover
             logging.error("Unable to parse %s: %s", full_path, e)
             return False
 
@@ -299,7 +299,7 @@ class MSOfficeParser(ZipParser):
         """ MSOffice documents are using various counters for cross-references,
         we collect them all, to make sure that they're effectively counters,
         and not unique id used for fingerprinting."""
-        with open(full_path) as f:
+        with open(full_path, encoding='utf-8') as f:
             content = f.read()
             # relationship id
             for i in re.findall(r'(?:\s|r:)[iIdD]="rId([0-9]+)"(?:\s|/)', content):
diff --git a/tests/data/narrated_powerpoint_presentation.pptx b/tests/data/narrated_powerpoint_presentation.pptx
new file mode 100644
index 0000000000000000000000000000000000000000..ef041324b47a69ab5d3315687e57e21d789e4875
Binary files /dev/null and b/tests/data/narrated_powerpoint_presentation.pptx differ
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 9e208ecd1693b5e589ca890c8b467623be7a2a51..30552daa7a719104923a7a9c93ab19c03763dc04 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -777,3 +777,16 @@ class TestNoSandbox(unittest.TestCase):
         os.remove('./tests/data/clean.png')
         os.remove('./tests/data/clean.cleaned.png')
         os.remove('./tests/data/clean.cleaned.cleaned.png')
+
+class TestComplexOfficeFiles(unittest.TestCase):
+    def test_complex_pptx(self):
+        """The narrated PowerPoint sample must be cleaned without errors."""
+        target = './tests/data/clean.pptx'
+        shutil.copy('./tests/data/narrated_powerpoint_presentation.pptx', target)
+        # Registered right after the copy so it is removed even if cleaning fails.
+        self.addCleanup(os.remove, target)
+
+        p = office.MSOfficeParser(target)
+        self.assertTrue(p.remove_all())
+
+        os.remove(p.output_filename)