Omit the Thumbnails/ directory when cleaning LibreOffice archives.

libmat2/office.py: add '^Thumbnails/' to LibreOfficeParser.files_to_omit.
tests/test_libmat2.py: add a test asserting that Thumbnails/thumbnail.png
is present in tests/data/revision.odt and absent from the cleaned output.
The test closes its zip handles with context managers and removes both
temporary files through addCleanup, including when an assertion fails.

diff --git a/libmat2/office.py b/libmat2/office.py
index e813fae76bb94556588a1319362a2ccbf4d8dadf..34ae7a226c57003bbd88ba5515f0a9b95948cd3b 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -146,6 +146,7 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
     files_to_omit = set(map(re.compile, {  # type: ignore
         '^meta\.xml$',
         '^Configurations2/',
+        '^Thumbnails/',
     }))
 
     def get_meta(self) -> Dict[str, str]:
diff --git a/tests/data/revision.odt b/tests/data/revision.odt
new file mode 100644
index 0000000000000000000000000000000000000000..d3b209b805f7a9064d476679360b322c305e34ed
Binary files /dev/null and b/tests/data/revision.odt differ
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index b34e7a46fb3973ce6fc238e0a1fca18bcc287a7f..3ea044f91b0a82ad6316076cf59f1cb350ed2036 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -105,6 +105,27 @@ class TestGetMeta(unittest.TestCase):
         self.assertEqual(meta['meta:generator'], 'LibreOffice/3.3$Unix LibreOffice_project/330m19$Build-202')
 
 
+class TestRemovingThumbnails(unittest.TestCase):
+    def test_odt(self):
+        """Check that the cleaned copy of an .odt no longer has its thumbnail."""
+        shutil.copy('./tests/data/revision.odt', './tests/data/clean.odt')
+        # Registered right away so the working copy is deleted even when an
+        # assertion below fails.
+        self.addCleanup(os.remove, './tests/data/clean.odt')
+
+        with zipfile.ZipFile(os.path.abspath('./tests/data/clean.odt')) as zipin:
+            self.assertIn('Thumbnails/thumbnail.png', zipin.namelist())
+
+        p = office.LibreOfficeParser('./tests/data/clean.odt')
+        self.assertTrue(p.remove_all())
+        # Registered only after remove_all() reported success; from here on
+        # the test expects clean.cleaned.odt to exist.
+        self.addCleanup(os.remove, './tests/data/clean.cleaned.odt')
+
+        with zipfile.ZipFile(os.path.abspath('./tests/data/clean.cleaned.odt')) as zipin:
+            self.assertNotIn('Thumbnails/thumbnail.png', zipin.namelist())
+
+
 class TestDeepCleaning(unittest.TestCase):
     def __check_deep_meta(self, p):
         tempdir = tempfile.mkdtemp()