From 5b38bd7ccd97cdca864351b4af0fcbaa227f509e Mon Sep 17 00:00:00 2001
From: jvoisin <julien.voisin@dustri.org>
Date: Thu, 21 Jun 2018 23:18:50 +0200
Subject: [PATCH] Improve the reliability of the office parser

---
 libmat2/office.py     | 7 +++++++
 tests/test_libmat2.py | 5 +++++
 2 files changed, 12 insertions(+)

diff --git a/libmat2/office.py b/libmat2/office.py
index a5a49cf..aea56b9 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -16,6 +16,13 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
     files_to_keep = set()  # type: Set[str] 
     files_to_omit = set() # type: Set[Pattern] 
 
+    def __init__(self, filename):
+        super().__init__(filename)
+        try:  # better fail here than later: reject non-zip input up front
+            zipfile.ZipFile(self.filename).close()  # probe only, release the handle
+        except zipfile.BadZipFile:
+            raise ValueError  # surfaced to callers as ValueError, not BadZipFile
+
     def _clean_zipinfo(self, zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo:
         zipinfo.create_system = 3  # Linux
         zipinfo.comment = b''
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 4b312de..e1d949d 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -90,6 +90,11 @@ class TestCorruptedFiles(unittest.TestCase):
 
         os.remove('./tests/data/clean.torrent')
 
+    def test_odg(self):  # a non-zip file with an .odg name must be rejected
+        shutil.copy('./tests/data/dirty.png', './tests/data/clean.odg')
+        self.addCleanup(os.remove, './tests/data/clean.odg')  # runs even on failure
+        self.assertRaises(ValueError, office.LibreOfficeParser, './tests/data/clean.odg')
+
 class TestGetMeta(unittest.TestCase):
     def test_pdf(self):
         p = pdf.PDFParser('./tests/data/dirty.pdf')
-- 
GitLab