Skip to content
Snippets Groups Projects
Commit 5b38bd7c authored by Julien (jvoisin) Voisin
Browse files

Improve the reliability of the office parser

parent 846a2614
No related branches found
No related tags found
No related merge requests found
......@@ -16,6 +16,13 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
files_to_keep = set() # type: Set[str]
files_to_omit = set() # type: Set[Pattern]
def __init__(self, filename):
    """Initialise the parser and check that the file opens as a zip archive.

    The archive is opened once, purely as a validity check, so that a
    corrupted or non-zip file is rejected at construction time.

    Raises:
        ValueError: if `self.filename` is not a valid zip archive.
    """
    super().__init__(filename)
    try:  # better fail here than later
        # The context manager closes the file handle right after the
        # check instead of leaving it open until garbage collection.
        with zipfile.ZipFile(self.filename):
            pass
    except zipfile.BadZipFile as e:
        # Keep raising ValueError for callers, but preserve the cause.
        raise ValueError("%s is not a valid zip archive" % self.filename) from e
def _clean_zipinfo(self, zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo:
zipinfo.create_system = 3 # Linux
zipinfo.comment = b''
......
......@@ -90,6 +90,11 @@ class TestCorruptedFiles(unittest.TestCase):
os.remove('./tests/data/clean.torrent')
def test_odg(self):
    """A PNG file copied to an .odg name must make the parser raise ValueError."""
    fake_odg = './tests/data/clean.odg'
    shutil.copy('./tests/data/dirty.png', fake_odg)
    # Remove the copied fixture even when the assertion below fails, so
    # the test leaves no stray file behind in tests/data.
    self.addCleanup(os.remove, fake_odg)
    with self.assertRaises(ValueError):
        office.LibreOfficeParser(fake_odg)
class TestGetMeta(unittest.TestCase):
def test_pdf(self):
p = pdf.PDFParser('./tests/data/dirty.pdf')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment