Commit 5b38bd7c authored by jvoisin
Browse files

Improve the reliability of the office parser

parent 846a2614
Pipeline #16126 passed with stages
in 2 minutes and 44 seconds
......@@ -16,6 +16,13 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
# NOTE(review): presumably the names of archive members to keep untouched — confirm against the code that reads this set.
files_to_keep = set() # type: Set[str]
# NOTE(review): presumably compiled patterns matching archive members to drop — confirm against the code that reads this set.
files_to_omit = set() # type: Set[Pattern]
def __init__(self, filename):
    """Initialise the parser and check up front that the file is a zip archive.

    Raises:
        ValueError: if the file at `self.filename` cannot be opened as a
            zip archive (`zipfile.BadZipFile`).
    """
    super().__init__(filename)
    try:  # better fail here than later
        # Probe through a context manager so the file handle opened for
        # the check is closed right away instead of being leaked.
        with zipfile.ZipFile(self.filename):
            pass
    except zipfile.BadZipFile as e:
        # Same exception type as before; the original cause is chained
        # explicitly to keep the traceback informative.
        raise ValueError from e
def _clean_zipinfo(self, zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo:
zipinfo.create_system = 3 # Linux
zipinfo.comment = b''
......
......@@ -90,6 +90,11 @@ class TestCorruptedFiles(unittest.TestCase):
os.remove('./tests/data/clean.torrent')
def test_odg(self):
    """Constructing a LibreOfficeParser on a copy of dirty.png named
    clean.odg must raise ValueError."""
    bogus_odg = './tests/data/clean.odg'
    shutil.copy('./tests/data/dirty.png', bogus_odg)
    try:
        with self.assertRaises(ValueError):
            office.LibreOfficeParser(bogus_odg)
    finally:
        # Remove the temporary copy even when the assertion fails, so it
        # does not linger in the test data directory.
        os.remove(bogus_odg)
class TestGetMeta(unittest.TestCase):
def test_pdf(self):
p = pdf.PDFParser('./tests/data/dirty.pdf')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment