Commit 3d284280 authored by jvoisin

Split the tests

parent a1a06d02
Pipeline #19008 failed with stages
in 1 minute and 7 seconds
#!/usr/bin/env python3
import unittest
import shutil
import os
import zipfile
import tempfile
from libmat2 import office, parser_factory
class TestZipMetadata(unittest.TestCase):
    """Check that cleaned office documents expose no metadata.

    Each test copies a "dirty" fixture, cleans the copy with the matching
    parser, and then inspects both the zip container of the cleaned file and
    every file stored inside it.
    """

    @staticmethod
    def __remove_quietly(path):
        """Delete `path`, ignoring the case where it was never created."""
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

    def __check_deep_meta(self, p):
        """Assert that no file inside the archive `p.filename` has metadata.

        The archive is extracted to a temporary directory; every extracted
        file for which `parser_factory.get_parser` returns a parser must
        report an empty metadata dict. Files without a parser are skipped.
        """
        tempdir = tempfile.mkdtemp()
        try:
            # Close the archive as soon as its content is on disk.
            with zipfile.ZipFile(p.filename) as zipin:
                zipin.extractall(tempdir)

            for subdir, _dirs, files in os.walk(tempdir):
                for f in files:
                    complete_path = os.path.join(subdir, f)
                    inside_p, _ = parser_factory.get_parser(complete_path)
                    if inside_p is None:
                        continue
                    self.assertEqual(inside_p.get_meta(), {})
        finally:
            # Runs on assertion failure too, so the directory never leaks.
            shutil.rmtree(tempdir)

    def __check_zip_meta(self, p):
        """Assert that every entry of the archive `p.filename` is normalised.

        Each entry must have an empty comment, the fixed timestamp
        (1980, 1, 1, 0, 0, 0) and a `create_system` of 3.
        """
        with zipfile.ZipFile(p.filename) as zipin:
            for item in zipin.infolist():
                self.assertEqual(item.comment, b'')
                self.assertEqual(item.date_time, (1980, 1, 1, 0, 0, 0))
                self.assertEqual(item.create_system, 3)  # 3 is UNIX

    def __check_cleaning(self, parser_class, extension):
        """Clean a copy of the dirty fixture and verify the cleaned output.

        `parser_class` is the parser used on both the copy and the cleaned
        file; `extension` (without the dot) selects the fixture
        `./tests/data/dirty.<extension>`.
        """
        dirty = './tests/data/dirty.' + extension
        working_copy = './tests/data/clean.' + extension
        cleaned = './tests/data/clean.cleaned.' + extension

        # Registered up front so the generated files are removed even when
        # one of the assertions below fails.
        self.addCleanup(self.__remove_quietly, working_copy)
        self.addCleanup(self.__remove_quietly, cleaned)

        shutil.copy(dirty, working_copy)

        p = parser_class(working_copy)
        self.assertIsNotNone(p.get_meta())
        self.assertTrue(p.remove_all())

        p = parser_class(cleaned)
        self.assertEqual(p.get_meta(), {})

        self.__check_zip_meta(p)
        self.__check_deep_meta(p)

    def test_office(self):
        """A cleaned .docx has no metadata, in the zip or in its members."""
        self.__check_cleaning(office.MSOfficeParser, 'docx')

    def test_libreoffice(self):
        """A cleaned .odt has no metadata, in the zip or in its members."""
        self.__check_cleaning(office.LibreOfficeParser, 'odt')
...@@ -182,70 +182,6 @@ class TestRevisionsCleaning(unittest.TestCase): ...@@ -182,70 +182,6 @@ class TestRevisionsCleaning(unittest.TestCase):
os.remove('./tests/data/revision_clean.docx') os.remove('./tests/data/revision_clean.docx')
os.remove('./tests/data/revision_clean.cleaned.docx') os.remove('./tests/data/revision_clean.cleaned.docx')
class TestDeepCleaning(unittest.TestCase):
    """Check that cleaning office documents also cleans what they contain.

    Each test copies a "dirty" fixture, cleans the copy with the matching
    parser, and then inspects both the zip container of the cleaned file and
    every file stored inside it.
    """

    @staticmethod
    def __remove_quietly(path):
        """Delete `path`, ignoring the case where it was never created."""
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

    def __check_deep_meta(self, p):
        """Assert that no file inside the archive `p.filename` has metadata.

        The archive is extracted to a temporary directory; every extracted
        file for which `parser_factory.get_parser` returns a parser must
        report an empty metadata dict. Files without a parser are skipped.
        """
        tempdir = tempfile.mkdtemp()
        try:
            # Close the archive as soon as its content is on disk.
            with zipfile.ZipFile(p.filename) as zipin:
                zipin.extractall(tempdir)

            for subdir, _dirs, files in os.walk(tempdir):
                for f in files:
                    complete_path = os.path.join(subdir, f)
                    inside_p, _ = parser_factory.get_parser(complete_path)
                    if inside_p is None:
                        continue
                    self.assertEqual(inside_p.get_meta(), {})
        finally:
            # Runs on assertion failure too, so the directory never leaks.
            shutil.rmtree(tempdir)

    def __check_zip_meta(self, p):
        """Assert that every entry of the archive `p.filename` is normalised.

        Each entry must have an empty comment, the fixed timestamp
        (1980, 1, 1, 0, 0, 0) and a `create_system` of 3.
        """
        with zipfile.ZipFile(p.filename) as zipin:
            for item in zipin.infolist():
                self.assertEqual(item.comment, b'')
                self.assertEqual(item.date_time, (1980, 1, 1, 0, 0, 0))
                self.assertEqual(item.create_system, 3)  # 3 is UNIX

    def __check_cleaning(self, parser_class, extension):
        """Clean a copy of the dirty fixture and verify the cleaned output.

        `parser_class` is the parser used on both the copy and the cleaned
        file; `extension` (without the dot) selects the fixture
        `./tests/data/dirty.<extension>`.
        """
        dirty = './tests/data/dirty.' + extension
        working_copy = './tests/data/clean.' + extension
        cleaned = './tests/data/clean.cleaned.' + extension

        # Registered up front so the generated files are removed even when
        # one of the assertions below fails.
        self.addCleanup(self.__remove_quietly, working_copy)
        self.addCleanup(self.__remove_quietly, cleaned)

        shutil.copy(dirty, working_copy)

        p = parser_class(working_copy)
        self.assertIsNotNone(p.get_meta())
        self.assertTrue(p.remove_all())

        p = parser_class(cleaned)
        self.assertEqual(p.get_meta(), {})

        self.__check_zip_meta(p)
        self.__check_deep_meta(p)

    def test_office(self):
        """A cleaned .docx has no metadata, in the zip or in its members."""
        self.__check_cleaning(office.MSOfficeParser, 'docx')

    def test_libreoffice(self):
        """A cleaned .odt has no metadata, in the zip or in its members."""
        self.__check_cleaning(office.LibreOfficeParser, 'odt')
class TestLightWeightCleaning(unittest.TestCase): class TestLightWeightCleaning(unittest.TestCase):
def test_pdf(self): def test_pdf(self):
shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf') shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment