Commit 3a070b0a authored by jvoisin's avatar jvoisin

Add support for zip files

parent 283e5e57
Pipeline #20240 passed with stages
in 4 minutes and 55 seconds
......@@ -157,3 +157,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
os.remove(self.output_filename)
return False
return True
class ZipParser(ArchiveBasedAbstractParser):
mimetypes = {'application/zip'}
......@@ -4,6 +4,7 @@ import unittest
import shutil
import os
import logging
import zipfile
from libmat2 import pdf, images, audio, office, parser_factory, torrent
from libmat2 import harmless, video
......@@ -222,3 +223,17 @@ class TestCorruptedFiles(unittest.TestCase):
p = video.AVIParser('./tests/data/--output.avi')
self.assertFalse(p.remove_all())
os.remove('./tests/data/--output.avi')
def test_zip(self):
with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
zout.write('./tests/data/dirty.flac')
zout.write('./tests/data/dirty.docx')
zout.write('./tests/data/dirty.jpg')
zout.write('./tests/data/embedded_corrupted.docx')
p, mimetype = parser_factory.get_parser('./tests/data/dirty.zip')
self.assertEqual(mimetype, 'application/zip')
meta = p.get_meta()
self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
self.assertFalse(p.remove_all())
os.remove('./tests/data/dirty.zip')
......@@ -6,7 +6,7 @@ import os
import zipfile
from libmat2 import pdf, images, audio, office, parser_factory, torrent, harmless
from libmat2 import check_dependencies, video
from libmat2 import check_dependencies, video, archive
class TestCheckDependencies(unittest.TestCase):
......@@ -153,6 +153,18 @@ class TestGetMeta(unittest.TestCase):
meta = p.get_meta()
self.assertEqual(meta, {})
def test_zip(self):
with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
zout.write('./tests/data/dirty.flac')
zout.write('./tests/data/dirty.docx')
zout.write('./tests/data/dirty.jpg')
p, mimetype = parser_factory.get_parser('./tests/data/dirty.zip')
self.assertEqual(mimetype, 'application/zip')
meta = p.get_meta()
self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
os.remove('./tests/data/dirty.zip')
class TestRemovingThumbnails(unittest.TestCase):
def test_odt(self):
......@@ -488,3 +500,24 @@ class TestCleaning(unittest.TestCase):
os.remove('./tests/data/clean.avi')
os.remove('./tests/data/clean.cleaned.avi')
os.remove('./tests/data/clean.cleaned.cleaned.avi')
def test_zip(self):
with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
zout.write('./tests/data/dirty.flac')
zout.write('./tests/data/dirty.docx')
zout.write('./tests/data/dirty.jpg')
p = archive.ZipParser('./tests/data/dirty.zip')
meta = p.get_meta()
self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
ret = p.remove_all()
self.assertTrue(ret)
p = archive.ZipParser('./tests/data/dirty.cleaned.zip')
self.assertEqual(p.get_meta(), {})
self.assertTrue(p.remove_all())
os.remove('./tests/data/dirty.zip')
os.remove('./tests/data/dirty.cleaned.zip')
os.remove('./tests/data/dirty.cleaned.cleaned.zip')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment