From 3a070b0ab70c4d4a456bdd12d0cd490ad127e320 Mon Sep 17 00:00:00 2001
From: jvoisin <julien.voisin@dustri.org>
Date: Thu, 25 Oct 2018 11:56:46 +0200
Subject: [PATCH] Add support for zip files

---
 libmat2/archive.py            |  5 +++++
 tests/test_corrupted_files.py | 15 +++++++++++++++
 tests/test_libmat2.py         | 35 ++++++++++++++++++++++++++++++++++-
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/libmat2/archive.py b/libmat2/archive.py
index b4700c3..bcf8d33 100644
--- a/libmat2/archive.py
+++ b/libmat2/archive.py
@@ -157,3 +157,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
             os.remove(self.output_filename)
             return False
         return True
+
+
+class ZipParser(ArchiveBasedAbstractParser):
+    """Parser for generic zip archives; all behaviour is inherited."""
+    mimetypes = {'application/zip'}
diff --git a/tests/test_corrupted_files.py b/tests/test_corrupted_files.py
index 181d4d2..e7d3c2a 100644
--- a/tests/test_corrupted_files.py
+++ b/tests/test_corrupted_files.py
@@ -4,6 +4,7 @@ import unittest
 import shutil
 import os
 import logging
+import zipfile
 
 from libmat2 import pdf, images, audio, office, parser_factory, torrent
 from libmat2 import harmless, video
@@ -222,3 +223,17 @@ class TestCorruptedFiles(unittest.TestCase):
         p = video.AVIParser('./tests/data/--output.avi')
         self.assertFalse(p.remove_all())
         os.remove('./tests/data/--output.avi')
+
+    def test_zip(self):
+        """Metadata of a zip holding embedded_corrupted.docx is readable, but cleaning fails."""
+        with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
+            self.addCleanup(os.remove, './tests/data/dirty.zip')  # removed even if an assert fails
+            for name in ('dirty.flac', 'dirty.docx', 'dirty.jpg', 'embedded_corrupted.docx'):
+                zout.write('./tests/data/' + name)
+        p, mimetype = parser_factory.get_parser('./tests/data/dirty.zip')
+        self.assertEqual(mimetype, 'application/zip')
+        meta = p.get_meta()
+        self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
+        self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
+        # Presumably embedded_corrupted.docx is what makes the cleaning fail -- confirm.
+        self.assertFalse(p.remove_all())
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 46d6aaa..1602480 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -6,7 +6,7 @@ import os
 import zipfile
 
 from libmat2 import pdf, images, audio, office, parser_factory, torrent, harmless
-from libmat2 import check_dependencies, video
+from libmat2 import check_dependencies, video, archive
 
 
 class TestCheckDependencies(unittest.TestCase):
@@ -153,6 +153,18 @@ class TestGetMeta(unittest.TestCase):
         meta = p.get_meta()
         self.assertEqual(meta, {})
 
+    def test_zip(self):
+        """The metadata of a zip archive is reported per archived file."""
+        with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
+            self.addCleanup(os.remove, './tests/data/dirty.zip')  # removed even if an assert fails
+            for name in ('dirty.flac', 'dirty.docx', 'dirty.jpg'):
+                zout.write('./tests/data/' + name)
+        p, mimetype = parser_factory.get_parser('./tests/data/dirty.zip')
+        self.assertEqual(mimetype, 'application/zip')
+        meta = p.get_meta()
+        self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
+        self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
+
 
 class TestRemovingThumbnails(unittest.TestCase):
     def test_odt(self):
@@ -488,3 +500,24 @@ class TestCleaning(unittest.TestCase):
         os.remove('./tests/data/clean.avi')
         os.remove('./tests/data/clean.cleaned.avi')
         os.remove('./tests/data/clean.cleaned.cleaned.avi')
+
+    def test_zip(self):
+        """A dirty zip can be cleaned, and the cleaned archive can be cleaned again."""
+        with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
+            self.addCleanup(os.remove, './tests/data/dirty.zip')  # removed even if an assert fails
+            for name in ('dirty.flac', 'dirty.docx', 'dirty.jpg'):
+                zout.write('./tests/data/' + name)
+        p = archive.ZipParser('./tests/data/dirty.zip')
+        meta = p.get_meta()
+        self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
+
+        ret = p.remove_all()
+        self.assertTrue(ret)
+        # Registered only after a successful clean, i.e. once the output file exists.
+        self.addCleanup(os.remove, './tests/data/dirty.cleaned.zip')
+
+        # The cleaned archive must expose no metadata and be cleanable once more.
+        p = archive.ZipParser('./tests/data/dirty.cleaned.zip')
+        self.assertEqual(p.get_meta(), {})
+        self.assertTrue(p.remove_all())
+        self.addCleanup(os.remove, './tests/data/dirty.cleaned.cleaned.zip')
-- 
GitLab