From b4ef0c9622a0741bcfa0da1f65d9082251fb4107 Mon Sep 17 00:00:00 2001
From: jvoisin <julien.voisin@dustri.org>
Date: Fri, 22 Jun 2018 20:38:29 +0200
Subject: [PATCH] Improve reliability against corrupted image files

---
 libmat2/images.py     | 11 +++++++++++
 tests/test_libmat2.py | 16 ++++++++++++++--
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/libmat2/images.py b/libmat2/images.py
index 03718e6..a7a9cad 100644
--- a/libmat2/images.py
+++ b/libmat2/images.py
@@ -1,4 +1,5 @@
 import subprocess
+import imghdr
 import json
 import os
 import shutil
@@ -68,6 +69,8 @@ class GdkPixbufAbstractParser(__ImageParser):
     """ GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
         this has the side-effect of removing metadata completely.
     """
+    _type = ''
+
     def remove_all(self):
         _, extension = os.path.splitext(self.filename)
         pixbuf = GdkPixbuf.Pixbuf.new_from_file(self.filename)
@@ -76,8 +79,14 @@ class GdkPixbufAbstractParser(__ImageParser):
         pixbuf.savev(self.output_filename, extension[1:], [], [])
         return True
 
+    def __init__(self, filename):
+        super().__init__(filename)
+        if imghdr.what(filename) != self._type:  # sniffed header must match the parser's type
+            raise ValueError("%r is not a valid %r image" % (filename, self._type))
+
 
 class JPGParser(GdkPixbufAbstractParser):
+    _type = 'jpeg'
     mimetypes = {'image/jpeg'}
     meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
                       'Directory', 'FileSize', 'FileModifyDate',
@@ -90,6 +99,7 @@ class JPGParser(GdkPixbufAbstractParser):
 
 
 class TiffParser(GdkPixbufAbstractParser):
+    _type = 'tiff'
     mimetypes = {'image/tiff'}
     meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
                       'FillOrder', 'PhotometricInterpretation',
@@ -103,6 +113,7 @@ class TiffParser(GdkPixbufAbstractParser):
 
 
 class BMPParser(GdkPixbufAbstractParser):
+    _type = 'bmp'
     mimetypes = {'image/x-ms-bmp'}
     meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
                       'FileSize', 'FileModifyDate', 'FileAccessDate',
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index e1d949d..0df333d 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -87,13 +87,25 @@ class TestCorruptedFiles(unittest.TestCase):
             f.write("trailing garbage")
         p = torrent.TorrentParser('./tests/data/clean.torrent')
         self.assertEqual(p.get_meta(), expected)
-
         os.remove('./tests/data/clean.torrent')
 
     def test_odg(self):
         shutil.copy('./tests/data/dirty.png', './tests/data/clean.odg')
         with self.assertRaises(ValueError):
             office.LibreOfficeParser('./tests/data/clean.odg')
+        os.remove('./tests/data/clean.odg')
+
+    def test_bmp(self):
+        shutil.copy('./tests/data/dirty.png', './tests/data/clean.bmp')
+        self.addCleanup(os.remove, './tests/data/clean.bmp')  # runs even if the assertion fails
+        with self.assertRaises(ValueError):  # a PNG payload must be rejected by the BMP parser
+            images.BMPParser('./tests/data/clean.bmp')
+
+    def test_docx(self):
+        shutil.copy('./tests/data/dirty.png', './tests/data/clean.docx')
+        self.addCleanup(os.remove, './tests/data/clean.docx')  # runs even if the assertion fails
+        with self.assertRaises(ValueError):  # a PNG payload must be rejected by the docx parser
+            office.MSOfficeParser('./tests/data/clean.docx')
 
 class TestGetMeta(unittest.TestCase):
     def test_pdf(self):
@@ -123,7 +135,7 @@ class TestGetMeta(unittest.TestCase):
         self.assertEqual(meta['Comment'], 'Created with GIMP')
 
     def test_tiff(self):
-        p = images.JPGParser('./tests/data/dirty.tiff')
+        p = images.TiffParser('./tests/data/dirty.tiff')
         meta = p.get_meta()
         self.assertEqual(meta['Make'], 'OLYMPUS IMAGING CORP.')
         self.assertEqual(meta['Model'], 'C7070WZ')
-- 
GitLab