Skip to content
Snippets Groups Projects
Commit b4ef0c96 authored by Julien (jvoisin) Voisin
Browse files

Improve reliability against corrupted image files

parent dfccf79f
No related branches found
No related tags found
No related merge requests found
import subprocess
import imghdr
import json
import os
import shutil
......@@ -68,6 +69,8 @@ class GdkPixbufAbstractParser(__ImageParser):
""" GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
this has the side-effect of removing metadata completely.
"""
_type = ''
def remove_all(self):
_, extension = os.path.splitext(self.filename)
pixbuf = GdkPixbuf.Pixbuf.new_from_file(self.filename)
......@@ -76,8 +79,14 @@ class GdkPixbufAbstractParser(__ImageParser):
pixbuf.savev(self.output_filename, extension[1:], [], [])
return True
def __init__(self, filename):
    """Initialise the parser and reject files whose content is not `_type`.

    The parent initialiser runs first; the file is then sniffed with
    `imghdr.what` and the result is compared against the class-level
    `_type` attribute.

    :param filename: path of the image file to parse
    :raises ValueError: if the detected image type differs from `_type`
        (`imghdr.what` returns None for content it does not recognise,
        which also ends up here)
    """
    super().__init__(filename)
    detected = imghdr.what(filename)
    if detected != self._type:  # better safe than sorry
        # Same exception type as before, but with enough context to tell
        # which file was rejected and why.
        raise ValueError("Unexpected image type for %r: expected %r, "
                         "detected %r" % (filename, self._type, detected))
class JPGParser(GdkPixbufAbstractParser):
_type = 'jpeg'
mimetypes = {'image/jpeg'}
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
'Directory', 'FileSize', 'FileModifyDate',
......@@ -90,6 +99,7 @@ class JPGParser(GdkPixbufAbstractParser):
class TiffParser(GdkPixbufAbstractParser):
_type = 'tiff'
mimetypes = {'image/tiff'}
meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
'FillOrder', 'PhotometricInterpretation',
......@@ -103,6 +113,7 @@ class TiffParser(GdkPixbufAbstractParser):
class BMPParser(GdkPixbufAbstractParser):
_type = 'bmp'
mimetypes = {'image/x-ms-bmp'}
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
'FileSize', 'FileModifyDate', 'FileAccessDate',
......
......@@ -87,13 +87,25 @@ class TestCorruptedFiles(unittest.TestCase):
f.write("trailing garbage")
p = torrent.TorrentParser('./tests/data/clean.torrent')
self.assertEqual(p.get_meta(), expected)
os.remove('./tests/data/clean.torrent')
def test_odg(self):
    """A PNG copied to a .odg name must make LibreOfficeParser raise ValueError."""
    path = './tests/data/clean.odg'
    shutil.copy('./tests/data/dirty.png', path)
    try:
        with self.assertRaises(ValueError):
            office.LibreOfficeParser(path)
    finally:
        # Remove the copied fixture even when the assertion fails, so a
        # failing run does not leave a stray file in tests/data.
        os.remove(path)
def test_bmp(self):
    """A PNG copied to a .bmp name must make BMPParser raise ValueError."""
    path = './tests/data/clean.bmp'
    shutil.copy('./tests/data/dirty.png', path)
    try:
        with self.assertRaises(ValueError):
            images.BMPParser(path)
    finally:
        # Remove the copied fixture even when the assertion fails, so a
        # failing run does not leave a stray file in tests/data.
        os.remove(path)
def test_docx(self):
    """A PNG copied to a .docx name must make MSOfficeParser raise ValueError."""
    path = './tests/data/clean.docx'
    shutil.copy('./tests/data/dirty.png', path)
    try:
        with self.assertRaises(ValueError):
            office.MSOfficeParser(path)
    finally:
        # Remove the copied fixture even when the assertion fails, so a
        # failing run does not leave a stray file in tests/data.
        os.remove(path)
class TestGetMeta(unittest.TestCase):
def test_pdf(self):
......@@ -123,7 +135,7 @@ class TestGetMeta(unittest.TestCase):
self.assertEqual(meta['Comment'], 'Created with GIMP')
def test_tiff(self):
p = images.JPGParser('./tests/data/dirty.tiff')
p = images.TiffParser('./tests/data/dirty.tiff')
meta = p.get_meta()
self.assertEqual(meta['Make'], 'OLYMPUS IMAGING CORP.')
self.assertEqual(meta['Model'], 'C7070WZ')
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment