Commit b4ef0c96 authored by jvoisin
Browse files

Improve reliability against corrupted image files

parent dfccf79f
Pipeline #16143 passed with stages
in 2 minutes and 42 seconds
import subprocess
import imghdr
import json
import os
import shutil
......@@ -68,6 +69,8 @@ class GdkPixbufAbstractParser(__ImageParser):
""" GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
this has the side-effect of removing metadata completely.
"""
_type = ''
def remove_all(self):
_, extension = os.path.splitext(self.filename)
pixbuf = GdkPixbuf.Pixbuf.new_from_file(self.filename)
......@@ -76,8 +79,14 @@ class GdkPixbufAbstractParser(__ImageParser):
pixbuf.savev(self.output_filename, extension[1:], [], [])
return True
def __init__(self, filename):
    """ Initialise the parser, refusing files whose content isn't `_type`.

    :param filename: path of the image file to parse.
    :raises ValueError: if `imghdr.what` doesn't report `self._type` for
        this file.
    """
    super().__init__(filename)
    detected = imghdr.what(filename)
    if detected != self._type:  # better safe than sorry
        # Say what was expected and what was found, so that a rejected file
        # can be diagnosed without re-running the detection by hand.
        raise ValueError("%s: expected a '%s' image, but detected %r"
                         % (filename, self._type, detected))
class JPGParser(GdkPixbufAbstractParser):
_type = 'jpeg'
mimetypes = {'image/jpeg'}
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
'Directory', 'FileSize', 'FileModifyDate',
......@@ -90,6 +99,7 @@ class JPGParser(GdkPixbufAbstractParser):
class TiffParser(GdkPixbufAbstractParser):
_type = 'tiff'
mimetypes = {'image/tiff'}
meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
'FillOrder', 'PhotometricInterpretation',
......@@ -103,6 +113,7 @@ class TiffParser(GdkPixbufAbstractParser):
class BMPParser(GdkPixbufAbstractParser):
_type = 'bmp'
mimetypes = {'image/x-ms-bmp'}
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
'FileSize', 'FileModifyDate', 'FileAccessDate',
......
......@@ -87,13 +87,25 @@ class TestCorruptedFiles(unittest.TestCase):
f.write("trailing garbage")
p = torrent.TorrentParser('./tests/data/clean.torrent')
self.assertEqual(p.get_meta(), expected)
os.remove('./tests/data/clean.torrent')
def test_odg(self):
    """ LibreOfficeParser must raise ValueError on a copy of dirty.png
    that has been given an .odg extension.
    """
    fake_odg = './tests/data/clean.odg'
    shutil.copy('./tests/data/dirty.png', fake_odg)
    try:
        with self.assertRaises(ValueError):
            office.LibreOfficeParser(fake_odg)
    finally:
        # Remove the copy even if the assertion fails, so a failing run
        # doesn't leave a stray file in tests/data.
        os.remove(fake_odg)
def test_bmp(self):
    """ BMPParser must raise ValueError on a copy of dirty.png that has
    been given a .bmp extension.
    """
    fake_bmp = './tests/data/clean.bmp'
    shutil.copy('./tests/data/dirty.png', fake_bmp)
    try:
        with self.assertRaises(ValueError):
            # The constructor is expected to raise, so nothing is bound.
            images.BMPParser(fake_bmp)
    finally:
        # Remove the copy even if the assertion fails, so a failing run
        # doesn't leave a stray file in tests/data.
        os.remove(fake_bmp)
def test_docx(self):
    """ MSOfficeParser must raise ValueError on a copy of dirty.png that
    has been given a .docx extension.
    """
    fake_docx = './tests/data/clean.docx'
    shutil.copy('./tests/data/dirty.png', fake_docx)
    try:
        with self.assertRaises(ValueError):
            # The constructor is expected to raise, so nothing is bound.
            office.MSOfficeParser(fake_docx)
    finally:
        # Remove the copy even if the assertion fails, so a failing run
        # doesn't leave a stray file in tests/data.
        os.remove(fake_docx)
class TestGetMeta(unittest.TestCase):
def test_pdf(self):
......@@ -123,7 +135,7 @@ class TestGetMeta(unittest.TestCase):
self.assertEqual(meta['Comment'], 'Created with GIMP')
def test_tiff(self):
p = images.JPGParser('./tests/data/dirty.tiff')
p = images.TiffParser('./tests/data/dirty.tiff')
meta = p.get_meta()
self.assertEqual(meta['Make'], 'OLYMPUS IMAGING CORP.')
self.assertEqual(meta['Model'], 'C7070WZ')
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment