diff --git a/src/parsers/png.py b/src/parsers/png.py index 3c0a5077f7b16af67c93dee52593cd614f44be28..20cd234a8e08b40f12df554c1d5df10c160934f3 100644 --- a/src/parsers/png.py +++ b/src/parsers/png.py @@ -7,12 +7,21 @@ from . import abstract class PNGParser(abstract.AbstractParser): mimetypes = {'image/png', } - meta_list = set() + meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', + 'Directory', 'FileSize', 'FileModifyDate', 'FileAccessDate', + "FileInodeChangeDate", 'FilePermissions', 'FileType', + 'FileTypeExtension', 'MIMEType', 'ImageWidth', 'BitDepth', 'ColorType', + 'Compression', 'Filter', 'Interlace', 'BackgroundColor', 'ImageSize', + 'Megapixels', 'ImageHeight'} def get_meta(self): out = subprocess.check_output(['exiftool', '-json', self.filename]) - return json.loads(out)[0] + meta = json.loads(out)[0] + for key in self.meta_whitelist: + meta.pop(key, None) + return meta def remove_all(self): surface = cairo.ImageSurface.create_from_png(self.filename) surface.write_to_png(self.output_filename) + return True diff --git a/tests/data/dirty.png b/tests/data/dirty.png new file mode 100644 index 0000000000000000000000000000000000000000..00acab0cc3bba0dd92369b8121f471192130b68d Binary files /dev/null and b/tests/data/dirty.png differ diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py index 4b36270381001efe85c2ea27b1aad760712360fc..930508056087570aab8b780878972ee96669da6d 100644 --- a/tests/test_libmat2.py +++ b/tests/test_libmat2.py @@ -5,7 +5,7 @@ import shutil import os from src import parsers -from src.parsers import pdf +from src.parsers import pdf, png class TestGetMeta(unittest.TestCase): def test_pdf(self): @@ -14,14 +14,15 @@ class TestGetMeta(unittest.TestCase): self.assertEqual(meta['producer'], 'pdfTeX-1.40.14') self.assertEqual(meta['creator'], "'Certified by IEEE PDFeXpress at 03/19/2016 2:56:07 AM'") -class TestCleaning(unittest.TestCase): - def setUp(self): - shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf') - - def tearDown(self): - os.remove('./tests/data/clean.pdf') + def test_png(self): + p = png.PNGParser('./tests/data/dirty.png') + meta = p.get_meta() + self.assertEqual(meta['Comment'], 'This is a comment, be careful!') + self.assertEqual(meta['ModifyDate'], "2018:03:20 21:59:25") +class TestCleaning(unittest.TestCase): def test_pdf(self): + shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf') p = pdf.PDFParser('./tests/data/clean.pdf') meta = p.get_meta() @@ -33,3 +34,20 @@ class TestCleaning(unittest.TestCase): p = pdf.PDFParser('./tests/data/clean.pdf.cleaned') expected_meta = {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1} self.assertEqual(p.get_meta(), expected_meta) + + os.remove('./tests/data/clean.pdf') + + def test_png(self): + shutil.copy('./tests/data/dirty.png', './tests/data/clean.png') + p = png.PNGParser('./tests/data/clean.png') + + meta = p.get_meta() + self.assertEqual(meta['Comment'], 'This is a comment, be careful!') + + ret = p.remove_all() + self.assertTrue(ret) + + p = png.PNGParser('./tests/data/clean.png.cleaned') + self.assertEqual(p.get_meta(), {}) + + os.remove('./tests/data/clean.png')