Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • tguinot/mat2
  • jvoisin/mat2
  • dachary/mat2
  • mejo-/mat2
  • LogicalDash/mat2
  • dkg/mat2
  • christian/mat2
  • Selflike323/mat2
  • fz/mat2
  • iwwmidatlanticgdc/mat2
  • Gu1nn3zz/mat2
  • smagnin/mat2
  • flashcode/mat2
  • MANCASTILLEJA/mat2
  • jboursier/mat2
  • tails/mat2
  • matiargs/mat2
  • Brolf/mat2
  • madaidan/mat2
  • Delmer84/mat2
  • yuebyzua/mat2
  • yyyyyyyan/mat2
  • rmnvgr/mat2
  • Marxism-Leninism/mat2
  • GNUtoo/mat2
  • allexj/mat2
  • b068931cc450442b63f5b3d276ea4297/mat2
  • chenrui/mat2
  • nosec13346/mat2
  • anelki/mat2
30 results
Show changes
Commits on Source (5)
...@@ -2,6 +2,7 @@ import zipfile ...@@ -2,6 +2,7 @@ import zipfile
import datetime import datetime
import tempfile import tempfile
import os import os
import sys
import logging import logging
import shutil import shutil
from typing import Dict, Set, Pattern from typing import Dict, Set, Pattern
...@@ -48,11 +49,10 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): ...@@ -48,11 +49,10 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
zipinfo.date_time = (1980, 1, 1, 0, 0, 0) # this is as early as a zipfile can be zipinfo.date_time = (1980, 1, 1, 0, 0, 0) # this is as early as a zipfile can be
return zipinfo return zipinfo
@staticmethod def _get_zipinfo_meta(self, zipinfo: zipfile.ZipInfo) -> Dict[str, str]:
def _get_zipinfo_meta(zipinfo: zipfile.ZipInfo) -> Dict[str, str]:
metadata = {} metadata = {}
if zipinfo.create_system == 3: # this is Linux if zipinfo.create_system == 3: # this is Linux
pass metadata['create_system'] = 'Linux'
elif zipinfo.create_system == 2: elif zipinfo.create_system == 2:
metadata['create_system'] = 'Windows' metadata['create_system'] = 'Windows'
else: else:
...@@ -64,11 +64,16 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): ...@@ -64,11 +64,16 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
if zipinfo.date_time != (1980, 1, 1, 0, 0, 0): if zipinfo.date_time != (1980, 1, 1, 0, 0, 0):
metadata['date_time'] = str(datetime.datetime(*zipinfo.date_time)) metadata['date_time'] = str(datetime.datetime(*zipinfo.date_time))
ret = self._parse_files()
metadata_files = ret[0]
for name, _ in metadata_files.items():
metadata[name] = metadata_files
return metadata return metadata
def remove_all(self) -> bool: # pylint: disable=too-many-branches
# pylint: disable=too-many-branches def _parse_files(self) -> tuple:
metadata = {} # type: dict
caller = sys._getframe(1).f_code.co_name
with zipfile.ZipFile(self.filename) as zin,\ with zipfile.ZipFile(self.filename) as zin,\
zipfile.ZipFile(self.output_filename, 'w') as zout: zipfile.ZipFile(self.output_filename, 'w') as zout:
...@@ -84,11 +89,12 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): ...@@ -84,11 +89,12 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
zin.extract(member=item, path=temp_folder) zin.extract(member=item, path=temp_folder)
full_path = os.path.join(temp_folder, item.filename) full_path = os.path.join(temp_folder, item.filename)
if self._specific_cleanup(full_path) is False: if caller == "remove_all":
logging.warning("Something went wrong during deep cleaning of %s", if self._specific_cleanup(full_path) is False:
item.filename) logging.warning("Something went wrong during deep cleaning of %s",
abort = True item.filename)
continue abort = True
continue
if any(map(lambda r: r.search(item.filename), self.files_to_keep)): if any(map(lambda r: r.search(item.filename), self.files_to_keep)):
# those files aren't supported, but we want to add them anyway # those files aren't supported, but we want to add them anyway
...@@ -112,8 +118,11 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): ...@@ -112,8 +118,11 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
abort = True abort = True
continue continue
if tmp_parser: if tmp_parser:
tmp_parser.remove_all() if caller == "remove_all":
os.rename(tmp_parser.output_filename, full_path) tmp_parser.remove_all()
os.rename(tmp_parser.output_filename, full_path)
else:
metadata[item.filename] = tmp_parser.get_meta()
zinfo = zipfile.ZipInfo(item.filename) # type: ignore zinfo = zipfile.ZipInfo(item.filename) # type: ignore
clean_zinfo = self._clean_zipinfo(zinfo) clean_zinfo = self._clean_zipinfo(zinfo)
...@@ -123,5 +132,20 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): ...@@ -123,5 +132,20 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
shutil.rmtree(temp_folder) shutil.rmtree(temp_folder)
if abort: if abort:
os.remove(self.output_filename) os.remove(self.output_filename)
return False return metadata, abort
return True
def remove_all(self) -> bool:
ret = self._parse_files()
return not ret[1]
class ZIPParser(ArchiveBasedAbstractParser):
mimetypes = {'application/zip'}
def get_meta(self) -> Dict[str, str]:
metadata = {}
zipin = zipfile.ZipFile(self.filename)
for item in zipin.infolist():
for key, value in self._get_zipinfo_meta(item).items():
metadata[key] = value
zipin.close()
return metadata
...@@ -67,12 +67,7 @@ def show_meta(filename: str): ...@@ -67,12 +67,7 @@ def show_meta(filename: str):
return return
print("[+] Metadata for %s:" % filename) print("[+] Metadata for %s:" % filename)
metadata = p.get_meta().items() for k, v in p.get_meta().items():
if not metadata:
print(" No metadata found")
return
for k, v in metadata:
try: # FIXME this is ugly. try: # FIXME this is ugly.
print(" %s: %s" % (k, v)) print(" %s: %s" % (k, v))
except UnicodeEncodeError: except UnicodeEncodeError:
......
File added
...@@ -65,7 +65,10 @@ class TestCorruptedEmbedded(unittest.TestCase): ...@@ -65,7 +65,10 @@ class TestCorruptedEmbedded(unittest.TestCase):
os.remove('./tests/data/clean.docx') os.remove('./tests/data/clean.docx')
def test_odt(self): def test_odt(self):
expected = { expected = {'Pictures/100002010000021D0000039CFEBF39BEE21A25FB.png':
{'PixelUnits': 'meters',
'PixelsPerUnitX': 341,
'PixelsPerUnitY': 341},
'create_system': 'Weird', 'create_system': 'Weird',
'date_time': '2018-06-10 17:18:18', 'date_time': '2018-06-10 17:18:18',
'meta.xml': 'harmful content' 'meta.xml': 'harmful content'
......
...@@ -41,7 +41,7 @@ class TestZipMetadata(unittest.TestCase): ...@@ -41,7 +41,7 @@ class TestZipMetadata(unittest.TestCase):
self.assertTrue(ret) self.assertTrue(ret)
p = office.MSOfficeParser('./tests/data/clean.cleaned.docx') p = office.MSOfficeParser('./tests/data/clean.cleaned.docx')
self.assertEqual(p.get_meta(), {}) self.assertEqual(p.get_meta(), {'create_system': 'Linux', 'word/media/image1.png': {'word/media/image1.png': {}}})
self.__check_zip_meta(p) self.__check_zip_meta(p)
self.__check_deep_meta(p) self.__check_deep_meta(p)
...@@ -60,7 +60,7 @@ class TestZipMetadata(unittest.TestCase): ...@@ -60,7 +60,7 @@ class TestZipMetadata(unittest.TestCase):
self.assertTrue(ret) self.assertTrue(ret)
p = office.LibreOfficeParser('./tests/data/clean.cleaned.odt') p = office.LibreOfficeParser('./tests/data/clean.cleaned.odt')
self.assertEqual(p.get_meta(), {}) self.assertEqual(p.get_meta(), {'Pictures/1000000000000032000000311EC5314D.png': {'Pictures/1000000000000032000000311EC5314D.png': {}}, 'create_system': 'Linux'})
self.__check_zip_meta(p) self.__check_zip_meta(p)
self.__check_deep_meta(p) self.__check_deep_meta(p)
......
...@@ -336,13 +336,30 @@ class TestCleaning(unittest.TestCase): ...@@ -336,13 +336,30 @@ class TestCleaning(unittest.TestCase):
self.assertTrue(ret) self.assertTrue(ret)
p = office.MSOfficeParser('./tests/data/clean.cleaned.docx') p = office.MSOfficeParser('./tests/data/clean.cleaned.docx')
self.assertEqual(p.get_meta(), {}) self.assertEqual(p.get_meta(), {'create_system': 'Linux', 'word/media/image1.png': {'word/media/image1.png': {}}})
self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.docx') os.remove('./tests/data/clean.docx')
os.remove('./tests/data/clean.cleaned.docx') os.remove('./tests/data/clean.cleaned.docx')
os.remove('./tests/data/clean.cleaned.cleaned.docx') os.remove('./tests/data/clean.cleaned.cleaned.docx')
def test_zip(self):
shutil.copy('./tests/data/test.zip', './tests/data/clean.zip')
p = office.MSOfficeParser('./tests/data/clean.zip')
meta = p.get_meta()
self.assertIsNotNone(meta)
ret = p.remove_all()
self.assertTrue(ret)
p = office.MSOfficeParser('./tests/data/clean.cleaned.zip')
self.assertEqual(p.get_meta(), {'create_system': 'Linux',
'dirty.mp3': {'dirty.mp3': {}, 'dirty.png': {}},
'dirty.png': {'dirty.mp3': {}, 'dirty.png': {}}})
os.remove('./tests/data/clean.zip')
os.remove('./tests/data/clean.cleaned.zip')
def test_libreoffice(self): def test_libreoffice(self):
shutil.copy('./tests/data/dirty.odt', './tests/data/clean.odt') shutil.copy('./tests/data/dirty.odt', './tests/data/clean.odt')
p = office.LibreOfficeParser('./tests/data/clean.odt') p = office.LibreOfficeParser('./tests/data/clean.odt')
...@@ -354,7 +371,7 @@ class TestCleaning(unittest.TestCase): ...@@ -354,7 +371,7 @@ class TestCleaning(unittest.TestCase):
self.assertTrue(ret) self.assertTrue(ret)
p = office.LibreOfficeParser('./tests/data/clean.cleaned.odt') p = office.LibreOfficeParser('./tests/data/clean.cleaned.odt')
self.assertEqual(p.get_meta(), {}) self.assertEqual(p.get_meta(), {'Pictures/1000000000000032000000311EC5314D.png': {'Pictures/1000000000000032000000311EC5314D.png': {}}, 'create_system': 'Linux'})
self.assertTrue(p.remove_all()) self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.odt') os.remove('./tests/data/clean.odt')
...@@ -426,8 +443,8 @@ class TestCleaning(unittest.TestCase): ...@@ -426,8 +443,8 @@ class TestCleaning(unittest.TestCase):
self.assertTrue(ret) self.assertTrue(ret)
p = office.LibreOfficeParser('./tests/data/clean.cleaned.odf') p = office.LibreOfficeParser('./tests/data/clean.cleaned.odf')
self.assertEqual(p.get_meta(), {})
self.assertTrue(p.remove_all()) self.assertTrue(p.remove_all())
self.assertEqual(p.get_meta(), {'create_system': 'Linux'})
os.remove('./tests/data/clean.odf') os.remove('./tests/data/clean.odf')
os.remove('./tests/data/clean.cleaned.odf') os.remove('./tests/data/clean.cleaned.odf')
...@@ -444,8 +461,7 @@ class TestCleaning(unittest.TestCase): ...@@ -444,8 +461,7 @@ class TestCleaning(unittest.TestCase):
self.assertTrue(ret) self.assertTrue(ret)
p = office.LibreOfficeParser('./tests/data/clean.cleaned.odg') p = office.LibreOfficeParser('./tests/data/clean.cleaned.odg')
self.assertEqual(p.get_meta(), {}) self.assertEqual(p.get_meta(), {'create_system': 'Linux'})
self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.odg') os.remove('./tests/data/clean.odg')
os.remove('./tests/data/clean.cleaned.odg') os.remove('./tests/data/clean.cleaned.odg')
......