Skip to content
Snippets Groups Projects
Commit b832a594 authored by Julien (jvoisin) Voisin's avatar Julien (jvoisin) Voisin
Browse files

Refactor lightweight mode implementation

parent 6ce88b8b
Branches
Tags
No related merge requests found
......@@ -19,6 +19,7 @@ class AbstractParser(abc.ABC):
self.filename = filename
fname, extension = os.path.splitext(filename)
self.output_filename = fname + '.cleaned' + extension
self.lightweight_cleaning = False
@abc.abstractmethod
def get_meta(self) -> Dict[str, str]:
......@@ -27,10 +28,3 @@ class AbstractParser(abc.ABC):
@abc.abstractmethod
def remove_all(self) -> bool:
pass # pragma: no cover
def remove_all_lightweight(self) -> bool:
""" This method removes _SOME_ metadata.
It might be useful to implement it for fileformats that do
not support non-destructive cleaning.
"""
return self.remove_all()
......@@ -37,7 +37,12 @@ class PDFParser(abstract.AbstractParser):
except GLib.GError: # Invalid PDF
raise ValueError
def remove_all_lightweight(self):
def remove_all(self) -> bool:
if self.lightweight_cleaning is True:
return self.__remove_all_lightweight()
return self.__remove_all_thorough()
def __remove_all_lightweight(self) -> bool:
"""
Load the document into Poppler, render pages on a new PDFSurface.
"""
......@@ -64,7 +69,7 @@ class PDFParser(abstract.AbstractParser):
return True
def remove_all(self):
def __remove_all_thorough(self) -> bool:
"""
Load the document into Poppler, render pages on PNG,
and shove those PNG into a new PDF.
......
......@@ -94,8 +94,7 @@ def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy)
print("[-] %s's format (%s) is not supported" % (filename, mtype))
return False
p.unknown_member_policy = policy
if is_lightweight:
return p.remove_all_lightweight()
p.lightweight_cleaning = is_lightweight
return p.remove_all()
......
......@@ -190,7 +190,8 @@ class TestLightWeightCleaning(unittest.TestCase):
meta = p.get_meta()
self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')
ret = p.remove_all_lightweight()
p.lightweight_cleaning = True
ret = p.remove_all()
self.assertTrue(ret)
p = pdf.PDFParser('./tests/data/clean.cleaned.pdf')
......@@ -207,7 +208,8 @@ class TestLightWeightCleaning(unittest.TestCase):
meta = p.get_meta()
self.assertEqual(meta['Comment'], 'This is a comment, be careful!')
ret = p.remove_all_lightweight()
p.lightweight_cleaning = True
ret = p.remove_all()
self.assertTrue(ret)
p = images.PNGParser('./tests/data/clean.cleaned.png')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment