Commit b832a594 authored by jvoisin

Refactor lightweight mode implementation

parent 6ce88b8b
Pipeline #19584 failed with stages
in 4 minutes and 26 seconds
......@@ -19,6 +19,7 @@ class AbstractParser(abc.ABC):
self.filename = filename
fname, extension = os.path.splitext(filename)
self.output_filename = fname + '.cleaned' + extension
self.lightweight_cleaning = False
@abc.abstractmethod
def get_meta(self) -> Dict[str, str]:
......@@ -27,10 +28,3 @@ class AbstractParser(abc.ABC):
@abc.abstractmethod
def remove_all(self) -> bool:
    """ Clean the file handled by this parser.

    Abstract: every concrete parser has to provide its own
    implementation. The result is a bool; the visible callers
    treat `True` as a successful cleaning.
    """
    pass # pragma: no cover
def remove_all_lightweight(self) -> bool:
    """ Strip only _SOME_ of the metadata.

    Parsers for fileformats that can not be cleaned in a
    non-destructive way may want to override this method; the
    default implementation simply delegates to `remove_all`.
    """
    outcome = self.remove_all()
    return outcome
......@@ -37,7 +37,12 @@ class PDFParser(abstract.AbstractParser):
except GLib.GError: # Invalid PDF
raise ValueError
def remove_all_lightweight(self):
def remove_all(self) -> bool:
    """ Clean the PDF with the strategy selected by `lightweight_cleaning`.

    When `self.lightweight_cleaning` is truthy, the pages are rendered
    on a new PDF surface (lightweight mode); otherwise they are
    rendered to PNG and put into a new PDF (thorough mode).

    Returns whatever the selected cleaning method returns.
    """
    # Plain truthiness instead of `is True`: an identity check would
    # silently pick the thorough path for truthy non-bool flags (e.g. 1).
    if self.lightweight_cleaning:
        return self.__remove_all_lightweight()
    return self.__remove_all_thorough()
def __remove_all_lightweight(self) -> bool:
"""
Load the document into Poppler, render pages on a new PDFSurface.
"""
......@@ -64,7 +69,7 @@ class PDFParser(abstract.AbstractParser):
return True
def remove_all(self):
def __remove_all_thorough(self) -> bool:
"""
Load the document into Poppler, render pages on PNG,
and shove those PNG into a new PDF.
......
......@@ -94,8 +94,7 @@ def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy)
print("[-] %s's format (%s) is not supported" % (filename, mtype))
return False
p.unknown_member_policy = policy
if is_lightweight:
return p.remove_all_lightweight()
p.lightweight_cleaning = is_lightweight
return p.remove_all()
......
......@@ -190,7 +190,8 @@ class TestLightWeightCleaning(unittest.TestCase):
meta = p.get_meta()
self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')
ret = p.remove_all_lightweight()
p.lightweight_cleaning = True
ret = p.remove_all()
self.assertTrue(ret)
p = pdf.PDFParser('./tests/data/clean.cleaned.pdf')
......@@ -207,7 +208,8 @@ class TestLightWeightCleaning(unittest.TestCase):
meta = p.get_meta()
self.assertEqual(meta['Comment'], 'This is a comment, be careful!')
ret = p.remove_all_lightweight()
p.lightweight_cleaning = True
ret = p.remove_all()
self.assertTrue(ret)
p = images.PNGParser('./tests/data/clean.cleaned.png')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment