From b832a5941458083dd6147efb652036552f95b786 Mon Sep 17 00:00:00 2001
From: jvoisin <julien.voisin@dustri.org>
Date: Fri, 12 Oct 2018 11:49:24 +0200
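Subject: [PATCH] Refactor lightweight mode implementation

Replace the separate remove_all_lightweight() method with a
lightweight_cleaning attribute on AbstractParser (False by default).
Callers now set the attribute and always call remove_all().

PDFParser.remove_all() dispatches on the attribute to the private
__remove_all_lightweight() or __remove_all_thorough() helpers. The
mat2 CLI and the lightweight cleaning tests are updated accordingly.

Code that called remove_all_lightweight() must be migrated, since the
method no longer exists on AbstractParser.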
---
 libmat2/abstract.py   | 8 +-------
 libmat2/pdf.py        | 9 +++++++--
 mat2                  | 3 +--
 tests/test_libmat2.py | 6 ++++--
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/libmat2/abstract.py b/libmat2/abstract.py
index cd72f2c..5bcaa69 100644
--- a/libmat2/abstract.py
+++ b/libmat2/abstract.py
@@ -19,6 +19,7 @@ class AbstractParser(abc.ABC):
         self.filename = filename
         fname, extension = os.path.splitext(filename)
         self.output_filename = fname + '.cleaned' + extension
+        self.lightweight_cleaning = False
 
     @abc.abstractmethod
     def get_meta(self) -> Dict[str, str]:
@@ -27,10 +28,3 @@ class AbstractParser(abc.ABC):
     @abc.abstractmethod
     def remove_all(self) -> bool:
         pass  # pragma: no cover
-
-    def remove_all_lightweight(self) -> bool:
-        """ This method removes _SOME_ metadata.
-        It might be useful to implement it for fileformats that do
-        not support non-destructive cleaning.
-        """
-        return self.remove_all()
diff --git a/libmat2/pdf.py b/libmat2/pdf.py
index c8769aa..140b4f4 100644
--- a/libmat2/pdf.py
+++ b/libmat2/pdf.py
@@ -37,7 +37,12 @@ class PDFParser(abstract.AbstractParser):
         except GLib.GError:  # Invalid PDF
             raise ValueError
 
-    def remove_all_lightweight(self):
+    def remove_all(self) -> bool:
+        if self.lightweight_cleaning is True:
+            return self.__remove_all_lightweight()
+        return self.__remove_all_thorough()
+
+    def __remove_all_lightweight(self) -> bool:
         """
             Load the document into Poppler, render pages on a new PDFSurface.
         """
@@ -64,7 +69,7 @@ class PDFParser(abstract.AbstractParser):
 
         return True
 
-    def remove_all(self):
+    def __remove_all_thorough(self) -> bool:
         """
             Load the document into Poppler, render pages on PNG,
             and shove those PNG into a new PDF.
diff --git a/mat2 b/mat2
index b4a6033..ba1f0ac 100755
--- a/mat2
+++ b/mat2
@@ -94,8 +94,7 @@ def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy)
         print("[-] %s's format (%s) is not supported" % (filename, mtype))
         return False
     p.unknown_member_policy = policy
-    if is_lightweight:
-        return p.remove_all_lightweight()
+    p.lightweight_cleaning = is_lightweight
     return p.remove_all()
 
 
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 6a2af91..665bab0 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -190,7 +190,8 @@ class TestLightWeightCleaning(unittest.TestCase):
         meta = p.get_meta()
         self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')
 
-        ret = p.remove_all_lightweight()
+        p.lightweight_cleaning = True
+        ret = p.remove_all()
         self.assertTrue(ret)
 
         p = pdf.PDFParser('./tests/data/clean.cleaned.pdf')
@@ -207,7 +208,8 @@ class TestLightWeightCleaning(unittest.TestCase):
         meta = p.get_meta()
         self.assertEqual(meta['Comment'], 'This is a comment, be careful!')
 
-        ret = p.remove_all_lightweight()
+        p.lightweight_cleaning = True
+        ret = p.remove_all()
         self.assertTrue(ret)
 
         p = images.PNGParser('./tests/data/clean.cleaned.png')
-- 
GitLab