From 53271495f74bde7fde2329b7c5c938654a36b7dc Mon Sep 17 00:00:00 2001
From: jvoisin <julien.voisin@dustri.org>
Date: Fri, 6 Jul 2018 00:42:09 +0200
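Subject: [PATCH] Add support for .txt files

Remove '.text' and '.txt' from unsupported_extensions so that text/plain
files are handled by HarmlessParser instead of being rejected.

HarmlessParser no longer overrides __init__ to force output_filename to
be the input filename; remove_all() now copies the input file to
self.output_filename (tests/data/clean.cleaned.txt in the new test).

Tests: add tests/data/dirty.txt plus get_meta/remove_all tests for it.
The explicitly-unsupported test now uses a .py file (text/x-python),
since .txt is supported; this also fixes the old test copying to
./tests/clean.txt while parsing ./tests/data/clean.txt.
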
---
 libmat2/__init__.py           |  2 --
 libmat2/harmless.py           |  7 ++-----
 tests/data/dirty.txt          |  1 +
 tests/test_corrupted_files.py |  8 ++++----
 tests/test_libmat2.py         | 24 +++++++++++++++++++++++-
 5 files changed, 30 insertions(+), 12 deletions(-)
 create mode 100644 tests/data/dirty.txt

diff --git a/libmat2/__init__.py b/libmat2/__init__.py
index 91a51d8..190abe5 100644
--- a/libmat2/__init__.py
+++ b/libmat2/__init__.py
@@ -12,8 +12,6 @@ unsupported_extensions = {
     '.pot',
     '.rdf',
     '.srt',
-    '.text',
-    '.txt',
     '.wsdl',
     '.xpdl',
     '.xsd',
diff --git a/libmat2/harmless.py b/libmat2/harmless.py
index 2878571..9032caf 100644
--- a/libmat2/harmless.py
+++ b/libmat2/harmless.py
@@ -1,3 +1,4 @@
+import shutil
 from typing import Dict
 from . import abstract
 
@@ -6,13 +7,9 @@ class HarmlessParser(abstract.AbstractParser):
     """ This is the parser for filetypes that do not contain metadata. """
     mimetypes = {'text/plain', }
 
-    def __init__(self, filename: str) -> None:
-        super().__init__(filename)
-        self.filename = filename
-        self.output_filename = filename
-
     def get_meta(self) -> Dict[str, str]:
         return dict()
 
     def remove_all(self) -> bool:
+        shutil.copy(self.filename, self.output_filename)
         return True
diff --git a/tests/data/dirty.txt b/tests/data/dirty.txt
new file mode 100644
index 0000000..952975e
--- /dev/null
+++ b/tests/data/dirty.txt
@@ -0,0 +1 @@
+I'm a file that can't have metadata, but I'm supposed to be supported anyway.
\ No newline at end of file
diff --git a/tests/test_corrupted_files.py b/tests/test_corrupted_files.py
index b784b0e..4b2243d 100644
--- a/tests/test_corrupted_files.py
+++ b/tests/test_corrupted_files.py
@@ -18,11 +18,11 @@ class TestUnsupportedFiles(unittest.TestCase):
 
 class TestExplicitelyUnsupportedFiles(unittest.TestCase):
     def test_pdf(self):
-        shutil.copy('./tests/test_libmat2.py', './tests/clean.txt')
-        parser, mimetype = parser_factory.get_parser('./tests/data/clean.txt')
-        self.assertEqual(mimetype, 'text/plain')
+        shutil.copy('./tests/test_libmat2.py', './tests/data/clean.py')
+        parser, mimetype = parser_factory.get_parser('./tests/data/clean.py')
+        self.assertEqual(mimetype, 'text/x-python')
         self.assertEqual(parser, None)
-        os.remove('./tests/clean.txt')
+        os.remove('./tests/data/clean.py')
 
 
 class TestCorruptedFiles(unittest.TestCase):
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 4df6385..90f37a8 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -6,7 +6,7 @@ import os
 import zipfile
 import tempfile
 
-from libmat2 import pdf, images, audio, office, parser_factory, torrent
+from libmat2 import pdf, images, audio, office, parser_factory, torrent, harmless
 
 
 class TestParserFactory(unittest.TestCase):
@@ -104,6 +104,12 @@ class TestGetMeta(unittest.TestCase):
         self.assertEqual(meta['meta:creation-date'], '2011-07-26T03:27:48')
         self.assertEqual(meta['meta:generator'], 'LibreOffice/3.3$Unix LibreOffice_project/330m19$Build-202')
 
+    def test_txt(self):
+        p, mimetype = parser_factory.get_parser('./tests/data/dirty.txt')
+        self.assertEqual(mimetype, 'text/plain')
+        meta = p.get_meta()
+        self.assertEqual(meta, {})
+
 
 class TestRemovingThumbnails(unittest.TestCase):
     def test_odt(self):
@@ -473,3 +479,19 @@ class TestCleaning(unittest.TestCase):
 
         os.remove('./tests/data/clean.odg')
         os.remove('./tests/data/clean.cleaned.odg')
+
+    def test_txt(self):
+        shutil.copy('./tests/data/dirty.txt', './tests/data/clean.txt')
+        p = harmless.HarmlessParser('./tests/data/clean.txt')
+
+        meta = p.get_meta()
+        self.assertEqual(meta, {})
+
+        ret = p.remove_all()
+        self.assertTrue(ret)
+
+        p = harmless.HarmlessParser('./tests/data/clean.cleaned.txt')
+        self.assertEqual(p.get_meta(), {})
+
+        os.remove('./tests/data/clean.txt')
+        os.remove('./tests/data/clean.cleaned.txt')
-- 
GitLab