From 8ff57c5803152c619f88e44ffded28540a289d44 Mon Sep 17 00:00:00 2001
From: jvoisin <julien.voisin@dustri.org>
Date: Wed, 7 Nov 2018 21:54:34 +0100
Subject: [PATCH] Do not display control characters in output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Kudos to Sherry Taylor for reporting this issue ♥
---
 mat2                  | 10 ++++++++++
 tests/test_climat2.py |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/mat2 b/mat2
index a36f62d..351d97b 100755
--- a/mat2
+++ b/mat2
@@ -6,6 +6,7 @@ import sys
 import mimetypes
 import argparse
 import logging
+import unicodedata
 
 try:
     from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS
@@ -83,6 +84,15 @@ def __print_meta(filename: str, metadata: dict, depth: int=1):
         if isinstance(v, dict):
             __print_meta(k, v, depth+1)
             continue
+
+        # Remove control characters. Matching 'C' drops every "Other" category
+        # (Cc, Cf, Cs, Co, Cn), not just 'Cc' controls: better safe than sorry.
+        # https://www.unicode.org/reports/tr44/#GC_Values_Table
+        try:
+            v = ''.join(ch for ch in v if not unicodedata.category(ch).startswith('C'))
+        except TypeError:
+            pass  # v is not a string (not iterable, or yields non-characters)
+
         try:  # FIXME this is ugly.
             print(padding + "  %s: %s" % (k, v))
         except UnicodeEncodeError:
diff --git a/tests/test_climat2.py b/tests/test_climat2.py
index dd7c9b9..53e4f5b 100644
--- a/tests/test_climat2.py
+++ b/tests/test_climat2.py
@@ -121,7 +121,7 @@ class TestGetMeta(unittest.TestCase):
         proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.pdf'],
                 stdout=subprocess.PIPE)
         stdout, _ = proc.communicate()
-        self.assertIn(b'producer: pdfTeX-1.40.14', stdout)
+        self.assertIn(b'Producer: pdfTeX-1.40.14', stdout)
 
     def test_png(self):
         proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.png'],
-- 
GitLab