Commit 8ff57c58 authored by jvoisin's avatar jvoisin

Do not display control characters in output

Kudos to Sherry Taylor for reporting this issue 
parent 04bb8c8c
Pipeline #20659 passed with stages
in 4 minutes and 25 seconds
......@@ -6,6 +6,7 @@ import sys
import mimetypes
import argparse
import logging
import unicodedata
try:
from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS
......@@ -83,6 +84,15 @@ def __print_meta(filename: str, metadata: dict, depth: int=1):
if isinstance(v, dict):
__print_meta(k, v, depth+1)
continue
# Remove control characters
# We might use 'Cc' instead of 'C', but better safe than sorry
# https://www.unicode.org/reports/tr44/#GC_Values_Table
try:
v = ''.join(ch for ch in v if not unicodedata.category(ch).startswith('C'))
except TypeError:
pass # for things that aren't iterable
try: # FIXME this is ugly.
print(padding + " %s: %s" % (k, v))
except UnicodeEncodeError:
......
......@@ -121,7 +121,7 @@ class TestGetMeta(unittest.TestCase):
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.pdf'],
stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertIn(b'producer: pdfTeX-1.40.14', stdout)
self.assertIn(b'Producer: pdfTeX-1.40.14', stdout)
def test_png(self):
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.png'],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment