From 6ef6aaa2221340f03c2571a3141c67ca027f5a4f Mon Sep 17 00:00:00 2001 From: jvoisin <julien.voisin@dustri.org> Date: Fri, 8 Feb 2019 23:23:56 +0100 Subject: [PATCH] Improve a bit get_meta for libreoffice files --- libmat2/office.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libmat2/office.py b/libmat2/office.py index 0c9caa8..f3a5b22 100644 --- a/libmat2/office.py +++ b/libmat2/office.py @@ -384,7 +384,7 @@ class LibreOfficeParser(ArchiveBasedAbstractParser): return {} with open(full_path, encoding='utf-8') as f: try: - results = re.findall(r"<((?:meta|dc|cp).+?)>(.+)</\1>", f.read(), re.I|re.M) + results = re.findall(r"<((?:meta|dc|cp).+?)[^>]*>(.+)</\1>", f.read(), re.I|re.M) return {k:v for (k, v) in results} except (TypeError, UnicodeDecodeError): # We didn't manage to parse the xml file # We didn't manage to parse the xml file -- GitLab