Skip to content
Snippets Groups Projects
Commit 6b4e343a authored by Julien (jvoisin) Voisin
Browse files

Bump coverage

parent 51bded33
Branches
Tags
No related merge requests found
......@@ -300,18 +300,13 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
Yes, I know that parsing xml with regexp ain't pretty,
be my guest and fix it if you want.
"""
if not file_path.startswith('docProps/'):
return {}
elif not file_path.endswith('.xml'):
if not file_path.startswith('docProps/') and not file_path.endswith('.xml'):
return {}
with open(full_path, encoding='utf-8') as f:
try:
results = re.findall(r"<(.+)>(.+)</\1>", f.read(), re.I|re.M)
metadata = {}
for (key, value) in results:
metadata[key] = value
return metadata
return {k:v for (k, v) in results}
except (TypeError, UnicodeDecodeError):
# We didn't manage to parse the xml file
return {file_path: 'harmful content', }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment