Skip to content
Snippets Groups Projects
Commit 0d25b18d authored by Julien (jvoisin) Voisin
Browse files

Improve both the typing and the comments

parent d0f3534e
No related branches found
No related tags found
No related merge requests found
......@@ -9,7 +9,7 @@ bandit:
script: # TODO: remove B405 and B314
- apt-get -qqy update
- apt-get -qqy install --no-install-recommends python3-bandit
- bandit ./mat2 --format txt
- bandit ./mat2 --format txt --skip B101
- bandit -r ./nautilus/ --format txt --skip B101
- bandit -r ./libmat2 --format txt --skip B101,B404,B603,B405,B314
......
......@@ -2,7 +2,7 @@ import logging
import os
import re
import zipfile
from typing import Dict, Set, Pattern
from typing import Dict, Set, Pattern, Tuple
import xml.etree.ElementTree as ET # type: ignore
......@@ -14,9 +14,8 @@ from .archive import ArchiveBasedAbstractParser
assert Set
assert Pattern
def _parse_xml(full_path: str):
def _parse_xml(full_path: str) -> Tuple[ET.ElementTree, Dict[str, str]]:
""" This function parses XML, with namespace support. """
namespace_map = dict()
for _, (key, value) in ET.iterparse(full_path, ("start-ns", )):
# The ns[0-9]+ namespaces are reserved for internal usage, so
......@@ -183,20 +182,20 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
parent_map = {c:p for p in tree.iter() for c in p}
elements = list()
elements_del = list()
for element in tree.iterfind('.//w:del', namespace):
elements.append(element)
for element in elements:
elements_del.append(element)
for element in elements_del:
parent_map[element].remove(element)
elements = list()
elements_ins = list()
for element in tree.iterfind('.//w:ins', namespace):
for position, item in enumerate(tree.iter()): # pragma: no cover
if item == element:
for children in element.iterfind('./*'):
elements.append((element, position, children))
elements_ins.append((element, position, children))
break
for (element, position, children) in elements:
for (element, position, children) in elements_ins:
parent_map[element].insert(position, children)
parent_map[element].remove(element)
......
#!/usr/bin/env python3
import os
from typing import Tuple
from typing import Tuple, Generator, List
import sys
import mimetypes
import argparse
......@@ -16,6 +16,10 @@ except ValueError as e:
__version__ = '0.4.0'
# Make pyflakes happy
assert Tuple
def __check_file(filename: str, mode: int=os.R_OK) -> bool:
if not os.path.exists(filename):
print("[-] %s is doesn't exist." % filename)
......@@ -29,7 +33,7 @@ def __check_file(filename: str, mode: int=os.R_OK) -> bool:
return True
def create_arg_parser():
def create_arg_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
parser.add_argument('files', nargs='*', help='the files to process')
parser.add_argument('-v', '--version', action='version',
......@@ -63,19 +67,18 @@ def show_meta(filename: str):
return
print("[+] Metadata for %s:" % filename)
meta = p.get_meta().items()
if not meta:
metadata = p.get_meta().items()
if not metadata:
print(" No metadata found")
return
for k, v in meta:
for k, v in metadata:
try: # FIXME this is ugly.
print(" %s: %s" % (k, v))
except UnicodeEncodeError:
print(" %s: harmful content" % k)
def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool:
filename, is_lightweight, unknown_member_policy = params
def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy) -> bool:
if not __check_file(filename, os.R_OK|os.W_OK):
return False
......@@ -83,7 +86,7 @@ def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool:
if p is None:
print("[-] %s's format (%s) is not supported" % (filename, mtype))
return False
p.unknown_member_policy = unknown_member_policy
p.unknown_member_policy = policy
if is_lightweight:
return p.remove_all_lightweight()
return p.remove_all()
......@@ -91,7 +94,7 @@ def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool:
def show_parsers():
print('[+] Supported formats:')
formats = list()
formats = set()
for parser in parser_factory._get_parsers():
for mtype in parser.mimetypes:
extensions = set()
......@@ -102,11 +105,11 @@ def show_parsers():
# we're not supporting a single extension in the current
# mimetype, so there is no point in showing the mimetype at all
continue
formats.append(' - %s (%s)' % (mtype, ', '.join(extensions)))
formats.add(' - %s (%s)' % (mtype, ', '.join(extensions)))
print('\n'.join(sorted(formats)))
def __get_files_recursively(files):
def __get_files_recursively(files: List[str]) -> Generator[str, None, None]:
for f in files:
if os.path.isdir(f):
for path, _, _files in os.walk(f):
......@@ -141,13 +144,13 @@ def main():
return 0
else:
unknown_member_policy = UnknownMemberPolicy(args.unknown_members)
if unknown_member_policy == UnknownMemberPolicy.KEEP:
policy = UnknownMemberPolicy(args.unknown_members)
if policy == UnknownMemberPolicy.KEEP:
logging.warning('Keeping unknown member files may leak metadata in the resulting file!')
no_failure = True
for f in __get_files_recursively(args.files):
if clean_meta([f, args.lightweight, unknown_member_policy]) is False:
if clean_meta(f, args.lightweight, policy) is False:
no_failure = False
return 0 if no_failure is True else -1
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment