Skip to content
Snippets Groups Projects
mat2 5.52 KiB
Newer Older
  • Learn to ignore specific revisions
  • #!/usr/bin/env python3
    
    import argparse
    import logging
    import mimetypes
    import os
    import sys
    from typing import Tuple, Generator, List

    try:
        from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS
        from libmat2 import check_dependencies, UnknownMemberPolicy
    except ValueError as e:
        print(e)
        sys.exit(1)
    
    # Version string interpolated into the `--version` output and the
    # dependency listing header.
    __version__ = '0.4.0'
    
    # Make pyflakes happy: presumably `Tuple` is only needed by type comments,
    # so it is referenced here to keep the import from being flagged as unused.
    assert Tuple
    
    
    
    def __check_file(filename: str, mode: int=os.R_OK) -> bool:
        """Tell whether `filename` is an existing regular file accessible with `mode`.

        A diagnostic line is printed to stdout for the first check that fails.

        :param filename: path of the file to check.
        :param mode: bitmask of `os.R_OK` / `os.W_OK` (as taken by `os.access`)
            describing the access the caller needs.
        :return: True if every check passes, False otherwise.
        """
        if not os.path.exists(filename):
            print("[-] %s doesn't exist." % filename)
            return False
        if not os.path.isfile(filename):
            print("[-] %s is not a regular file." % filename)
            return False
        if not os.access(filename, mode):
            # Name only the permissions that were actually requested, so that a
            # read-only check does not complain about writability.
            wanted = []
            if mode & os.R_OK:
                wanted.append('readable')
            if mode & os.W_OK:
                wanted.append('writeable')
            print("[-] %s is not %s." % (filename, ' and '.join(wanted) or 'accessible'))
            return False
        return True
    
    def create_arg_parser() -> argparse.ArgumentParser:
        """Build the command-line parser for MAT2.

        :return: a parser accepting the files to process, the informational
            flags (`--version`, `--list`, `--check-dependencies`, `--verbose`),
            the `--unknown-members` policy, and the mutually exclusive
            `--show` / `--lightweight` modes.
        """
        parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
        parser.add_argument('files', nargs='*', help='the files to process')
        parser.add_argument('-v', '--version', action='version',
                            version='MAT2 %s' % __version__)
        parser.add_argument('-l', '--list', action='store_true',
                            help='list all supported fileformats')
        parser.add_argument('--check-dependencies', action='store_true',
                            help='check if MAT2 has all the dependencies it needs')
        parser.add_argument('-V', '--verbose', action='store_true',
                            help='show more verbose status information')

        # Restricting the accepted values lets argparse reject an unknown policy
        # with a usage error instead of it surfacing later as a ValueError when
        # the string is converted to an UnknownMemberPolicy.
        policies = [p.value for p in UnknownMemberPolicy]
        parser.add_argument('--unknown-members', metavar='policy', default='abort',
                            choices=policies,
                            help='how to handle unknown members of archive-style files (policy should' +
                            ' be one of: %s)' % ', '.join(policies))

        # --show and --lightweight cannot be combined.
        info = parser.add_mutually_exclusive_group()
        info.add_argument('-s', '--show', action='store_true',
                          help='list harmful metadata detectable by MAT2 without removing them')
        info.add_argument('-L', '--lightweight', action='store_true',
                          help='remove SOME metadata')
        return parser
    
    
    def show_meta(filename: str):
        """Print the metadata found in `filename`.

        Nothing is printed beyond a diagnostic when the file fails the
        readability check or when no parser is available for its format.

        :param filename: path of the file to inspect.
        """
        if not __check_file(filename):
            return

        p, mtype = parser_factory.get_parser(filename)  # type: ignore
        if p is None:
            print("[-] %s's format (%s) is not supported" % (filename, mtype))
            # No parser means there is nothing to query; stop here instead of
            # calling a method on None below.
            return

        print("[+] Metadata for %s:" % filename)
        # __print_meta iterates over (key, value) pairs, hence the items view.
        __print_meta(p.get_meta().items())
    
    def __print_meta(metadata: dict):
        """Print metadata as indented `key: value` lines, flattening nested dicts.

        :param metadata: either a dict or an iterable of `(key, value)` pairs
            (such as a dict's items view). Values that are themselves dicts are
            printed recursively; their own key is not printed.
        """
        # Accept both shapes: the top-level caller hands over an items view,
        # while nested values are plain dicts.
        pairs = metadata.items() if isinstance(metadata, dict) else metadata
        for key, value in pairs:
            if isinstance(value, dict):
                __print_meta(value)
                continue
            try:  # FIXME this is ugly.
                print("  %s: %s" % (key, value))
            except UnicodeEncodeError:
                # The value cannot be encoded for the output stream.
                print("  %s: harmful content" % key)
    
    def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy) -> bool:
        """Remove metadata from `filename` using the parser matching its format.

        :param filename: path of the file to clean; it must be readable and
            writeable.
        :param is_lightweight: when true, call the parser's
            `remove_all_lightweight()` instead of `remove_all()`.
        :param policy: value assigned to the parser's `unknown_member_policy`.
        :return: False if the file fails the access check or its format has no
            parser; otherwise whatever the parser's removal method returns.
        """
        if not __check_file(filename, os.R_OK|os.W_OK):
            return False

        p, mtype = parser_factory.get_parser(filename)  # type: ignore
        if p is None:
            print("[-] %s's format (%s) is not supported" % (filename, mtype))
            # Report the failure instead of setting an attribute on None below.
            return False

        p.unknown_member_policy = policy
        if is_lightweight:
            return p.remove_all_lightweight()
        return p.remove_all()
    
    def show_parsers():
        """Print the supported mimetypes with their supported file extensions.

        Mimetypes for which every known extension is listed in
        UNSUPPORTED_EXTENSIONS are skipped. Output lines are sorted.
        """
        print('[+] Supported formats:')
        formats = set()
        for parser in parser_factory._get_parsers():
            for mtype in parser.mimetypes:
                # Deduplicate, then sort so the listing is stable between runs.
                extensions = sorted({
                    extension
                    for extension in mimetypes.guess_all_extensions(mtype)
                    if extension not in UNSUPPORTED_EXTENSIONS
                })
                if not extensions:
                    # we're not supporting a single extension in the current
                    # mimetype, so there is no point in showing the mimetype at all
                    continue
                formats.add('  - %s (%s)' % (mtype, ', '.join(extensions)))
        print('\n'.join(sorted(formats)))
    
    def __get_files_recursively(files: List[str]) -> Generator[str, None, None]:
        """Yield every usable file named by `files`, descending into directories.

        :param files: paths given by the user; each may be a file or a directory.
        :return: a generator over the paths that pass `__check_file`. Directories
            are walked with `os.walk` and each contained file is checked
            individually.
        """
        for f in files:
            if os.path.isdir(f):
                for path, _, _files in os.walk(f):
                    for _f in _files:
                        fname = os.path.join(path, _f)
                        if __check_file(fname):
                            yield fname
            elif __check_file(f):
                yield f
    
    Julien (jvoisin) Voisin's avatar
    Julien (jvoisin) Voisin committed
    def main():
        """Parse the command line and run the requested action.

        Without files, this lists the supported formats (`--list`), reports the
        dependency status (`--check-dependencies`), or prints the help. With
        files, it either shows their metadata (`--show`) or cleans them.

        :return: 0 on success, -1 if cleaning failed for at least one file.
        """
        arg_parser = create_arg_parser()
        args = arg_parser.parse_args()

        if args.verbose:
            logging.basicConfig(level=logging.INFO)

        if not args.files:
            # Informational modes: nothing to process.
            if args.list:
                show_parsers()
            elif args.check_dependencies:
                print("Dependencies required for MAT2 %s:" % __version__)
                for key, value in sorted(check_dependencies().items()):
                    print('- %s: %s' % (key, 'yes' if value else 'no'))
            else:
                arg_parser.print_help()
            return 0

        if args.show:
            for f in __get_files_recursively(args.files):
                show_meta(f)
            return 0

        policy = UnknownMemberPolicy(args.unknown_members)
        if policy == UnknownMemberPolicy.KEEP:
            logging.warning('Keeping unknown member files may leak metadata in the resulting file!')

        no_failure = True
        for f in __get_files_recursively(args.files):
            # Keep going after a failure so every file gets a chance to be cleaned.
            if clean_meta(f, args.lightweight, policy) is False:
                no_failure = False
        return 0 if no_failure is True else -1
    
    Julien (jvoisin) Voisin's avatar
    Julien (jvoisin) Voisin committed
    
    if __name__ == '__main__':