Skip to content
Snippets Groups Projects
mat2 4.66 KiB
Newer Older
  • Learn to ignore specific revisions
  • #!/usr/bin/python3
    
    import os
    
    from typing import Tuple
    import sys
    import itertools
    
    import mimetypes
    
    Julien (jvoisin) Voisin's avatar
    Julien (jvoisin) Voisin committed
    import argparse
    
    import multiprocessing
    
    dkg's avatar
    dkg committed
    import logging
    
        from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS, check_dependencies
    
    except ValueError as e:
        print(e)
        sys.exit(1)
    
    __version__ = '0.3.1'
    
    def __check_file(filename: str, mode: int = os.R_OK) -> bool:
        """Check that `filename` is an existing regular file accessible with `mode`.

        A one-line diagnostic is printed to stdout for the first check that
        fails.

        :param filename: path of the file to check.
        :param mode: `os.access` mode mask (e.g. `os.R_OK | os.W_OK`).
        :return: True when every check passes, False otherwise.
        """
        if not os.path.exists(filename):
            print("[-] %s doesn't exist." % filename)
            return False
        if not os.path.isfile(filename):
            print("[-] %s is not a regular file." % filename)
            return False
        if not os.access(filename, mode):
            # Only mention the permissions that were actually requested, so a
            # read-only check does not complain about write access.
            wanted = []
            if mode & os.R_OK:
                wanted.append('readable')
            if mode & os.W_OK:
                wanted.append('writeable')
            description = ' and '.join(wanted) or 'accessible'
            print("[-] %s is not %s." % (filename, description))
            return False
        return True
    
    Julien (jvoisin) Voisin's avatar
    Julien (jvoisin) Voisin committed
    def create_arg_parser() -> argparse.ArgumentParser:
        """Build the command-line parser for the tool.

        Positional `files` may be empty. `--show` and `--lightweight` are
        mutually exclusive; the other flags can be combined freely.

        :return: the configured `argparse.ArgumentParser`.
        """
        parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')

        parser.add_argument('files', nargs='*', help='the files to process')
        parser.add_argument('-v', '--version', action='version',
                            version='MAT2 %s' % __version__)
        parser.add_argument('-l', '--list', action='store_true',
                            help='list all supported fileformats')
        parser.add_argument('-c', '--check-dependencies', action='store_true',
                            help='check if MAT2 has all the dependencies it needs')
        parser.add_argument('-V', '--verbose', action='store_true',
                            help='show more verbose status information')

        # Showing metadata and lightweight cleaning cannot be requested together.
        info = parser.add_mutually_exclusive_group()
        info.add_argument('-s', '--show', action='store_true',
                          help='list harmful metadata detectable by MAT2 without removing them')
        info.add_argument('-L', '--lightweight', action='store_true',
                          help='remove SOME metadata')

        return parser
    
    
    def show_meta(filename: str):
        """Print the metadata found in `filename` to stdout.

        Nothing is printed beyond a diagnostic when the file fails
        `__check_file` or when no parser is available for its format.

        :param filename: path of the file to inspect.
        """
        if not __check_file(filename):
            return

        p, mtype = parser_factory.get_parser(filename)  # type: ignore
        if p is None:
            print("[-] %s's format (%s) is not supported" % (filename, mtype))
            # Without a parser there is nothing to query; bail out instead of
            # calling get_meta() on None.
            return

        print("[+] Metadata for %s:" % filename)
        for k, v in p.get_meta().items():
            try:  # FIXME this is ugly.
                print("  %s: %s" % (k, v))
            except UnicodeEncodeError:
                # The value cannot be encoded for the output stream; show the
                # key with a placeholder instead.
                print("  %s: harmful content" % k)
    
    def clean_meta(params: Tuple[str, bool]) -> bool:
        """Remove the metadata of one file.

        The arguments are packed in a single tuple so the function can be
        mapped over by a `multiprocessing.Pool`.

        :param params: `(filename, is_lightweight)`; when `is_lightweight` is
            true the parser's `remove_all_lightweight()` is used instead of
            `remove_all()`.
        :return: False when the file is not a readable and writeable regular
            file or its format is unsupported; otherwise whatever the parser's
            removal method returns.
        """
        filename, is_lightweight = params

        if not __check_file(filename, os.R_OK | os.W_OK):
            return False

        p, mtype = parser_factory.get_parser(filename)  # type: ignore
        if p is None:
            print("[-] %s's format (%s) is not supported" % (filename, mtype))
            # Report failure instead of calling a removal method on None.
            return False

        if is_lightweight:
            return p.remove_all_lightweight()
        return p.remove_all()
    
    def show_parsers():
        """Print every supported mimetype together with its file extensions.

        One line is printed per mimetype, sorted alphabetically. Extensions
        listed in `UNSUPPORTED_EXTENSIONS` are left out, and a mimetype with no
        remaining extension is not shown at all.
        """
        print('[+] Supported formats:')

        formats = []
        for parser in parser_factory._get_parsers():
            for mtype in parser.mimetypes:
                # Deduplicate, then sort, so the output is stable between runs.
                extensions = sorted(
                    extension
                    for extension in set(mimetypes.guess_all_extensions(mtype))
                    if extension[1:] not in UNSUPPORTED_EXTENSIONS  # skip the dot
                )
                if not extensions:
                    # we're not supporting a single extension in the current
                    # mimetype, so there is no point in showing the mimetype at all
                    continue
                formats.append('  - %s (%s)' % (mtype, ', '.join(extensions)))

        print('\n'.join(sorted(formats)))
    
    def __get_files_recursively(files):
        """Yield every usable file path found in `files`.

        Directories are walked recursively with `os.walk`; any other entry is
        yielded as-is. Only paths accepted by `__check_file` are yielded.

        :param files: iterable of file or directory paths.
        """
        for f in files:
            # NOTE(review): the directory test was missing in this copy (the
            # `elif` below had no matching `if`); `os.path.isdir` is the
            # reconstructed guard -- confirm against upstream.
            if os.path.isdir(f):
                for path, _, _files in os.walk(f):
                    for _f in _files:
                        fname = os.path.join(path, _f)
                        if __check_file(fname):
                            yield fname
            elif __check_file(f):
                yield f
    
    Julien (jvoisin) Voisin's avatar
    Julien (jvoisin) Voisin committed
    def main():
        """Parse the command line and run the requested action.

        Without files: list the supported formats (`--list`), report the
        dependencies (`--check-dependencies`), or print the help.
        With files: show their metadata (`--show`) or clean them in a pool of
        worker processes.

        :return: 0 on success, -1 when at least one file could not be cleaned.
        """
        arg_parser = create_arg_parser()
        args = arg_parser.parse_args()

        if args.verbose:
            logging.basicConfig(level=logging.INFO)

        # NOTE(review): this guard line was missing in this copy (the `elif`
        # and `else` below had no matching `if`); `not args.files` is the
        # reconstructed condition -- confirm against upstream.
        if not args.files:
            if args.list:
                show_parsers()
            elif args.check_dependencies:
                print("Dependencies required for MAT2 %s:" % __version__)
                for key, value in sorted(check_dependencies().items()):
                    print('- %s: %s' % (key, 'yes' if value else 'no'))
            else:
                arg_parser.print_help()
            return 0

        if args.show:
            for f in __get_files_recursively(args.files):
                show_meta(f)
            return 0

        is_lightweight = (args.lightweight is True)
        jobs = list(zip(__get_files_recursively(args.files),
                        itertools.repeat(is_lightweight)))

        # The context manager releases the worker processes once every result
        # has been collected.
        with multiprocessing.Pool() as pool:
            results = list(pool.imap_unordered(clean_meta, jobs))
        return 0 if all(results) else -1
    
    Julien (jvoisin) Voisin's avatar
    Julien (jvoisin) Voisin committed
    
    if __name__ == '__main__':