Skip to content
Snippets Groups Projects
mat2 4.45 KiB
Newer Older
  • Learn to ignore specific revisions
  • #!/usr/bin/python3
    
    import os
    
    from typing import Tuple
    import sys
    import itertools
    
    import mimetypes
    
    Julien (jvoisin) Voisin's avatar
    Julien (jvoisin) Voisin committed
    import argparse
    
    import multiprocessing

    # libmat2 is imported inside a guard: a ValueError raised while importing
    # it is reported to the user and aborts the program with exit status 1
    # instead of surfacing as a traceback.
    try:
        from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS, check_dependencies
    except ValueError as e:
        print(e)
        sys.exit(1)
    
    __version__ = '0.3.0'
    
    def __check_file(filename: str, mode: int=os.R_OK) -> bool:
        """Check that `filename` is an existing regular file accessible with `mode`.

        A diagnostic line is printed on standard output for the first check
        that fails.

        :param filename: path of the file to check.
        :param mode: `os.access` permission mask (e.g. `os.R_OK`,
            `os.R_OK | os.W_OK`); defaults to read access only.
        :return: True when every check passes, False otherwise.
        """
        if not os.path.exists(filename):
            print("[-] %s doesn't exist." % filename)
            return False
        if not os.path.isfile(filename):
            print("[-] %s is not a regular file." % filename)
            return False
        if not os.access(filename, mode):
            # Only name the permissions that were actually requested, so a
            # read-only check does not complain about write access.
            wanted = []
            if mode & os.R_OK:
                wanted.append('readable')
            if mode & os.W_OK:
                wanted.append('writeable')
            print("[-] %s is not %s." % (filename, ' and '.join(wanted) or 'accessible'))
            return False
        return True
    
    Julien (jvoisin) Voisin's avatar
    Julien (jvoisin) Voisin committed
    def create_arg_parser() -> argparse.ArgumentParser:
        """Build the command-line parser of the program.

        Options:
          files                     zero or more files to process
          -v / --version            print the MAT2 version and exit
          -l / --list               list all supported file formats
          -c / --check-dependencies report which dependencies are available
          -s / --show               list metadata without removing them
          -L / --lightweight        remove only some metadata

        `--show` and `--lightweight` are mutually exclusive.

        :return: the configured parser; nothing is parsed here.
        """
        parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')

        parser.add_argument('files', nargs='*', help='the files to process')
        parser.add_argument('-v', '--version', action='version',
                            version='MAT2 %s' % __version__)
        parser.add_argument('-l', '--list', action='store_true',
                            help='list all supported fileformats')
        parser.add_argument('-c', '--check-dependencies', action='store_true',
                            help='check if MAT2 has all the dependencies it needs')

        # Showing metadata and (lightweight) cleaning cannot be combined.
        info = parser.add_mutually_exclusive_group()
        info.add_argument('-s', '--show', action='store_true',
                          help='list harmful metadata detectable by MAT2 without removing them')
        info.add_argument('-L', '--lightweight', action='store_true',
                          help='remove SOME metadata')

        return parser
    
    
    def show_meta(filename: str):
        """Print the metadata of `filename` on standard output.

        Nothing is shown when the file fails `__check_file` (read access) or
        when no parser is available for its format; in the latter case a
        diagnostic naming the detected type is printed.

        :param filename: path of the file to inspect.
        """
        if not __check_file(filename):
            return

        p, mtype = parser_factory.get_parser(filename)  # type: ignore
        if p is None:
            print("[-] %s's format (%s) is not supported" % (filename, mtype))
            # Without a parser there is nothing to query: bail out instead of
            # calling get_meta() on None.
            return

        print("[+] Metadata for %s:" % filename)
        for k, v in p.get_meta().items():
            try:  # FIXME this is ugly.
                print("  %s: %s" % (k, v))
            except UnicodeEncodeError:
                # The value cannot be encoded for the output stream; show the
                # key only.
                print("  %s: harmful content" % k)
    
    def clean_meta(params: Tuple[str, bool]) -> bool:
        """Remove the metadata of a single file.

        The arguments are packed in one tuple so that the function can be
        mapped over an iterable of jobs (see its use with the process pool in
        `main`).

        :param params: `(filename, is_lightweight)`; when `is_lightweight` is
            true the parser's `remove_all_lightweight()` is used instead of
            `remove_all()`.
        :return: False when the file is not a readable and writeable regular
            file or when its format is unsupported; otherwise whatever the
            parser's removal method returns.
        """
        filename, is_lightweight = params

        if not __check_file(filename, os.R_OK|os.W_OK):
            return False

        p, mtype = parser_factory.get_parser(filename)  # type: ignore
        if p is None:
            print("[-] %s's format (%s) is not supported" % (filename, mtype))
            # Report the failure instead of calling a method on None.
            return False

        if is_lightweight:
            return p.remove_all_lightweight()
        return p.remove_all()
    
    def show_parsers():
        """Print the supported mimetypes with their file extensions.

        For every mimetype declared by the parsers returned by
        `parser_factory._get_parsers()`, the extensions guessed by the
        `mimetypes` module are listed, except those whose name (without the
        leading dot) is in `UNSUPPORTED_EXTENSIONS`. Mimetypes left without
        any extension are skipped. Lines and extensions are sorted so the
        output is stable from one run to the next.
        """
        print('[+] Supported formats:')

        formats = []
        for parser in parser_factory._get_parsers():
            for mtype in parser.mimetypes:
                extensions = {
                    extension
                    for extension in mimetypes.guess_all_extensions(mtype)
                    if extension[1:] not in UNSUPPORTED_EXTENSIONS  # skip the dot
                }
                if not extensions:
                    # we're not supporting a single extension in the current
                    # mimetype, so there is no point in showing the mimetype at all
                    continue
                # Sets have no defined order: sort to get reproducible output.
                formats.append('  - %s (%s)' % (mtype, ', '.join(sorted(extensions))))
        print('\n'.join(sorted(formats)))
    
    def __get_files_recursively(files):
        """Yield the usable files designated by `files`.

        Directories are walked recursively with `os.walk`; any other entry is
        considered as a single file. Only paths accepted by `__check_file`
        (existing, regular, readable) are yielded; the others are reported by
        `__check_file` and skipped.

        :param files: iterable of file or directory paths.
        """
        for f in files:
            if os.path.isdir(f):
                for path, _, _files in os.walk(f):
                    for _f in _files:
                        fname = os.path.join(path, _f)
                        if __check_file(fname):
                            yield fname
            elif __check_file(f):
                yield f
    
    Julien (jvoisin) Voisin's avatar
    Julien (jvoisin) Voisin committed
    def main():
        """Entry point: parse the command line and run the requested action.

        - Without files: list the supported formats (`--list`), report the
          dependencies (`--check-dependencies`), or print the help.
        - With `--show`: print the metadata of every given file.
        - Otherwise: clean every given file using a pool of worker processes.

        :return: 0 on success, -1 when cleaning failed for at least one file.
        """
        arg_parser = create_arg_parser()
        args = arg_parser.parse_args()

        if not args.files:
            if args.list:
                show_parsers()
            elif args.check_dependencies:
                print("Dependencies required for MAT2 %s:" % __version__)
                for key, value in sorted(check_dependencies().items()):
                    print('- %s: %s' % (key, 'yes' if value else 'no'))
            else:
                arg_parser.print_help()
            return 0

        if args.show:
            for f in __get_files_recursively(args.files):
                show_meta(f)
            return 0

        mode = (args.lightweight is True)
        # Materialise the jobs first so that the file checks (and their
        # messages) happen in this process, before any work is dispatched.
        jobs = list(zip(__get_files_recursively(args.files), itertools.repeat(mode)))

        # The context manager releases the worker processes once every result
        # has been collected.
        with multiprocessing.Pool() as pool:
            ret = list(pool.imap_unordered(clean_meta, jobs))
        return 0 if all(ret) else -1
    
    Julien (jvoisin) Voisin's avatar
    Julien (jvoisin) Voisin committed
    
    if __name__ == '__main__':