Skip to content
Snippets Groups Projects
Select Git revision
  • 6b7e8ad8c04df96180e825a6e0afdf5781c08243
  • master default
  • fix_heic
  • 0.13.5 protected
  • 0.13.4 protected
  • 0.13.3 protected
  • 0.13.2 protected
  • 0.13.1 protected
  • 0.13.0 protected
  • 0.12.4 protected
  • 0.12.3 protected
  • 0.12.2 protected
  • 0.12.1 protected
  • 0.12.0 protected
  • 0.11.0 protected
  • 0.10.1 protected
  • 0.10.0 protected
  • 0.9.0 protected
  • 0.8.0 protected
  • 0.7.0 protected
  • 0.6.0 protected
  • 0.5.0 protected
  • 0.4.0 protected
23 results

mat2

Blame
  • mat2 4.66 KiB
    #!/usr/bin/python3
    
    import os
    from typing import Tuple
    import sys
    import itertools
    import mimetypes
    import argparse
    import multiprocessing
    import logging
    
    # Import the libmat2 names this script uses. If the import raises a
    # ValueError, print the error and exit with status 1 instead of showing
    # a traceback.
    try:
        from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS, check_dependencies
    except ValueError as e:
        print(e)
        sys.exit(1)
    
    # Version string shown by `--version` and by the dependency listing.
    __version__ = '0.3.1'
    
    def __check_file(filename: str, mode: int=os.R_OK) -> bool:
        """Check that `filename` is an existing regular file accessible with `mode`.

        The checks run in order (existence, regular file, access rights); the
        first one that fails prints a one-line diagnostic to stdout.

        :param filename: path of the file to check
        :param mode: `os.access` mode mask, e.g. `os.R_OK` or `os.R_OK|os.W_OK`
        :return: True if every check passes, False otherwise
        """
        if not os.path.exists(filename):
            print("[-] %s doesn't exist." % filename)
            return False
        if not os.path.isfile(filename):
            print("[-] %s is not a regular file." % filename)
            return False
        if not os.access(filename, mode):
            # Only name the permissions that were actually requested, so a
            # read-only check does not complain about write access.
            wanted = []
            if mode & os.R_OK:
                wanted.append('readable')
            if mode & os.W_OK:
                wanted.append('writeable')
            if mode & os.X_OK:
                wanted.append('executable')
            description = ' and '.join(wanted) or 'accessible'
            print("[-] %s is not %s." % (filename, description))
            return False
        return True
    
    
    def create_arg_parser():
        """Build and return the `argparse.ArgumentParser` for the command line.

        The parser accepts any number of positional `files`, a `--version`
        action, three boolean flags (`--list`, `--check-dependencies`,
        `--verbose`) and a mutually exclusive pair (`--show`, `--lightweight`).
        """
        cli = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
        cli.add_argument('files', nargs='*', help='the files to process')
        cli.add_argument('-v', '--version', action='version',
                         version='MAT2 %s' % __version__)

        # Plain on/off switches, registered in the order they appear in --help.
        switches = (
            ('-l', '--list', 'list all supported fileformats'),
            ('-c', '--check-dependencies',
             'check if MAT2 has all the dependencies it needs'),
            ('-V', '--verbose', 'show more verbose status information'),
        )
        for short_flag, long_flag, description in switches:
            cli.add_argument(short_flag, long_flag, action='store_true',
                             help=description)

        # --show and --lightweight cannot be combined.
        exclusive = cli.add_mutually_exclusive_group()
        exclusive_switches = (
            ('-s', '--show',
             'list harmful metadata detectable by MAT2 without removing them'),
            ('-L', '--lightweight', 'remove SOME metadata'),
        )
        for short_flag, long_flag, description in exclusive_switches:
            exclusive.add_argument(short_flag, long_flag, action='store_true',
                                   help=description)

        return cli
    
    
    def show_meta(filename: str):
        """Print the metadata of `filename` to stdout, one `key: value` per line.

        Nothing is listed when the file fails `__check_file` or when
        `parser_factory.get_parser` returns no parser for it (a message is
        printed in the latter case).
        """
        if not __check_file(filename):
            return

        handler, mime = parser_factory.get_parser(filename)  # type: ignore
        if handler is None:
            print("[-] %s's format (%s) is not supported" % (filename, mime))
            return

        print("[+] Metadata for %s:" % filename)
        for key, value in handler.get_meta().items():
            try:  # FIXME this is ugly.
                entry = "  %s: %s" % (key, value)
                print(entry)
            except UnicodeEncodeError:
                # The value cannot be encoded for output: show a placeholder.
                print("  %s: harmful content" % key)
    
    def clean_meta(params: Tuple[str, bool]) -> bool:
        """Remove the metadata of one file.

        :param params: a `(filename, is_lightweight)` pair; packed in a single
            tuple so the function can be mapped over by a process pool
        :return: False if the file is not a readable and writeable regular
            file or has no parser; otherwise whatever the parser's removal
            method returns
        """
        path, lightweight = params
        if not __check_file(path, os.R_OK|os.W_OK):
            return False

        cleaner, mime = parser_factory.get_parser(path)  # type: ignore
        if cleaner is None:
            print("[-] %s's format (%s) is not supported" % (path, mime))
            return False

        # Pick the removal routine first, then run it.
        strip = cleaner.remove_all_lightweight if lightweight else cleaner.remove_all
        return strip()
    
    
    def show_parsers():
        """Print the supported mimetypes with their supported file extensions.

        One line is printed per mimetype, sorted alphabetically. The
        extensions on each line are sorted too, so the output is identical
        from one run to the next.
        """
        print('[+] Supported formats:')
        formats = []
        for parser in parser_factory._get_parsers():
            for mtype in parser.mimetypes:
                # extension[1:] skips the leading dot before the lookup
                extensions = {
                    extension
                    for extension in mimetypes.guess_all_extensions(mtype)
                    if extension[1:] not in UNSUPPORTED_EXTENSIONS
                }
                if not extensions:
                    # we're not supporting a single extension in the current
                    # mimetype, so there is no point in showing the mimetype at all
                    continue
                # A set has no defined order: sort before joining.
                formats.append('  - %s (%s)' % (mtype, ', '.join(sorted(extensions))))
        print('\n'.join(sorted(formats)))
    
    
    def __get_files_recursively(files):
        """Yield every path from `files` that passes `__check_file`.

        Directories are walked with `os.walk` and each file found inside is
        checked individually; any other entry is checked as-is.
        """
        for entry in files:
            if not os.path.isdir(entry):
                if __check_file(entry):
                    yield entry
                continue

            for root, _dirs, names in os.walk(entry):
                for name in names:
                    candidate = os.path.join(root, name)
                    if __check_file(candidate):
                        yield candidate
    
    def main():
        """Parse the command line and run the requested action.

        Without files: list the supported formats (`--list`), report the
        dependencies (`--check-dependencies`), or print the help text.
        With files: show their metadata (`--show`) or clean them in a pool of
        worker processes.

        :return: 0 on success, -1 if cleaning at least one file failed
        """
        arg_parser = create_arg_parser()
        args = arg_parser.parse_args()

        if args.verbose:
            logging.basicConfig(level=logging.INFO)

        if not args.files:
            if args.list:
                show_parsers()
            elif args.check_dependencies:
                print("Dependencies required for MAT2 %s:" % __version__)
                for key, value in sorted(check_dependencies().items()):
                    print('- %s: %s' % (key, 'yes' if value else 'no'))
            else:
                arg_parser.print_help()
            return 0

        if args.show:
            for f in __get_files_recursively(args.files):
                show_meta(f)
            return 0

        mode = (args.lightweight is True)
        # Collect the jobs up front so the file checks (and their messages)
        # run here, before any worker starts.
        jobs = list(zip(__get_files_recursively(args.files), itertools.repeat(mode)))

        # The context manager shuts the pool down once all results are in.
        with multiprocessing.Pool() as pool:
            ret = list(pool.imap_unordered(clean_meta, jobs))
        return 0 if all(ret) else -1
    
    # Script entry point: run main() and pass its return value to sys.exit().
    if __name__ == '__main__':
        sys.exit(main())