""" Handle PDF

"""

import os
import re
import logging
import tempfile
import io

import cairo
import gi
gi.require_version('Poppler', '0.18')
from gi.repository import Poppler

from . import abstract

logging.basicConfig(level=logging.DEBUG)


class PDFParser(abstract.AbstractParser):
    mimetypes = {'application/pdf', }
    meta_list = {'author', 'creation-date', 'creator', 'format', 'keywords',
            'metadata', 'mod-date', 'producer', 'subject', 'title',
            'viewer-preferences'}

    def __init__(self, filename):
        super().__init__(filename)
        self.uri = 'file://' + os.path.abspath(self.filename)
        self.__scale = 2  # how much precision do we want for the render

    def remove_all(self):
        """
            Load the document into Poppler, render pages on PNG,
            and shove those PNG into a new PDF. Metadata from the new
            PDF are removed via Poppler, because there is no way to tell
            cairo to not add "created by cairo" during rendering.
        """
        document = Poppler.Document.new_from_file(self.uri, None)
        pages_count = document.get_n_pages()

        _, tmp_path = tempfile.mkstemp()
        pdf_surface = cairo.PDFSurface(tmp_path, 128, 128)
        pdf_context = cairo.Context(pdf_surface)

        for pagenum in range(pages_count):
            page = document.get_page(pagenum)
            page_width, page_height = page.get_size()
            logging.info("Rendering page %d/%d", pagenum + 1, pages_count)

            img_surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, int(page_width) * self.__scale, int(page_height) * self.__scale)
            img_context = cairo.Context(img_surface)

            img_context.scale(self.__scale, self.__scale)
            page.render_for_printing(img_context)
            img_context.show_page()

            buf = io.BytesIO()
            img_surface.write_to_png(buf)
            img_surface.finish()
            buf.seek(0)

            img = cairo.ImageSurface.create_from_png(buf)
            pdf_surface.set_size(page_width*self.__scale, page_height*self.__scale)
            pdf_context.set_source_surface(img, 0, 0)
            pdf_context.paint()
            pdf_context.show_page()

        pdf_surface.finish()

        # Removes metadata added by Poppler
        document = Poppler.Document.new_from_file('file://' + tmp_path)
        document.set_producer('')
        document.set_creator('')
        document.save('file://' + os.path.abspath(self.output_filename))
        os.remove(tmp_path)

        return True


    def __parse_metadata_field(self, data:str) -> dict:
        metadata = {}
        for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I):
            metadata[key] = value
        return metadata

    def get_meta(self):
        """ Return a dict with all the meta of the file
        """
        document = Poppler.Document.new_from_file(self.uri, None)
        metadata = {}
        for key in self.meta_list:
            if document.get_property(key):
                metadata[key] = document.get_property(key)
        if 'metadata' in metadata:
            parsed_meta =  self.__parse_metadata_field(metadata['metadata'])
            return {**metadata, **parsed_meta}
        return metadata