Skip to content
Snippets Groups Projects
Commit 12e2330c authored by Julien (jvoisin) Voisin's avatar Julien (jvoisin) Voisin
Browse files

Remove some useless files

parent acb9b2d1
No related branches found
No related tags found
No related merge requests found
# Version string exposed by this module.
__version__ = '2.0'
class AbstractParser:
    """Base class describing what a file parser has to provide.

    It only stores the file name and an (empty) set of metadata field
    names; both operations raise ``NotImplementedError`` and are meant to
    be overridden.
    """

    def __init__(self, filename: str):
        """Remember the file this parser works on.

        :param filename: path of the file to handle
        """
        self.filename = filename
        # Names of the metadata fields handled by the parser; empty here.
        self.meta_list = set()

    def get_meta(self):
        """Return the metadata of the file.

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def remove_all(self):
        """Remove the metadata of the file.

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError
""" Handle PDF
"""
import os
import logging
import tempfile
import shutil
import io
import cairo
import gi
gi.require_version('Poppler', '0.18')
from gi.repository import Poppler, Gio, GLib
try:
from PIL import Image
except ImportError:
Image = None
from . import abstract
# Configure the root logger when this module is imported, with its level set
# to DEBUG.
# NOTE(review): this is a module-level side effect that applies to the whole
# process; consider moving it to the program's entry point.
logging.basicConfig(level=logging.DEBUG)
class PDFParser(abstract.AbstractParser):
    """Parser for PDF files, reading through Poppler and writing with cairo.

    ``get_meta`` reports the document properties named in ``meta_list``;
    ``remove_all`` writes a re-rendered copy of the document.
    """

    # Every page is rasterised at this multiple of the size Poppler reports.
    _SCALE = 2

    def __init__(self, filename: str):
        """Prepare the parser for *filename*.

        :param filename: path of the PDF to handle; it is made absolute and
            turned into the ``file://`` URI stored in ``self.uri``
        """
        super().__init__(filename)
        self.meta_list = {'title', 'author', 'subject',
                          'keywords', 'creator', 'producer', 'metadata'}
        self.uri = self._path_to_uri(self.filename)
        self.password = None

    @staticmethod
    def _path_to_uri(path):
        """Return the escaped ``file://`` URI of *path* (made absolute first)."""
        # Plain 'file://' + path concatenation leaves characters such as
        # '#' or '%' unescaped, which yields an invalid or misread URI.
        return GLib.filename_to_uri(os.path.abspath(path), None)

    def _extract_meta(self, document):
        """Return the truthy properties of *document* named in ``meta_list``.

        Values are converted with ``str``; falsy properties are left out.
        """
        found = {}
        for key in self.meta_list:
            value = document.get_property(key)
            if value:
                found[key] = str(value)
        return found

    def remove_all(self, output_filename: str = 'olol.pdf') -> bool:
        """Write a re-rendered copy of the document.

        Load the document into Poppler, render each page to a PNG held in
        memory, and paint those PNG onto the pages of a new PDF. The
        producer and creator of the new PDF are then overwritten via
        Poppler, because there is no way to tell cairo to not add
        "created by cairo" during rendering.

        TODO: Improve the resolution

        :param output_filename: where the new PDF is saved; a relative path
            is resolved against the current working directory
        :return: always ``True``
        """
        document = Poppler.Document.new_from_file(self.uri, self.password)
        page_count = document.get_n_pages()
        scale = self._SCALE

        pdf_out = io.BytesIO()
        # The initial size is a placeholder: it is reset for every page.
        pdf_surface = cairo.PDFSurface(pdf_out, 128, 128)
        pdf_context = cairo.Context(pdf_surface)

        for pagenum in range(page_count):
            page = document.get_page(pagenum)
            page_width, page_height = page.get_size()
            logging.info("Rendering page %d/%d", pagenum + 1, page_count)

            # Render the page onto an image surface, scaled up.
            img_surface = cairo.ImageSurface(cairo.FORMAT_ARGB32,
                                             int(page_width) * scale,
                                             int(page_height) * scale)
            img_context = cairo.Context(img_surface)
            img_context.scale(scale, scale)
            page.render_for_printing_with_options(img_context,
                                                  Poppler.PrintFlags.DOCUMENT)
            img_context.show_page()

            # Round-trip the rendering through an in-memory PNG.
            buf = io.BytesIO()
            img_surface.write_to_png(buf)
            img_surface.finish()
            buf.seek(0)

            # Paint the PNG on a page of the output PDF sized to match.
            img = cairo.ImageSurface.create_from_png(buf)
            pdf_surface.set_size(page_width * scale, page_height * scale)
            pdf_context.set_source_surface(img, 0, 0)
            pdf_context.paint()
            pdf_context.show_page()

        pdf_surface.finish()

        # Re-open the PDF produced by cairo with Poppler to edit its
        # properties before saving it.
        pdf_bytes = GLib.Bytes(pdf_out.getvalue())
        input_stream = Gio.MemoryInputStream.new_from_bytes(pdf_bytes)
        out_document = Poppler.Document.new_from_stream(input_stream, -1,
                                                        self.password, None)
        out_document.set_producer('totally not MAT2 ;)')
        out_document.set_creator('')

        logging.debug("Metadata left in the new document: %s",
                      self._extract_meta(out_document))
        out_document.save(self._path_to_uri(output_filename))
        return True

    def get_meta(self):
        """ Return a dict with all the meta of the file
        """
        logging.debug("URI: %s", self.uri)
        document = Poppler.Document.new_from_file(self.uri, self.password)
        return self._extract_meta(document)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment