Hello! We are running our annual fundraiser. Please consider making a donation if you value this freely available service or want to support people around the world working towards liberatory social change: https://riseup.net/donate

Commit 545dccc3 authored by jvoisin
Browse files

In archive-based formats, the `mimetype` file comes first

This should improve epub compatibility,
and, as a side effect, that of other formats.
parent 524bae59
Pipeline #22978 failed with stages
in 3 minutes and 52 seconds
...@@ -4,13 +4,14 @@ import tempfile ...@@ -4,13 +4,14 @@ import tempfile
import os import os
import logging import logging
import shutil import shutil
from typing import Dict, Set, Pattern, Union, Any from typing import Dict, Set, Pattern, Union, Any, List
from . import abstract, UnknownMemberPolicy, parser_factory from . import abstract, UnknownMemberPolicy, parser_factory
# Make pyflakes happy # Make pyflakes happy
assert Set assert Set
assert Pattern assert Pattern
assert List
assert Union assert Union
...@@ -115,9 +116,16 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): ...@@ -115,9 +116,16 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
temp_folder = tempfile.mkdtemp() temp_folder = tempfile.mkdtemp()
abort = False abort = False
items = list() # type: List[zipfile.ZipInfo]
for item in sorted(zin.infolist(), key=lambda z: z.filename):
if item.filename == 'mimetype':
items = [item] + items
else:
items.append(item)
# Since files order is a fingerprint factor, # Since files order is a fingerprint factor,
# we're iterating (and thus inserting) them in lexicographic order. # we're iterating (and thus inserting) them in lexicographic order.
for item in sorted(zin.infolist(), key=lambda z: z.filename): for item in items:
if item.filename[-1] == '/': # `is_dir` is added in Python3.6 if item.filename[-1] == '/': # `is_dir` is added in Python3.6
continue # don't keep empty folders continue # don't keep empty folders
......
...@@ -83,6 +83,8 @@ class TestZipOrder(unittest.TestCase): ...@@ -83,6 +83,8 @@ class TestZipOrder(unittest.TestCase):
previous_name = '' previous_name = ''
for item in zin.infolist(): for item in zin.infolist():
if previous_name == '': if previous_name == '':
if item.filename == 'mimetype':
continue
previous_name = item.filename previous_name = item.filename
continue continue
elif item.filename < previous_name: elif item.filename < previous_name:
...@@ -97,6 +99,8 @@ class TestZipOrder(unittest.TestCase): ...@@ -97,6 +99,8 @@ class TestZipOrder(unittest.TestCase):
previous_name = '' previous_name = ''
for item in zin.infolist(): for item in zin.infolist():
if previous_name == '': if previous_name == '':
if item.filename == 'mimetype':
continue
previous_name = item.filename previous_name = item.filename
continue continue
self.assertGreaterEqual(item.filename, previous_name) self.assertGreaterEqual(item.filename, previous_name)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment