Commit 545dccc3 authored by Julien (jvoisin) Voisin
Browse files

In archive-based formats, the `mimetype` file comes first

This should improve EPUB compatibility,
and that of other archive-based formats as a side effect.
parent 524bae59
Loading
Loading
Loading
Loading
+10 −2
Original line number Diff line number Diff line
@@ -4,13 +4,14 @@ import tempfile
import os
import logging
import shutil
from typing import Dict, Set, Pattern, Union, Any
from typing import Dict, Set, Pattern, Union, Any, List

from . import abstract, UnknownMemberPolicy, parser_factory

# Make pyflakes happy
assert Set
assert Pattern
assert List
assert Union


@@ -115,9 +116,16 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
            temp_folder = tempfile.mkdtemp()
            abort = False

            items = list()  # type: List[zipfile.ZipInfo]
            for item in sorted(zin.infolist(), key=lambda z: z.filename):
                if item.filename == 'mimetype':
                    items = [item] + items
                else:
                    items.append(item)

            # Since file order is a fingerprinting factor, we iterate over
            # (and thus insert) the files in lexicographic order, except for
            # `mimetype`, which always comes first.
            for item in sorted(zin.infolist(), key=lambda z: z.filename):
            for item in items:
                if item.filename[-1] == '/':  # `is_dir` is added in Python3.6
                    continue  # don't keep empty folders

+4 −0
Original line number Diff line number Diff line
@@ -83,6 +83,8 @@ class TestZipOrder(unittest.TestCase):
            previous_name = ''
            for item in zin.infolist():
                if previous_name == '':
                    if item.filename == 'mimetype':
                        continue
                    previous_name = item.filename
                    continue
                elif item.filename < previous_name:
@@ -97,6 +99,8 @@ class TestZipOrder(unittest.TestCase):
            previous_name = ''
            for item in zin.infolist():
                if previous_name == '':
                    if item.filename == 'mimetype':
                        continue
                    previous_name = item.filename
                    continue
                self.assertGreaterEqual(item.filename, previous_name)