Skip to content
Snippets Groups Projects
Commit 8591558b authored by Julien (jvoisin) Voisin's avatar Julien (jvoisin) Voisin
Browse files

Minor code cleanup

parent f9943c51
No related branches found
No related tags found
1 merge request: !39 "Add tar archive support"
Pipeline #24293 passed
......@@ -16,6 +16,9 @@ assert Pattern
# pylint: disable=not-callable,assignment-from-no-return
# An ArchiveClass is a class representing an archive,
# while an ArchiveMember is a class representing an element
# (usually a file) of an archive.
ArchiveClass = Union[zipfile.ZipFile, tarfile.TarFile]
ArchiveMember = Union[zipfile.ZipInfo, tarfile.TarInfo]
......@@ -57,7 +60,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
self.is_archive_valid()
def is_archive_valid(self):
"""Raise a ValueError if the current file isn't valid."""
"""Raise a ValueError if the current archive isn't a valid one."""
def _specific_cleanup(self, full_path: str) -> bool:
""" This method can be used to apply specific treatment
......@@ -104,10 +107,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
temp_folder = tempfile.mkdtemp()
for item in self._get_all_members(zin):
local_meta = dict() # type: Dict[str, Union[str, Dict]]
for k, v in self._get_member_meta(item).items():
local_meta[k] = v
local_meta = self._get_member_meta(item)
member_name = self._get_member_name(item)
if member_name[-1] == '/': # pragma: no cover
......@@ -118,13 +118,11 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
full_path = os.path.join(temp_folder, member_name)
specific_meta = self._specific_get_meta(full_path, member_name)
for (k, v) in specific_meta.items():
local_meta[k] = v
local_meta = {**local_meta, **specific_meta}
tmp_parser, _ = parser_factory.get_parser(full_path) # type: ignore
if tmp_parser:
for k, v in tmp_parser.get_meta().items():
local_meta[k] = v
member_parser, _ = parser_factory.get_parser(full_path) # type: ignore
if member_parser:
local_meta = {**local_meta, **member_parser.get_meta()}
if local_meta:
meta[member_name] = local_meta
......@@ -144,7 +142,6 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
# Sort the items to process, to reduce fingerprinting,
# and keep them in the `items` variable.
items = list() # type: List[ArchiveMember]
for item in sorted(self._get_all_members(zin), key=self._get_member_name):
# Some file formats require the `mimetype` file
# to be the first file in the archive.
......@@ -175,8 +172,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
elif any(map(lambda r: r.search(member_name), self.files_to_omit)):
continue
else: # supported files that we want to first clean, then add
tmp_parser, mtype = parser_factory.get_parser(full_path) # type: ignore
if not tmp_parser:
member_parser, mtype = parser_factory.get_parser(full_path) # type: ignore
if not member_parser:
if self.unknown_member_policy == UnknownMemberPolicy.OMIT:
logging.warning("In file %s, omitting unknown element %s (format: %s)",
self.filename, member_name, mtype)
......@@ -190,15 +187,15 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
self.filename, member_name, mtype)
abort = True
continue
if tmp_parser:
if tmp_parser.remove_all() is False:
else:
if member_parser.remove_all() is False:
logging.warning("In file %s, something went wrong \
with the cleaning of %s \
(format: %s)",
self.filename, member_name, mtype)
abort = True
continue
os.rename(tmp_parser.output_filename, full_path)
os.rename(member_parser.output_filename, full_path)
zinfo = self.member_class(member_name) # type: ignore
clean_zinfo = self._clean_member(zinfo)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment