Skip to content
Snippets Groups Projects
Commit 30e567de authored by Julien (jvoisin) Voisin's avatar Julien (jvoisin) Voisin
Browse files

tmp

parent 9826de35
Branches
No related tags found
No related merge requests found
......@@ -12,7 +12,7 @@ assert Set
assert Pattern
def _parse_xml(full_path: str):
""" This function parse XML, with namespace support. """
""" This function parses XML, with namespace support. """
namespace_map = dict()
for _, (key, value) in ET.iterparse(full_path, ("start-ns", )):
......@@ -22,6 +22,21 @@ def _parse_xml(full_path: str):
return ET.parse(full_path), namespace_map
def _sort_xml_attributes(full_path: str) -> bool:
    """ Reorder the grandchildren of the XML root, and rewrite the file in place.

    For every direct child of the root element, its own children are sorted
    by `(tag, value of the "desc" attribute)`. The resulting tree is then
    written back to `full_path`, with an XML declaration.

    :param full_path: path of the XML file to rewrite in place.
    :return: always `True`.
    :raises xml.etree.ElementTree.ParseError: if the file isn't well-formed XML.
    """
    tree = ET.parse(full_path)

    for c in tree.getroot():
        # A missing `desc` is treated as an empty string: keeping it as `None`
        # makes `sorted` raise a `TypeError` as soon as two siblings share a
        # tag and only one of them carries the attribute.
        c[:] = sorted(c, key=lambda child: (child.tag, child.get('desc', '')))

    # NOTE(review): no namespace prefixes are registered before writing, so
    # ElementTree may emit generated ones (ns0, ns1, …) — confirm that this is
    # acceptable for the consumers of these files.
    tree.write(full_path, xml_declaration=True)
    return True
class MSOfficeParser(ArchiveBasedAbstractParser):
mimetypes = {
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
......@@ -154,6 +169,9 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
return True
def _specific_cleanup(self, full_path: str) -> bool:
    """ Clean up the single file located at `full_path`.

    Any file whose name ends with `.xml` is passed to `_sort_xml_attributes`.
    A file named exactly `content.xml` is then handed to
    `__remove_revisions`, whose result is returned; every other file
    yields `True`.
    """
    filename = os.path.basename(full_path)

    if filename.endswith('.xml'):
        _sort_xml_attributes(full_path)

    if filename != 'content.xml':
        return True
    return self.__remove_revisions(full_path)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment