From 12b3b39d4d5520af04233578ec93138eb192621e Mon Sep 17 00:00:00 2001
From: jvoisin <julien.voisin@dustri.org>
Date: Sat, 31 Mar 2018 21:20:21 +0200
Subject: [PATCH] Add support for .odt

---
 src/libreoffice.py    |  54 ++++++++++++++++++++++++++++++++++++++++++
 tests/data/dirty.odt  | Bin 0 -> 14114 bytes
 tests/test_libmat2.py |  26 +++++++++++++++++++-
 3 files changed, 79 insertions(+), 1 deletion(-)
 create mode 100644 src/libreoffice.py
 create mode 100644 tests/data/dirty.odt

diff --git a/src/libreoffice.py b/src/libreoffice.py
new file mode 100644
index 0000000..b7e0dfb
--- /dev/null
+++ b/src/libreoffice.py
@@ -0,0 +1,54 @@
+import re
+import subprocess
+import json
+import zipfile
+import tempfile
+import shutil
+import os
+
+from . import abstract, parser_factory
+
+class LibreOfficeParser(abstract.AbstractParser):
+    """Parser for OpenDocument text files (.odt), which are zip archives."""
+    mimetypes = {
+            'application/vnd.oasis.opendocument.text',
+    }
+
+    def get_meta(self):
+        """Return the metadata of the archive's `meta.xml` as a dict.
+
+        Keys are the opening tags, values the text they enclose. Yes, parsing
+        xml with regexp ain't pretty, be my guest and fix it if you want.
+        """
+        with zipfile.ZipFile(self.filename) as zipin:
+            if 'meta.xml' not in zipin.namelist():
+                return {}
+            content = zipin.read('meta.xml').decode('utf-8')
+        # Non-greedy value: identical tags sharing a line must not be merged.
+        metadata = dict(re.findall(r"<((?:meta|dc).+?)>(.+?)</\1>", content, re.I))
+        if not metadata:  # better safe than sorry
+            metadata['meta.xml'] = 'harmful content'
+        return metadata
+
+    def remove_all(self):
+        """Write a copy of the file, minus its metadata, to `output_filename`.
+
+        `meta.xml` is dropped; other members are cleaned by the parser that
+        `parser_factory` finds for them, or skipped if there is none.
+        """
+        with tempfile.TemporaryDirectory() as temp_folder, \
+                zipfile.ZipFile(self.filename, 'r') as zin, \
+                zipfile.ZipFile(self.output_filename, 'w') as zout:
+            for item in zin.infolist():
+                # Skip directories (`is_dir` is added in Python3.6) and metadata
+                if item.filename.endswith('/') or item.filename == 'meta.xml':
+                    continue
+                zin.extract(member=item, path=temp_folder)
+                extracted = os.path.join(temp_folder, item.filename)
+                tmp_parser = parser_factory.get_parser(extracted)
+                if tmp_parser is None:
+                    print("%s isn't supported" % item.filename)
+                    continue
+                tmp_parser.remove_all()
+                zout.write(tmp_parser.output_filename, item.filename)
+        return True  # unconditionally; sub-parsers' results aren't checked
diff --git a/tests/data/dirty.odt b/tests/data/dirty.odt
new file mode 100644
index 0000000000000000000000000000000000000000..926ebff39ef53a4d3cc5caeccb5118383482e8a4
GIT binary patch
literal 14114
zcmeHubzD?i*FPX2-2xJl(#_D_AzdN}2*UuQ0|Ud*f;5+wk`g4OyQBo9MH*DPL_$F6
zM))1{diCDN`+VN_^ZWPRaAxN0v)9^j*7~ls*I7sB))jOz6cjAr|JsSPd6xhlD+&tA
z<pETo*hB3hNDoJdrK2MZ3bsT-;SRj64%R$yOJ}Gv58M&rU=0Vm*h3tUJV=N;Qs-~X
zf@B*VT!>LnE)Q;inGO_;bU{FzdHMN%ErJ5y&jk7Tl@x>o`9X?2jt;h=I$A3DI8-<Q
z9ln~Xq8{)q037^SXuu~$kG(4Jh3%+%#~B5MWD_`&P)+faC{a*|($o~?42;`r?mW=Z
zzdbf9Cg#VCAja@xWBlWuVm${%c<22bws-e2phb9LS}y7Z<Z0q>$mAqAaQ5{eI$X1C
zS$G2En`~$tR(t_}V4`DZ_{m}-0*KngSWizYPSW)@9()$r{X&=QJmn?hRZ{b8znGSw
zi;^LbsEyKDlqQ0|o$oFWMX4AK!HG6lhA*`rUG9~Tyxm8U?8!M-zs<RA9EOfo%BpNZ
zy!Hi0EdSNb&9b7R_0?5btYMJmXmvuuHTl<*laq6Ek3MvS4u!Wbyo~f=rmvd7t7=Q?
z@9)P9XuZeBhbL&$mxN8qqLC(^Fu0Q1Zxcu)N3@|s<BG-?YhKoZ{1mH|hl`E9y|WXm
zo`ctp71XvqRwf}TN`#&{wk{<lm7SH<6-C?2*EfahO--XYdhOaZZf<TZEiE!7&2&kR
zz9fE=5^{#O6BAHNOJZVTO9%u5<=nVr0%pT+XlR%a9}k<Z8}8|WPQ9=P=<xCJtt#B7
z>zqb&HNB2^AV?%Ax;qsFjnyjvQAJ_~FtD>@D^p_NMi^LF%#Ds_>lGY5^Z|ZW*$z@s
zQ>Q(BYQT}udhd_$@ZFsqgo{g)<AOXAS#8X9PI{eMqqDP9uK?K{MH>_p<l*5_Qc}{|
z+L|hCk8pIfb8sNX2@+2mn4L`@T$!4f;Ky@5-dWO?KDKW4#~AXCPn%z84<W?G#jS<U
zIe@5ys3OhFL@4pZ)1rEp-Yo5SpB`YN`;)S29i5)8EH8I;b)B4^wtK3sTIlQRo0+90
zBqTg}@+3W7E7n?5Gjamf?6Us;ZGpb`!-rjw)GDJ$FRw=BY<@~SW;GTT7Oiy2>tGF{
z4mP-a@x2I{qxmaWuZB!cn)uV#-7S@+-`d)m-*&gRx5o^SV~JY=dh6)mGJUb}p)*|O
z{3shJs>}kZ{a9d0Ny(>ApK9nDC@W*iv(eMvyLWGXyaJr3{VW$sC5(Gxc>X<1m>Mtm
z{M;ujYo1&hsFmhR)=*GT*yzq~AjHQ9c+WO@iLkO>WvKS~>REp`JtYNamWfzi1U`#L
zK)|9_Sy@TKKz9B5@z#QZic0tDs@-@nx}5I=yOiZ^cdE!sYG8#1SK^qVASw?@b9eWn
zj!;teun*hr0?8YnM)Tgke~&JQhmS8NCf1)U7{aXfeg?07%l#!jweZKKCCy|3C3>Q}
zEk0j?wc_F7iKdewARthtTnxBMD1fK0r{_<UBIV_3YC6>#aFxG{Oeyj%C#3>Mf{l$$
zv(MK_m~e~dzB*$J2n3RRbS}xty0YA#O1T(L!J97TteiL6<i4HN+-#mVT2oyO^tQjZ
z=X>*&jpOj}u&$1d<KC)n;L!GBx0jcfcDiIJKa0481Ry)NhyrVBgo+n|sqHTJUtzQq
z5u^48gx<8)AwfOI&(AMCJzY*tPFh-edwcu*{G3dw#JGZyfnlLDoPt`!VQ+VLcYhxr
z4=-Llr=p_b=FOWN31ReDPjV=OlO;VWo0_DEnB|BTyQ4$)vl|*3IB%EUi(q18B$B^M
zKqKmPP^_Gd$`n}*h?JSKa@VU@vIYiZx4v-kVxjwIWif^-BDOztc64-n{AjJMt(~cZ
z1;1T-w?Kz2icUsaRP=QeUc0pC-s;-BBKb_mu88ZDi@Pg>hkJWO%Goi4D@=iBXJ<t6
z_iIvmmnz#UTw{BefZ<_dV*`y^Tt8DqcP}F90YIXh9vK<21C?5|%yxD{-W3{JRfq}+
z?XQ=$xFQh#wSbnTiogj733<7=(2c0;(+36y06hb4c7QV`CMMQFjeMpo*8bt4LedqM
zs)@cO1hB%+2t@POgQ=sVBUbHnEOfN6EV~pN%3w>%stK4tGLbx>Jb+kYWva!o01$C}
zazcfN>3%EL+R14<f%EqC%nW>{p>qjgi4*u3SfNHhINM0$dK9;g&;0yQ1BddS`GgTs
zQBmm?7{F&=%3<k}DG>#>-WzXubODIO=*S2#nSmulq%xBn5r915VPPh9j+u#xw~<IB
zplCoZfW#5W_del{>s`VLvKma6!X@Vdw2;q9T|Epy0;)(&O-*Jsc^;#pcYJAOWxVpQ
z?Le{2Y5@Em93C<SKG$QH&wu$+lAXOEBjeu6KpHS1j)b+Xg?0dc#1S7=U0q!P?F_rd
z`7|x<%9SesFo@D<J$O(Q85tQ6aOKD(Z!}HZMV*Vh*Saz=5S=HHlO*(gsTnmXsq;cx
zkXfTEaM!1sy&G$460-8|RM`M{a|bBK#=fUS^6l;I!otGOtE!wWEa>Ff07a6JXaq`O
zA%}p#4lssGaYND8)a2&kl5}3yxO<m|`cuorxfc}rsl>E)ZEbB{&BP?7vC#_#+enje
zV<zc|y?N{HeHSk;iNszZO1$;=CE*bfc(}O0!oBI~;ZN4s-`~G+;|8#Fjc%LDSODHK
z1ma-^WM?zAQPPPa6j|a53^?fM=#nz3tE+QzlnM-NAP_ML2?;T=o+ZS1ta3In_-R~R
z6}X!~My4exi87)>Jx8zT9RS(M$;m8nYCMT_lI}VsMvV;(J{MmPX=!O~2h%Yc3JVG<
zs;d>TloS=;j*ZcYBVW(W(J@%t+UDu76&4l_q)CMEE0^yI3JUUfS@;~^l*iHlW=BO!
zi_cb6SV*7z>WL44`hb2|wD@=*ZCU2%9q)Y_1)2bwvAk?kV&t{IW+<z3`?f6{KKJ(R
z+o>t9g*FUTQOd==J>)(12M-=RcEid%I&u$aef;D}u3iCO7naSN*RLO|GUEg>3Ja4n
zpi9t^be`<5Gr-Y8`0;iD++5Wd1u5yO=Tz3#))p7T=9;}J7r&k!awe!pI6)u~Jv}`Y
zm8+>J!6701U4YdF1n21m$WON1^|MKYlbl>@&aK4r4E-dHyixn97n(VGO`iK4H*Z1!
zlP{n7m>%m!^w`i4ik-3^p!0x-aFqxECvtM~7cX95U|>)#0^bJ*t`Sb!+Jjk2;ehZF
z%YXXxX@~+rpi`TgI`z_H7r>o+!l`spp4D!f=B|t#xtE~LZTbRGjlILext2#VfENIm
z8gcQIYZycGi<OlEB}TaAIJme9^71d6nq0(Zwzl}%`N))7ZIzUi9!krIi#O!vVq*Fk
zO8J}_(-S?Fd1PMnP5{sO^x(6U*8wFSrZB3b+viD)PtdcYZL5l-)#2>>HR2!;5)eD8
zNE-mSP*G9o`+e7C$qWn)`_m*S7|2LSvWD$|yCi*nv<RqtT+he(c@*MM{)2-9VXDZc
zrY6Ri*WKMAS!}V){WewM;ka@{h<X6XDxW`Bq$h%nm!rODv*RYj_RIYQpqI4APGI+<
z8gnd@N>p8aeMKeZ>w=drUjkE;lA;9>rm{9PHbPTeot!Ld0B;G9O1Cf5*J)_*0I<Zz
zhb&!*?!H_(K#sniopEt-DPyZSn3zya%Pz<fbpQgjGWd*+jEp(1hmnbCLaDrS)NRgN
z1UmA3Ys;|?B4>K%&cyimD<Mf+%C6bj^ZoTP09ydS2cQ6eQs9R98t09E+V>nP$_`_T
z)$HLCNXeC(hSrz}+bS^cxWAlfbmKLtemyfoOGej48HPsb<mgxq*tzUsjdaP0sVQn0
zIWe)*#&`vwvJ@1F55`IXbL0+KxnzOP?rv5wv5EeE`-6>%#}DtcI|JCL%m|jMaoe1&
zcU~EIDmH2@fU5tP*<*Exp~<|(c|}Y09RMyc&DI6SJx_0C<zT?YLLy;yc5J-7hg(~|
z(`YC9%p|SC8yg!2Y-9lD6_qvb93I-<8fb|}Fc}J$`<j={RYOUM#9_|>wEW|bKRP-T
zbxO#TD&YM#g0GK9M)WF-?;0CpO%IQc*FCy82OK-fv*B!&L>{v@gM)+p{gBY$G$(_4
z*(i?ZqBH}5O;lHZ^Cp4w_~CgZwaA;^-URifYUrrHzrSp&RfJkBIw_iZ-HV!<YoUq^
z3=Duv3IG)#c~i~ar$BoECED8B`~w02;k8Q{c=HC$UVao5((Xq|i4j0!I32@$3>Z2S
z6B9kMgr`sO67+R-V}Sdw$k+o6q!Xm5aOci-wOCvV9#Cq(^Ku_M2glmxW^Hx#-D<nx
zP*PSvo5^|2@w%?W@!?%@`Cy@bTFZez#1|0BGsfftkX%#avfP&(8X5{HVYwSR8mij)
zv6L?^K7P%M7o`OS#G#7vnL2D_lM@p&O<sxgSfPp|v$J28mV|8jD9bscyCu9&K=A}1
z%@(u0b8lE=1YgD{K-kL6o2b%@-mL*pA?<Ylv*z)i13VJ|05Jr_+xZ?_M<_48dWDjz
z#1{5uU|_bvg>yOzCs#L;5`);s(oWbmK)kA|%CzRbp^J-9o!9gAlHy`T#uyEj08^qP
z07_o*f;4aCjKG_Za{+ho`I|=)>c^wk;1a}bL}vhi2}w%g9TCX`tDKXQ0|3}7<6;s1
zuHHo6#NMT}v^2>ZgoUW(;_`oVFZQPht$*x^11|Bhvs-$6@wTj(HzrT!vqV$U1vU})
z01uIshsU8k7$49E-(Vp@IU>M?0vsbsDk`gW#>c~eo_zlN*|X^e_^zob?i5B-Rm`S)
zH(vx`f0~-S!C){TmCVWG<HoVffb^t1d&Wy1zQDkI>g$V=rd12+B^MD9F*P-f>v=wG
z2becEH#azZ^AZkDH>v;|>-Tx?v*O;If7Hz$Koua?*0O!_)xttTfX5?;6%}Do_kMl<
zaC5HZ;V}Z(Uf9~Q2C{dFa&i^{n=-n9uY?!Pi?n-Vb|Ao7UeO8(HRLc<RP2Dae8uF5
zPjmn1gkLns*A);DfY*MuH>&N3*DC!;W$`7`-aRayM6aEXYO&0uBqY8tQJN^_Y&9({
zX~0keaO&<ZO4A@uO-1FoHQzd#r=6OTLWUEhoV^VgBm`n-f8VUirho1R?Kv)Mlz6~+
zgUfo+?K0yov?_?L?Uf>6Q&Op6!$*m5cDC(LNr;IdrlV`I>8Co_T7i$wwYV~-^6+&5
zOIlD+z$N8BCVjTmo|Tm~D03mn$EQ<ZP+6GR-?6l`By({lNS(Mz7%qG9R(7Z!u*ut<
z6vxNMfa6ijcnu3(2QY6VfXToLk`K%B^@65+Sz9|<&5k}fIjJ_`R*Nm(Jf8@7)N7h8
z8X-k{Exa=(CI;}Ri&>fxOE_{wz_t+ZrU4V5LF#8zZDQ}@;sSx>Xys8$xN@D;;<u<!
z*I(3_aFd1FR9#<_{-YegDNJ<q*VY30aA0!^>~1TZ7L|kKTMxYH4x4Vnnj&Ea5Ws}J
zaNqvW*QX&GqOPva!NU`bfg2=$@W=dD)UOi8OoT}bXs?BNFAL!~u*JD{4MPPT%@4(T
z5O4&6jR0WOE;>veg%hr#Oz*W5%5-#gwkA^-b<sDBm6(o=jfw0?9zH^QKqi``!4g+f
zQ-g8!szPwOxJzMUql7#F&1Q@-5%G4LqoSMSl$*1hOc#v-o@ymVD@#k40F8%fVPO#@
zZ~5Q>H95JBz5Undi=D4QsONL#v**zc&(A!&=WI4d&k1GUsR)_2mhAx13m;DdW2By?
z8@(d}ZUcou=r02;^iW467zM@ssZ>NOjxQgc!sgd7Z+X*3n1t8Pa;uCZ0lM7w1)(br
zi;lcIzUHn#@VfMr4~)!QDTg7|q~gu;d5)Tq5&MmPt3Is^k@2KX4+vJ_9G+Sjj!u|y
zLR@BM!G-JD)EZOY9*RCH4V#_)10P@{C~8Vtily@R{J+O<u&}Vc1%ocH{3y7<4=~&T
zc^T<(w}+M7Hib_Klljb*5rJUizCD#zdG^)g-o|4PSv1(K5Qk_yHMI`+VbwQ@=O?tW
zu^Ozdg^axcG&g)!wzecFV#nZa{;br6`1*(h@+yxMozQERmF78*eB7Tksm+zPG#D9n
z@^Oc@rDcrR7^~s7VB6i&51ufbgx=w>PGY}F@;074u`mZQ!~sTr8p9z0A3m&XxY|AW
z<jaNuBt&XNiE#Ax!Mt&*U3FUFM!D?$V)KH6Hxlht>Zt2kN>8MAth$rWY!igvzksB}
zYo{h$`SlwH!U2_^9*iPbBp!23@gi##TYAfFvtwoS@rsd7c<#wcdBv?tQbGgmT+BOr
z%!M}k+5>-RdIwWY(@TkoNFIs4#1)WwiA%DMW1T9<?-4iG`ut*g<_1D8yx@V@2C<=W
zmFtf174i4C)gw#AgLE=xXTpykCWkl$&PwCX-M1--nm@WDs!S6d!RZtRdUZ{VUZBAV
zD$df()b>=j?sfYfmhi{oPc2^@Hk}kO#C<1LTCc}-F?!rnZlr%Y^9fQ4EvlvsEyxwo
z_~U)&swo&l#^Q^EtMB`i$8kE2wlNBy6c!-V%C#1B`|T^c3mk_T{7%=G6cQrFeI$@$
zdi~Q{L~fyvv9COrL*&1RsE+o^B5#B)-<4<6lX`KKFIvuz>lGG2_~@epkFyFIsL3a1
zl?}a1$}i`gDwQn-RYdBVy<B`>p9-&Ysb_X8&YrHSOuIe7xtX0_|GmehA^ISLj2;y^
zj6A3jgNC3yzsUOf5Wb2NNmo>mp=S}5O&m1~Hs=Vs=bSHT#OCc!wfTn2-B@Jk(DUm>
zcvLUuED(guw^z7j&<>X4m=?7diVDzC$XblhdYBY@xq=Vs<Wi3Bo}jJJlf^x}GL1gg
z_vHF6;~0VBBRRfw7bu+T8t?Weri2zZ)${&XiQpP_hgFX`t$P<Dbi2-<W_opP6N=cM
zdPsn0PdA67ILso#_rHu$)!?@W)G1BQ-1<t7tKNbhsCKonN&UTiZN{X)eb1#x>Q=G%
z)~*>Vb!(%VPm6anGm~HB=eBEPgHE5{l?Vfgm26<kmKDbwTA^U&*7%3V8jutl6?QzW
z(cveX+FaH+e@x@utWD|`$>qVCgk&A$NP9HeI~{qqU5JAOs-C4=!k#dE<mXo56B+1x
zYR(_cg~-WljN-14pC0mw#hyn!xzX15MLQo`DIb?)hB|}ixdPdY0zy*L-{*r|%vVuI
zO;7L7@Xg@!sa8^mMs8kPM>v)sx5}x83wh})vJaI`488%a+95Q@3A+R1LxNAEc?AwE
zRnHRd_&u_KeKd2occ0X*TvjlIqJ-pk1}zr#-)gTbqt@TMCA%zVW$QXOl6k|bKF+!w
zH$x_CUe%({ocBpt5}tpPj3q^4uUt8#V<u^(YU%8q6_IfKyGZ<zr3bA04&MI7%*Ykt
znaz$o@P|2ZNH+W35+CrmYf4+H&5a9go|Cz(qvO0@rHY3l5B&1Xg<w5=!zZ{=iOF3v
zZtSoSJVx}~7>wt!wK&9sJAOmbNPGI5EC)AA!_lF!rEs<tj^lg~tGDl@f?A;UVDuWj
z%r2V6#Z{eKSFu>zU-)OEqoA}A0@3F`W7FTUZB9y(e;2?sG9a9c2YlOGIzVk8&PX1F
zwM|Y;g+l<a`EhG*L)e`8-wDI8wu`U7-|`AWLFX0WadPaLw^`Uo7J=cr;e&Vehp&$p
z#P>G(Mc0PNC5gQTB1+1?z)ZJ#M7FQU#Or=pQ<kFB=6cRu;K*?_ajJ{jx68F`knw<_
zrxNdg<avYwV{YpH36<&Owty8^ZG1kH(dfNm3%P|gg2D<$zZCH>Cy*<H2^uV=RNyfd
zcEC1O*R96I4REwiXAi|gzp|sMT-MKLI~#ZlA35~5a7qZ%7?Fr0A<gsm)F(JATi!?a
z8Q(3k?!Db#=3%<v2ym%NIOZ2Z=Bz=Is|(aG%xd=gEWdsb@B|j?@+rXYfd4W8?~L?-
zL7Xpff5ybLY^sjfXS0m2+HTJ7&O!aS!&7hLjDEBpa{EfQlj@9{lU7WjXllo?7A=C$
z*@j<y%>Kyi<9Nd#Jm4t0+aIK$2ltWwIAPcQ{6m&8#;`j{gfVe=AjOp7)@k0O#~xd|
zQ!OuvQ}LrG1c;pys^m#?qOa>SCEd->xmoJ3PJT=7+U;KdWeskZmt;3Nr=RBxHi2$U
z5HrD6k?k9U?>90+w3KJ#>XA|pA;7s7^9*5H#7o}-8E>XwE$RFS7<#CZxnBQZwW#&W
zqn9{(CA|>JFNdewKEmV(YUS`wM$lX3g}Mb!&gAQ*%mjD39Qm&)FR~$Dij5-keO|8@
z85kU=^t2A?X!5=+be(D?u9!NICCxJ+h{A7wxHfx7V7jTQ<5{YH+QE_a4Ih-O<IVO{
zVRCC}B=<)tKaM9aX<)fa2P?P?REAFenmXBEkOCf9pW)G7j}p9j>upBB(Pfq0pm6FA
zrCAKBEhPp=q8IIaF4H8DUlPtJ)s1ipsFj4<amg|S?MFD@dAxsSuIf^3n#`P5oH4)^
zXb>RFDlu>%llJ!6`Vp^u``f^3J;9lpuDpvtRVL#P^r7n<>xQo7ui5I}#**U%V3A;|
z)o{tmMdFL2*YL@gkmci!65W4Hpj4Us##%6#uGO@b9fl6WqNWmzEf>{?&y8C<1gji~
z$T#E6hR3fDOPDiD;wV3Eh~izDaJqID`9%Kf47l=&+>yTl!G2eg;K^&|xxHJ`;Ns3`
zt-wX}N(cs@?1p;G=!d}UkK(}_v${Qz51Qr0WPKhxyb_TWs##HQQm-LjEpzNLOo&pi
zIujEAnt1J^)NWfw`s5XuU3t^E#ew$aoMv}9@5^<kDKs%tk{6kVMk2~uh>~@q5rHMA
zdH40X=kbs2^qlRUmrEHJ#<N&y%Qjo3&=|E^(<tr!p|IyN6rG5~m!R~+e@1LLLOF>a
zi__EMiwdfm9&Q)<LVKUoZA(6qhU?J3_SSuT9n9s8daY3V55gdQ^M~;UdI1U1@i!S{
zG84$_u+Z_flgf)hoce3ze4fQOvxn%}@dw7D{2R$Q62sARZDUvW)nsMs1gELWC-zI6
z)WvBtMFR$v3izx}-Z_lxZsI=R=tK!dOM5^bt;CYyJyPl4&_Zm))Y;;jJ6!$NhAX@C
zMV2Y0RnYZ6lnm{#b+=EuyB_9Ra<V*2=89M|%CNophFNEa{R>gtHh5xwu|r}l@vZKf
z3(Z2}9M4TFktY=aqX!yR3Yy3DeZhO{a5>ynH3xjPx~?m&?{v_HM!?CHZDeM#!_27F
z5j;g1MDvGFMb-!->MA*`%<lMk(FY{l6Un$hs5a;@Vuws~ImYxHS@L_sNS8tPZ^>5m
zKY?R$55m1qSED1O?~VrPy1l!IiMekCNBe?gJtN-U2%&a{e3Znz*~`w0m9BJ`UW+&B
zje3qPSD7mLl)2|V7ubh#LVwdlwGTf+B7c!c@jCZ4<^2Z~EP5M*<RLLtTwnW))vCt@
z(mqs>RBh!1eZ{bcJqmoPp0D9QYqjt+Q8!0c>UExQBhOM37_`09aGM4XB*OCCGyzS2
z%=QTV$|DWdN5x8r4ZZc8$|w5RLRaZI`*TnCtnaJdGR7_0E*843y)39A$d)=&$JfZ;
zf3TNt&cP@3t{v21+qsyjnLf7kC@-T-6GAqEIgHynx<te_3Q>+u?^TvQ7DC<bZ}w^J
z5BTg&XgX9&iMZ(~NG3-AL`zbNcI}g0oqfv?33yN91Un*Tk27xM*2~%Fps|DlfqqbC
z;e;?=XEn)}Qu~o-`BGkWwC&f2`ej)OS>s!M$}`@+jq16527<Zm8$nn1#RF)+a#nAK
zY_8fkd^B^}b=w)|9$k2L04f~tqjJ12BrKPQlP8xIeN#e-B5EU~fq7w0<<M=W#|r+G
zMlM7cd4K+~QDq>g?JYziO7fmZrqWI72=1OV5jFPm;IJKk`ijtctiS_}2ew^A<nA#Z
za#E>2e${?T29xR3yeJomIq0?#*QY9qZE9QnugoI}X|2jW8_6wRD=yrB>)d_BoLL8@
zWhm`edr}|3Yj)-O$bHj!e7nSP!6zm!u20L-u^r48><_<pvzteQZ!aq@zVH4ZTvU`E
z>fxX@UgTT7=pxiNt}JmtzM$poHyQF#HX_e0cw2$6i*SZgW*TjABF*w_JYX9v(iL6%
z>S$qr&u6XmsDOGg&-C7zyDg>6;k~oz$n9NjKA#4`h!rWy!AAb{Ikd$UiFWtlfbA^m
z#ZGR%^to$Yx~y%w6wSljLuS(y4U0D(cF$l%!~LB=^@5M{r0BI9<<gr;KI+FtKV$&u
zy9dr}*&AA<3Zv^;h`|Dfjv9Nhb?uA1U(XB|w^3w!4rv;sZ!b5!eg+<#I?4##nYQJ#
z+jPXg6SK$uy73zF196N|Kq<s?Q+I>3u71i~(qj{Bw)IxegmS1xD3YQAG}mlty3%w^
z4gIVSJkyyU6X173cf+X0a2sW+JNy`Uk=d7>5%6h4Z$RfjvQ@itxxc<OA~yoA4`N&s
zDt$sQ4`pP&%CxrWu~o#XR!_jgOZ#kP&{pNu!N_*(`|5`jWYC%iYG3r0-()vLs>F^e
zfN5v1*3bpK#S2T&dKzRSkgky~MKit2c1O^+U4u`fs`F()sKXX<Wn2i$MOpldFST-(
z-8Y}Zn-2E2%RZ;3dx(97TAgE@dp$}FNHb&4K0?^|1U6U9&gWzZlktMfSUwsE38cJR
zlZ!|0Z}*N(gx}%C80ZL@u|R40N_y#Zg~{Qq&;!W>a$G;1uJ73eGdl_dT!1@vd3?(*
zfFLa|y$VSg;Dg>3f^Y`%3#1tNdH5LVAr4@;HPpdYiqX(OnOl@mMjA&FZes%lL&Sfi
z9B>1<36}H#se`ll_e)ZYE(izlZ^;Sb4wm*1XK^H0{MRf6@n2huf9vRbjXMnL@IZ>u
z4vBOW=j8>$NIY(WJaB|9FF){{llR+I#y`8V2LIL0(FFnf*2)^p3xPo{^B{oi1|ELi
zA8jv3_&*)gW!qne1c$@_>gaL~->3MkJ0N2M#QXjHM^Cm0YirnF?SGj7&{mMwme<-6
zY02#hg}5;>(*L=Rzbv=_BmFl9;z)$0gR>1BVgD`H<1c=|k^|`;mq{PomqYnBi0@p0
z=?d_=JHwE?aBCX@z<cHaP8g+s+^fq;NlSj48q@)bgj&M5!3c;Y5{{6*?+S-HLmlWP
zd4GHPJJGksfK+e;<A+EK@bU9=^NDZ^2pjMTiVKK{gGB$N{(Cz~Yq0o#5P-x%eB%7V
zlDt1IUbgzr!TeJBeFzY1VB8M2+}19LZ;K+WV<5z@|7XAdaq;goU=J|hIZF%tqxGNF
z-`m<k93TK^U_~^bRtSjpHzDU0<Pl^xbbz|k|59lIM2hZ1z(`&}K|XtaF=lxeD9oB$
zfbR$UpPasP{!3Auk-*5Ekx(%Gx1~l}TEQUPV7Q9|Qi>7KweRb1Z)y8iCI6q5aI4Gy
z{@K*=H!U445tg<HOGmpOjeoFkgCnee)B$4ltuL@0&>D<{Abwon=V#=Vmi$ws#V_0Z
zJo{@F->QFX<Gv$`IWI#4F@PqQ2ax=OVqoWDZ{=VKg*o#gfBgKHY@#j{6jYRN*+jN7
z=@cj^SH>?BhuoijeqZq*&5*`-A$7{`N&%H>Y8X3#IOxN?O$IF&hAAh3d<f4us=D0t
z+n!-FiZq;a)cz#bVRo|cIDcei4lQ$voyOx})y@6)!`nOlHIjo{X$wymXxxvt`=h!S
zQIR$m>$qEWF=YH|#Bu+Vj|ylU)x~*#-tA?cw7V_mXWg4pZi`j>{o5ZV4tZ(l2rvnK
znooL8dfd{RL`<45Mq_Z{O`?ZeX9r`?<{lpJtd;P#n0g#ew}T~fb2aZTZwc;Rt=)w6
z(}<maPAGdX9Bez#H+#3y)vmhSbNk(Bo^YAN#_Y_j$JddG&9PE@+Kr2y#k7*vvl(6=
z+RwZt&k5&s1^45{#~WqbJ_LbfrYC2oy~^ET<yE`gqv5#Pv?&|-(ak#}Uo1G`UmR%6
zJgwY7lmmE}A&FXekER|Wd~7xc1BO4U9%J5M*Virx8k0V`S6OPF{&3*J%kB6xVIpg<
z%<k$3>e0EV3x<}J4rV0{z5o08zcu*(k%MzgyLl^WK@(5~fDiwObORBOzarh2d4-qd
zGWAgb?qCD8{S)t<1$ZsNV8Fb818H8c3j*<H6#U9xL8b3#^o{<{51QYw0pQyf$bWRU
zvPAIy9mjs9yu`784!{Nm2khT}qoVo^l_T8I#Sw5yT>eXMzoSFK;V?%_2MFxH(EV<d
zmtFnu=XTlKf2X4R&D@}u=E|8@9*VTLbaehN1O43uFYTerFSp@$fv?M`pfixo2$=qF
zp|2*BDa52Ik@sAg<0skx-k!;LP4)T5L|_HfC{7RR=a@FvHH&PNG0%Oxn771zX&6DL
zA1irgq`bxQ8aw;*Ak13kve2lV_Yx8%9<20N@amt=dTM98F1<N@d$f+pN_s<9s)IgM
zHYE%byh`jXUv|rE&|%IqG}gsBWxc_gc(nn0jWS^RrO~5Pq?F+kvpYCEE48zINp<EW
zJmle{ot^B3(EW{*lZKSp8A+;)r(2&g3oG;H3gtU)D;)&&B^gY3l(^pMKR?uZuiWg^
zUq3c}Wa)g?)o&tqkk^K<_+Er{z4v{!X|%z63sElfrsdCGn`3Uxqr1&$!foSI)BHtA
zU$El!%XAwHzJ!V1g;B6{1x?%DNSQEiU7=X1iSEX}z<DS>1wWX07t*_KH4-9t7mxlq
zO~Ctz9`;pke6S*_h9`bD30rP9Qz{Mx)eXKt$xoy#d#xWhiNiVX2A2mqj@>yW<?;?u
zS12KH_F$bBFr@A*sj}yJMnMR1_~M;!b}b_FTIy|fiW(D3un-+CTZ@riMb2$k4wuKK
zj4x1lqhn_j!F+E@T3W)fFG49JRo}W+_Uj~PZ{_zIB_)xEF~?2`nLXKK`S7wjYj*!u
zDhB2|pQ*w77$uvl3|Lrrf%ZxiI7!VbA50y48k@R`z7i6N+!)B62vX!%m_B2zUc>bZ
zmu^iu4qhaWL;YiCSUO?OmOmfJKfz*|DBTm-X(y~Y+Gim}$nrpGGZ~~plv<>n+rv(7
z#P12&Rz7?EiWw?uMr)1oA~C>Tk5lvf4`M~}Ej^3O%=`A)lfknB8k>3#zLJIXZLDhw
zt2zhXHc?ITZ40$fmCSEi>I6M}2}AU}eN%LUZOxf1H6dF=`x+j0HagLly8L%WS!Au7
zw1R!<%|W^y)>>EGd{x+BDIpok<)Fc^PN@m%i%}JdC#OzaM%q;IHBAr4NHq?Ut)MOh
zU~=QD@uh*7S0QH|_l(pDG7ucPa;cp5`Bs?<8XTG`(k#Q2ZzWVXc(XU=7+5&QQ9}db
z3d6Z*P;hm_5~$^;qpIP-!HoJol9M!ws{->#94QpJh$Hzpd2;w|D8%Jh0EWY|lH%TF
zV;mkzPppMKkw~Ih$neY)=S3Zq0MF5ha6W5ay&qn+wCHg9&|Sz{^UA^li*ghjHY^9)
zanF!wyXKpdTRm?ujbxk!z3vHEW)=AM)8;et)y}+mv_ae`mml?|lK+p!{WpXCYMzXD
z-KYp<EAY={c4h_1;%I{&Sb2wKBVI5Sgyd%4)-62`)`;G2xT|-LzC}vrV?Ki3CNC0J
zDW{nkRnY1x>^2@5k=;CUKpVm0p~OzgIpk_+cEWR0s*{3aaYAC^kzxSLkpam)Lj10`
zmOF=RE3XO8I0-{lxH;<4CQ*mD92$w{C?WOn;?=?pZMk?5?!rdSu6+S8jwX3u9!^|Z
zLDo~2G#uJ^>klID%<ro3^1F7=1yK;mz)o|`TnEdBGwVZNLE^?8;_)YN9}<h&l-;%K
z_O#{9#J3_)Hlwg+bKh9MZM_STUBy_a%2F{jEgs29Bqdj7FfL{=L$8*_GVLBXAb*il
zJI85XK4OR%=zem2SS2WfR>_JqKWSk9uw<Vjv}3w`0R13DBCc2naYw!&+l~!;nOrWd
z_(o-@6Mj0AMmHw4$8F9GiB9rQxedr|_S0%(7zu_EZfn#RweTr|Gi9Hh(N0I<M3SL9
zB3kz084{JfHMq@H^oLTXRC96Mev_R&l}@vzpH+xDvxe-Row{T<?5_LWpWWo3x<I=O
z3eZ8$<i#;iP?8yb1O<MFxz_;9)l@Q&<5tsB=KbrD46sA5c{1nFMo-qYc}Rv|8{WD0
zge&r$wL{}<kQd7Hyo5kBzkuOdI-&DV!Xmesn%Ez=Z=T|;kg-&0GQ|lQvsQDcJ|R#;
z3tQLAZ=k0OuWcnw*4$*l*Uv%=InIm;^ms$th_P+7lp?l(=%jKK+qp}vle6568gx8%
z^wKb8_9m<2O=dj#j2RYIel`s1`S-GNN&I?_!L6)jMFKO>9J5d|%Ycud#$Chg0mr2r
z8p_}ze;s)*4<h^`KPtm#-A%Yrv;Nr=<);xUr*2Q02I#sc+HXxVo~T*qY^XiB9TrS0
zroCih*_duGE92l62fH(K!Qn)COX00%lf<;T<89*$>M4{;&I}3?n~I%&J$?q-PK8~Y
z`eDDhK#{RRlA7Aa85gEEqF1pqi@29&C)WjXTa3t1!7j9Guet9q`>Ed#ChBbxo(;6f
zhz?LG$+5(-rg`{4a<ZXzHoMhr>GNvUtKei+$cwQ*+FV}`S;Xzm5rS6?O-~Cb*f;N=
z4R+ZRqR8SUCN2{5Wy*Zv8P&eLn-_go!oyKfP!517j?OJqG%}Qbe}x3NrN5LPFO>X~
z>Ywk5{3Ou?e*UlC9Qncehk*V15b!dM;b*}o`CC%M55gZ_;Xj`d{v`ZF{`V(_KPZ26
z`D+O5@&Ve<f(iIdKOd+4p!@Oq%|8QVKk3ZrfgXWv@nyK|pH%<c1plNeXa4ss@lP(l
z_kn+MIb{1!hxA`O;-5@^?-BoGD$V|%O#kj5|6QzpQhwj*|2%j94X2;}^1pMrbPRu%
z(%*3T)qnn<@>%{3pI`myf9G@QQU5GAIR0~ee)`w{Q!WO-;quep{&y~y9{0}@_8Tt$
z;eY=-v%fEN_ivc}>W_ct^Fxz<wHucn`Ok6z@cGq0{~>xmD1XJMOS|&3IP?9BmC?C{
UfqB`C1o-O(B1&?XZ=9k0KS>F)Q~&?~

literal 0
HcmV?d00001

diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 717de3f..743a845 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -4,7 +4,7 @@ import unittest
 import shutil
 import os
 
-from src import pdf, png, jpg, audio, office
+from src import pdf, png, jpg, audio, office, libreoffice
 
 class TestGetMeta(unittest.TestCase):
     def test_pdf(self):
@@ -46,6 +46,14 @@ class TestGetMeta(unittest.TestCase):
         self.assertEqual(meta['dc:creator'], 'julien voisin')
         self.assertEqual(meta['Application'], 'LibreOffice/5.4.5.1$Linux_X86_64 LibreOffice_project/40m0$Build-1')
 
+    def test_libreoffice(self):
+        generator = 'LibreOffice/3.3$Unix LibreOffice_project/330m19$Build-202'
+        meta = libreoffice.LibreOfficeParser('./tests/data/dirty.odt').get_meta()
+        self.assertEqual(meta['meta:initial-creator'], 'jvoisin ')
+        self.assertEqual(meta['meta:creation-date'], '2011-07-26T03:27:48')
+        self.assertEqual(meta['meta:generator'], generator)
+
+
 
 class TestCleaning(unittest.TestCase):
     def test_pdf(self):
@@ -153,3 +161,19 @@ class TestCleaning(unittest.TestCase):
         self.assertEqual(p.get_meta(), {})
 
         os.remove('./tests/data/clean.docx')
+
+
+    def test_libreoffice(self):
+        shutil.copy('./tests/data/dirty.odt', './tests/data/clean.odt')
+        # Registered as a cleanup so the copy is removed even if an assert fails
+        self.addCleanup(os.remove, './tests/data/clean.odt')
+        p = libreoffice.LibreOfficeParser('./tests/data/clean.odt')
+
+        self.assertIsNotNone(p.get_meta())
+        self.assertTrue(p.remove_all())
+
+        # The cleaned copy read below must not be left in the data folder either
+        self.addCleanup(os.remove, './tests/data/clean.odt.cleaned')
+
+        p = libreoffice.LibreOfficeParser('./tests/data/clean.odt.cleaned')
+        self.assertEqual(p.get_meta(), {})
-- 
GitLab