From 95169906931e60ace651e8f0aefe0fbe375b2f40 Mon Sep 17 00:00:00 2001
From: jvoisin <julien.voisin@dustri.org>
Date: Sun, 28 Apr 2019 19:25:45 +0200
Subject: [PATCH] Add some verification for "dangerous" tarfiles

---
 libmat2/archive.py            |  53 ++++++++++++++-
 tests/test_corrupted_files.py | 120 ++++++++++++++++++++++++++++++++++
 2 files changed, 172 insertions(+), 1 deletion(-)

diff --git a/libmat2/archive.py b/libmat2/archive.py
index 969bbd8..e6c11c1 100644
--- a/libmat2/archive.py
+++ b/libmat2/archive.py
@@ -15,7 +15,7 @@ from . import abstract, UnknownMemberPolicy, parser_factory
 assert Set
 assert Pattern
 
-# pylint: disable=not-callable,assignment-from-no-return
+# pylint: disable=not-callable,assignment-from-no-return,too-many-branches
 
 # An ArchiveClass is a class representing an archive,
 # while an ArchiveMember is a class representing an element
@@ -238,6 +238,57 @@ class TarParser(ArchiveBasedAbstractParser):
     def is_archive_valid(self):
         if tarfile.is_tarfile(self.filename) is False:
             raise ValueError
+        self.__check_tarfile_safety()
+
+    def __check_tarfile_safety(self):
+        """Checks if the tarfile doesn't have any "suspicious" members.
+
+        This is a rewrite of this patch: https://bugs.python.org/file47826/safetarfile-4.diff
+        inspired by this bug from 2014: https://bugs.python.org/issue21109
+        because Python's stdlib doesn't provide a way to "safely" extract
+        things from a tar file.
+        """
+        names = set()
+        with tarfile.open(self.filename) as f:
+            members = f.getmembers()
+        for member in members:
+            name = member.name
+            if os.path.isabs(name):
+                raise ValueError("The archive %s contains a file with an " \
+                        "absolute path: %s" % (self.filename, name))
+            elif os.path.normpath(name).startswith('../') or '/../' in name:
+                raise ValueError("The archive %s contains a file with an " \
+                        "path traversal attack: %s" % (self.filename, name))
+
+            if name in names:
+                raise ValueError("The archive %s contains two times the same " \
+                        "file: %s" % (self.filename, name))
+            else:
+                names.add(name)
+
+            if member.isfile():
+                if member.mode & stat.S_ISUID:
+                    raise ValueError("The archive %s contains a setuid file: %s" % \
+                        (self.filename, name))
+                elif member.mode & stat.S_ISGID:
+                    raise ValueError("The archive %s contains a setgid file: %s" % \
+                            (self.filename, name))
+            elif member.issym():
+                linkname = member.linkname
+                if os.path.normpath(linkname).startswith('..'):
+                    raise ValueError('The archive %s contains a symlink pointing' \
+                            'outside of the archive via a path traversal: %s -> %s' % \
+                            (self.filename, name, linkname))
+                if os.path.isabs(linkname):
+                    raise ValueError('The archive %s contains a symlink pointing' \
+                            'outside of the archive: %s -> %s' % \
+                            (self.filename, name, linkname))
+            elif member.isdev():
+                raise ValueError("The archive %s contains a non-regular " \
+                        "file: %s" % (self.filename, name))
+            elif member.islnk():
+                raise ValueError("The archive %s contains a hardlink: %s" \
+                        % (self.filename, name))
 
     @staticmethod
     def _clean_member(member: ArchiveMember) -> ArchiveMember:
diff --git a/tests/test_corrupted_files.py b/tests/test_corrupted_files.py
index b7240fe..1d66556 100644
--- a/tests/test_corrupted_files.py
+++ b/tests/test_corrupted_files.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 
 import unittest
+import stat
 import time
 import shutil
 import os
@@ -325,6 +326,7 @@ class TestReadOnlyArchiveMembers(unittest.TestCase):
             tarinfo = tarfile.TarInfo('./tests/data/dirty.jpg')
             tarinfo.mtime = time.time()
             tarinfo.uid = 1337
+            tarinfo.gid = 0
             tarinfo.mode = 0o000
             tarinfo.size = os.stat('./tests/data/dirty.jpg').st_size
             with open('./tests/data/dirty.jpg', 'rb') as f:
@@ -340,3 +342,121 @@ class TestReadOnlyArchiveMembers(unittest.TestCase):
         os.remove('./tests/data/clean.tar')
         os.remove('./tests/data/clean.cleaned.tar')
 
+
+class TestPathTraversalArchiveMembers(unittest.TestCase):
+    def test_tar_traversal(self):
+        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
+            zout.add('./tests/data/dirty.png')
+            tarinfo = tarfile.TarInfo('./tests/data/dirty.jpg')
+            tarinfo.name = '../../../../../../../../../../tmp/mat2_test.png'
+            with open('./tests/data/dirty.jpg', 'rb') as f:
+                zout.addfile(tarinfo=tarinfo, fileobj=f)
+        with self.assertRaises(ValueError):
+            archive.TarParser('./tests/data/clean.tar')
+        os.remove('./tests/data/clean.tar')
+
+    def test_tar_absolute_path(self):
+        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
+            zout.add('./tests/data/dirty.png')
+            tarinfo = tarfile.TarInfo('./tests/data/dirty.jpg')
+            tarinfo.name = '/etc/passwd'
+            with open('./tests/data/dirty.jpg', 'rb') as f:
+                zout.addfile(tarinfo=tarinfo, fileobj=f)
+        with self.assertRaises(ValueError):
+            archive.TarParser('./tests/data/clean.tar')
+        os.remove('./tests/data/clean.tar')
+
+    def test_tar_duplicate_file(self):
+        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
+            for _ in range(3):
+                zout.add('./tests/data/dirty.png')
+                tarinfo = tarfile.TarInfo('./tests/data/dirty.jpg')
+                with open('./tests/data/dirty.jpg', 'rb') as f:
+                    zout.addfile(tarinfo=tarinfo, fileobj=f)
+        with self.assertRaises(ValueError):
+            archive.TarParser('./tests/data/clean.tar')
+        os.remove('./tests/data/clean.tar')
+
+    def test_tar_setuid(self):
+        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
+            zout.add('./tests/data/dirty.png')
+            tarinfo = tarfile.TarInfo('./tests/data/dirty.jpg')
+            tarinfo.mode |= stat.S_ISUID
+            with open('./tests/data/dirty.jpg', 'rb') as f:
+                zout.addfile(tarinfo=tarinfo, fileobj=f)
+        with self.assertRaises(ValueError):
+            archive.TarParser('./tests/data/clean.tar')
+        os.remove('./tests/data/clean.tar')
+
+    def test_tar_setgid(self):
+        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
+            zout.add('./tests/data/dirty.png')
+            tarinfo = tarfile.TarInfo('./tests/data/dirty.jpg')
+            tarinfo.mode |= stat.S_ISGID
+            with open('./tests/data/dirty.jpg', 'rb') as f:
+                zout.addfile(tarinfo=tarinfo, fileobj=f)
+        with self.assertRaises(ValueError):
+            archive.TarParser('./tests/data/clean.tar')
+        os.remove('./tests/data/clean.tar')
+
+    def test_tar_symlink_absolute(self):
+        os.symlink('/etc/passwd', './tests/data/symlink')
+        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
+            zout.add('./tests/data/symlink')
+            tarinfo = tarfile.TarInfo('./tests/data/symlink')
+            tarinfo.linkname = '/etc/passwd'
+            tarinfo.type = tarfile.SYMTYPE
+            with open('./tests/data/dirty.jpg', 'rb') as f:
+                zout.addfile(tarinfo=tarinfo, fileobj=f)
+        with self.assertRaises(ValueError):
+            archive.TarParser('./tests/data/clean.tar')
+        os.remove('./tests/data/clean.tar')
+        os.remove('./tests/data/symlink')
+
+    def test_tar_symlink_ok(self):
+        shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')
+        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
+            zout.add('./tests/data/dirty.png')
+            t = tarfile.TarInfo('mydir')
+            t.type = tarfile.DIRTYPE
+            zout.addfile(t)
+            zout.add('./tests/data/clean.png')
+            t = tarfile.TarInfo('mylink')
+            t.type = tarfile.SYMTYPE
+            t.linkname = './tests/data/clean.png'
+            zout.addfile(t)
+            zout.add('./tests/data/dirty.jpg')
+        archive.TarParser('./tests/data/clean.tar')
+        os.remove('./tests/data/clean.tar')
+        os.remove('./tests/data/clean.png')
+
+    def test_tar_symlink_relative(self):
+        os.symlink('../../../etc/passwd', './tests/data/symlink')
+        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
+            zout.add('./tests/data/symlink')
+            tarinfo = tarfile.TarInfo('./tests/data/symlink')
+            with open('./tests/data/dirty.jpg', 'rb') as f:
+                zout.addfile(tarinfo=tarinfo, fileobj=f)
+        with self.assertRaises(ValueError):
+            archive.TarParser('./tests/data/clean.tar')
+        os.remove('./tests/data/clean.tar')
+        os.remove('./tests/data/symlink')
+
+    def test_tar_device_file(self):
+        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
+            zout.add('/dev/null')
+        with self.assertRaises(ValueError):
+            archive.TarParser('./tests/data/clean.tar')
+        os.remove('./tests/data/clean.tar')
+
+    def test_tar_hardlink(self):
+        shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')
+        os.link('./tests/data/clean.png', './tests/data/hardlink.png')
+        with tarfile.open('./tests/data/cleaner.tar', 'w') as zout:
+            zout.add('tests/data/clean.png')
+            zout.add('tests/data/hardlink.png')
+        with self.assertRaises(ValueError):
+            archive.TarParser('./tests/data/cleaner.tar')
+        os.remove('./tests/data/cleaner.tar')
+        os.remove('./tests/data/clean.png')
+        os.remove('./tests/data/hardlink.png')
-- 
GitLab