thr3ads.net - Secure testing commits - [Secure-testing-commits] r14616

If this information is useful, please help other people find it:
Share via:
Florian Weimer
2010-May-06 14:04 UTC
[Secure-testing-commits] r14616 - lib/python

Author: fw
Date: 2010-05-06 14:04:13 +0000 (Thu, 06 May 2010)
New Revision: 14616

Added:
   lib/python/repo.py
Log:
lib/python/repo.py: mirror of Debian repository metadata

This version is still somewhat preliminary.
No package diffs support for now; we can use a close-by mirror instead.


Added: lib/python/repo.py
===================================================================
--- lib/python/repo.py	                        (rev 0)
+++ lib/python/repo.py	2010-05-06 14:04:13 UTC (rev 14616)
@@ -0,0 +1,274 @@
+# repo.py -- mirror Debian repository metadata
+# Copyright (C) 2010 Florian Weimer <fw at deneb.enyo.de>
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+from __future__ import with_statement
+
+import bz2
+import hashlib
+import gzip
+import os
+import re
+import tempfile
+import urllib
+
+import xpickle
+import debian_support
+import parsers
+
+MARKER_NAME = "DEBIAN_REPO_MIRROR"
+
+_re_name = re.compile(r'^[a-z0-9-]+$')
+_re_hashentry = re.compile('^\s*([0-9a-fA-F]{20,})\s+(\d+)\s+(\S+)$')
+
+def _splitfield(data, field):
+    tup = tuple(data[field].strip().split())
+    if tup == ():
+        data[field] = ('',)
+    else:
+        data[field] = tup
+
+def _splithashes(path, data, field):
+    result = {}
+    for line in data[field].split('\n'):
+        if line == "":
+            continue
+        match = _re_hashentry.match(line)
+        if match is None:
+            raise ValueError("invalid line in %r: %r" % (path, line))
+        digest, size, name = match.groups()
+        result[name] = digest
+    data[field] = result
+
+def parserelease(path, f):
+    data = {}
+    for p in debian_support.PackageFile(path, f):
+        for k, v in p:
+            data[k.lower()] = v
+        break # file contains only one record
+    _splitfield(data, "components")
+    _splitfield(data, "architectures")
+    _splithashes(path, data, "md5sum")
+    _splithashes(path, data, "sha1")
+    _splithashes(path, data, "sha256")
+    return data
+
+def unbzip2hash(src, dst):
+    dec = bz2.BZ2Decompressor()
+    digest = hashlib.sha256()
+    while True:
+        data = src.read(8192)
+        if data == '':
+            break
+        data = dec.decompress(data)
+        dst.write(data)
+        digest.update(data)
+    return digest.hexdigest()
+
+def downloadbz2(url, target, expecteddigest):
+    try:
+        bz2src = urllib.urlopen(url)
+        try:
+            dgst = xpickle.replacefile(target,
+                                       lambda fname, f: unbzip2hash(bz2src, f))
+            if dgst == expecteddigest:
+                return True
+            return False
+        finally:
+            bz2src.close()
+    except IOError:
+        return False
+
+def downloadgz(url, target, expecteddigest):
+    with tempfile.NamedTemporaryFile() as t:
+        try:
+            (filename, headers) = urllib.urlretrieve(url, t.name)
+        except IOError:
+            return False
+        gfile = gzip.GzipFile(t.name)
+        try:
+            def copy(fname, f):
+                digest = hashlib.sha256()
+                while True:
+                    data = gfile.read(8192)
+                    if data == "":
+                        break
+                    f.write(data)
+                    digest.update(data)
+                if digest.hexdigest() == expecteddigest:
+                    return True
+                return False
+            return xpickle.replacefile(target, copy)
+        finally:
+            gfile.close()
+    return True
+
+class RepoCollection:
+    def __init__(self, root):
+        """Creates a new repository mirror.
+        
+        root: path in the local file system"""
+        self.root = root
+        self.repos = {}
+        self.used = ()
+        self.releases = None
+        self.verbose = False
+
+        if not os.path.exists(root):
+            os.makedirs(root)
+        l = os.listdir(root)
+        if len(l) == 0:
+            file(root + "/" + MARKER_NAME, "w").close()
+        elif MARKER_NAME not in l:
+            raise ValueError("not a Debian repository mirror directory: "
+                             + repr(root))
+
+    def add(self, name, url):
+        """Adds a repository, given its name and the root URL"""
+        if _re_name.match(name) is None:
+            raise ValueError("invalid repository name: " + repr(name))
+        if name in self.repos:
+            raise ValueError("repository already registered: " + repr(name))
+        if url[-1:] != '/':
+            url += '/'
+        self.repos[name] = url
+
+    def update(self):
+        self._initused()
+        for (name, url) in self.repos.items():
+            if not self._updatelrelease(name):
+                continue
+            if not self.hasrelease(name):
+                continue
+            rel = self.release(name)
+            hashes = rel["sha256"]
+            for comp in rel["components"]:
+                for arch in rel["architectures"]:
+                    plainpath = self._plainpath(comp, arch)
+                    plainurl = url + plainpath
+                    if not plainpath in hashes:
+                        self.warn("not downloaded because uncompressed version not present in Release file: " + plainurl)
+                        continue
+                    uncompressed_digest = hashes[plainpath]
+                    listname = self._listname(uncompressed_digest)
+                    if os.path.exists(listname):
+                        continue
+                    success = False
+                    for suffix, method in ((".bz2", downloadbz2),
+                                           (".gz", downloadgz)):
+                        if method(plainurl + suffix, listname,
+                                  uncompressed_digest):
+                            success = True
+                            break
+                    if not success:
+                        self.warn("download failed: " + plainurl)
+
+    def _updatelrelease(self, name):
+        url = self.repos[name]
+        relname = self._relname(name)
+        self._markused(relname)
+        try:
+            def download(fname, f):
+                urllib.urlretrieve(url + 'Release', fname)
+            xpickle.replacefile(relname, download)
+            return True
+        except IOError:
+            self.warn("download of Release file failed: " + url)
+            return False
+
+    def hasrelease(self, name):
+        if name not in self.repos:
+            raise ValueError("name not registered: " + repr(name))
+        return os.path.exists(self._relname(name))
+
+    def release(self, name):
+        if name not in self.repos:
+            raise ValueError("name not registered: " + repr(name))
+        with file(self._relname(name)) as f:
+            return parserelease(name, f)
+
+    def filemap(self):
+        d = {}
+        for name in self.repos:
+            rel = self.release(name)
+            hashes = rel["sha256"]
+            l = []
+            for comp in rel["components"]:
+                for arch in rel["architectures"]:
+                    plainpath = self._plainpath(comp, arch)
+                    if not plainpath in hashes:
+                        self.warn("failed to find %s/%s/%s" % (name, comp, arch))
+                        continue
+                    digest = hashes[plainpath]
+                    listname = self._listname(digest)
+                    if not os.path.exists(listname):
+                        self.warn("file %s for %s/%s/%s not present" %
+                                  (listname, name, comp, arch))
+                        continue
+                    if arch == "source":
+                        method = parsers.sourcepackages
+                    else:
+                        method = parsers.binarypackages
+                    l.append((comp, arch, listname, method))
+            d[name] = l
+        return d
+
+    def _relname(self, name):
+        return "%s/r_%s" % (self.root, name)
+
+    def _plainpath(self, comp, arch):
+        # Hack to deal with the "updates/" special case.
+        comp = comp.split("/")[-1]
+        if arch == "source":
+            return comp + "/source/Sources"
+        return "%s/binary-%s/Packages" % (comp, arch)
+
+    def _listname(self, digest):
+        return "%s/h_%s" % (self.root, digest)
+
+    def _initused(self):
+        self.used = set()
+        self.used.add("%s/%s" % (self.root, MARKER_NAME))
+
+    def _markused(self, name):
+        self.used.add(name)
+        self.used.add(name + xpickle.EXTENSION)
+
+    def _haslist(self, digest):
+        return os.path.exists(self._listname(digest))
+
+    def warn(self, msg):
+        if self.verbose:
+            print msg
+
+def _test():
+    import shutil
+
+    tmp = tempfile.mkdtemp()
+    try:
+        r = RepoCollection(tmp)
+        r.verbose = True
+        mirror = "http://localhost:9999/"
+        r.add("lenny", mirror + "debian/dists/lenny")
+        r.add("lenny-security", mirror + "debian-security/dists/lenny/updates")
+        r.add("lenny-proposed-updates", mirror + "debian/dists/lenny-proposed-updates")
+        r.add("squeeze", mirror + "debian/dists/squeeze")
+        r.add("sid", mirror + "debian/dists/sid")
+        r.update()
+    finally:
+        shutil.rmtree(tmp)
+if __name__ == "__main__":
+    _test()
Secure testing commits - May 2010 - r14616 - lib/python

[Secure-testing-commits] r14616 - lib/python