Author: fw Date: 2005-09-12 20:08:46 +0000 (Mon, 12 Sep 2005) New Revision: 1939 Added: bin/apt-update-file Modified: lib/python/debian_support.py Log: lib/python/debian_support.py: Add support for downloading package file diffs. bin/apt-update-file: Driver script for the new functionality. (I will use this functionality to implement package database replication. The goal is to keep a local copy of all the interesting data, so that we no longer need to consult madison etc.) Added: bin/apt-update-file ==================================================================--- bin/apt-update-file 2005-09-12 18:53:42 UTC (rev 1938) +++ bin/apt-update-file 2005-09-12 20:08:46 UTC (rev 1939) @@ -0,0 +1,30 @@ +#!/usr/bin/python + +# This script is mainly used to demo the updateFile function. + +import os +import os.path +import string +import sys + +def setup_paths(): + check_file = ''lib/python/debian_support.py'' + path = os.getcwd() + while 1: + if os.path.exists("%s/%s" % (path, check_file)): + sys.path = [path + ''/lib/python''] + sys.path + return path + idx = string.rfind(path, ''/'') + if idx == -1: + raise ImportError, "could not setup paths" + path = path[0:idx] +root_path = setup_paths() + +import bugs +import debian_support + +if len(sys.argv) <> 3: + sys.stderr.write("usage: apt-update-file REMOTE LOCAL\n") + sys.exit(1) + +debian_support.updateFile(sys.argv[1], sys.argv[2], verbose=True) Property changes on: bin/apt-update-file ___________________________________________________________________ Name: svn:executable + * Modified: lib/python/debian_support.py ==================================================================--- lib/python/debian_support.py 2005-09-12 18:53:42 UTC (rev 1938) +++ lib/python/debian_support.py 2005-09-12 20:08:46 UTC (rev 1939) @@ -17,7 +17,9 @@ """This module implements facilities to deal with Debian-specific metadata.""" +import os import re +import sha import types class ParseError(Exception): @@ -103,7 +105,7 @@ Objects of this class can be used to read Debian''s Source and Packages files.""" - re_field = re.compile(r''^([A-Za-z][A-Za-z0-9-]+):\s+(.*?)\s*$'') + re_field = re.compile(r''^([A-Za-z][A-Za-z0-9-]+):(?:\s+(.*?))?\s*$'') re_continuation = re.compile(r''^\s+(?:\.|(\S.*?)\s*)$'') def __init__(self, name, fileObj=None): @@ -137,6 +139,7 @@ if not match: self.raiseSyntaxError("expected package field") (name, contents) = match.groups() + contents = contents or '''' while True: line = self.file.readline() @@ -150,6 +153,8 @@ else: break pkg.append((name, contents)) + if pkg: + yield pkg def raiseSyntaxError(self, msg, lineno=None): if lineno is None: @@ -186,6 +191,188 @@ return None del listReleases +def readLinesSHA1(lines): + m = sha.new() + for l in lines: + m.update(l) + return m.hexdigest() + +def patchesFromEdScript(source, + re_cmd=re.compile(r''^(\d+)(?:,(\d+))?([acd])$'')): + """Converts source to a stream of patches. + + Patches are triples of line indexes: + + - first line to be replaced + - one past the last line being replaces + - list of line replacements + + This is enough to model arbitrary additions, deletions and + replacements. + """ + + i = iter(source) + + for line in i: + match = re_cmd.match(line) + if match is None: + raise ValueError, "invalid patch command: " + `line` + + (first, last, cmd) = match.groups() + first = int(first) + if last is not None: + last = int(last) + + if cmd == ''d'': + first = first - 1 + if last is None: + last = first + 1 + yield (first, last, []) + continue + + if cmd == ''a'': + if last is not None: + raise ValueError, "invalid patch argument: " + `line` + last = first + else: # cmd == c + first = first - 1 + if last is None: + last = first + 1 + + lines = [] + for l in i: + if l == '''': + raise ValueError, "end of stream in command: " + `line` + if l == ''.\n'' or l == ''.'': + break + lines.append(l) + yield (first, last, lines) + +def patchLines(lines, patches): + """Applies patches to lines. Updates lines in place.""" + for (first, last, args) in patches: + lines[first:last] = args + +def replaceFile(lines, local): + new_file = file(local + ''.new'', ''w+'') + for l in lines: + new_file.write(l) + new_file.close() + os.rename(local + ''.new'', local) + +def downloadGunzipLines(remote): + """Downloads a file from a remote location and gunzips it. + + Returns the lines in the file.""" + + # The implementation is rather crude, but it seems that the gzip + # module needs a real file for input. + + import gzip + import tempfile + import urllib + + (handle, fname) = tempfile.mkstemp() + try: + os.close(handle) + (filename, headers) = urllib.urlretrieve(remote, fname) + gfile = gzip.GzipFile(filename) + lines = gfile.readlines() + gfile.close() + finally: + os.unlink(fname) + return lines + +def downloadFile(remote, local): + """Copies a gzipped remote file to the local system. + + remote - URL, without the .gz suffix + local - name of the local file + """ + + lines = downloadGunzipLines(remote + ''.gz'') + replaceFile(lines, local) + return lines + +def updateFile(remote, local, verbose=None): + """Updates the local file by downloading a remote patch. + + Returns a list of lines in the local file. + """ + + try: + local_file = file(local) + except OSError: + return downloadFile(remote, local) + + lines = local_file.readlines() + local_file.close() + local_hash = readLinesSHA1(lines) + patches_to_apply = [] + patch_hashes = {} + + import urllib + index_name = remote + ''.diff/Index'' + + re_whitespace=re.compile(''\s+'') + + for fields in PackageFile(index_name, urllib.urlopen(index_name)): + for (field, value) in fields: + if field == ''SHA1-Current'': + (remote_hash, remote_size) = value.split('' '') + if local_hash == remote_hash: + if verbose: + print "updateFile: local file is up-to-date" + return lines + continue + + if field ==''SHA1-History'': + for entry in value.splitlines(): + if entry == '''': + continue + (hist_hash, hist_size, patch_name) \ + = re_whitespace.split(entry) + + # After the first patch, we have to apply all + # remaining patches. + if patches_to_apply or hist_hash == local_hash: + patches_to_apply.append(patch_name) + + continue + + if field == ''SHA1-Patches'': + for entry in value.splitlines(): + if entry == '''': + continue + (patch_hash, patch_size, patch_name) \ + = re_whitespace.split(entry) + patch_hashes[patch_name] = patch_hash + continue + + if verbose: + print "updateFile: field %s ignored" % `field` + + if not patches_to_apply: + if verbose: + print "updateFile: could not find historic entry", local_hash + return downloadFile(remote, local) + + for patch_name in patches_to_apply: + print "updateFile: downloading patch " + `patch_name` + patch_contents = downloadGunzipLines(remote + ''.diff/'' + patch_name + + ''.gz'') + if readLinesSHA1(patch_contents ) <> patch_hashes[patch_name]: + raise ValueError, "patch %s was garbled" % `patch_name` + patchLines(lines, patchesFromEdScript(patch_contents)) + + new_hash = readLinesSHA1(lines) + if new_hash <> remote_hash: + raise ValueError, ("patch failed, got %s instead of %s" + % (new_hash, remote_hash)) + + replaceFile(lines, local) + return lines + def test(): # Version assert Version(''0'') < Version(''a'') @@ -210,5 +397,20 @@ # for p in PackageFile(''../../data/packages/sarge/Packages.i386''): # assert p[0][0] == ''Package'' + # Helper routines + assert readLinesSHA1([]) == ''da39a3ee5e6b4b0d3255bfef95601890afd80709'' + assert readLinesSHA1([''1\n'', ''23\n'']) \ + == ''14293c9bd646a15dc656eaf8fba95124020dfada'' + + file_a = map(lambda x: "%d\n" % x, range(1, 18)) + file_b = [''0\n'', ''1\n'', ''<2>\n'', ''<3>\n'', ''4\n'', ''5\n'', ''7\n'', ''8\n'', + ''11\n'', ''12\n'', ''<13>\n'', ''14\n'', ''15\n'', ''A\n'', ''B\n'', ''C\n'', + ''16\n'', ''17\n'',] + patch = [''15a\n'', ''A\n'', ''B\n'', ''C\n'', ''.\n'', ''13c\n'', ''<13>\n'', ''.\n'', + ''9,10d\n'', ''6d\n'', ''2,3c\n'', ''<2>\n'', ''<3>\n'', ''.\n'', ''0a\n'', + ''0\n'', ''.\n''] + patchLines(file_a, patchesFromEdScript(patch)) + assert ''''.join(file_b) == ''''.join(file_a) + if __name__ == "__main__": test()