#!/usr/bin/env python

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

# This is a utility for checking a Debian/Ubuntu APT repository.
# Written by Alexander Sashnov
# E-mail: sashnov@ngs.ru
# JID: asashnov@jabber.ru
#
# NOTE: every print below is written as print("...") with exactly one
# pre-formatted string argument.  Under the Python 2 print statement this
# produces the same output as the unparenthesised form.

import os
import os.path
import string
import re
import urllib
import gzip
import sys
import md5
import optparse

# for Python 2.3
# in Python 2.4 set is a built-in type so this can be removed
import sets


# ---------------------------------------------------------------------------
# Command line
# ---------------------------------------------------------------------------

parser = optparse.OptionParser(usage="usage: %prog [options] directory")
parser.add_option("-d", "--dists",
                  action="store", type="string", dest="dist", default="gutsy",
                  help="Distribution (feisty, feisty-security, etch, etc.")
parser.add_option("-s", "--sections",
                  action="store", type="string", dest="sect",
                  default="main,restricted,universe,multiverse",
                  help="Comma separated sections list")
parser.add_option("-a", "--arch",
                  action="store", type="string", dest="arch",
                  default="binary-i386",
                  help="Architecture")
parser.add_option("-f", "--fix-from",
                  action="store", type="string", dest="fromurl",
                  help="Try to fix your repository from this mirror.")
parser.add_option("-n", "--no-md5sums",
                  action="store_false", dest="checkmd", default=True,
                  help="don't check md5sums")
# The two flags below default to False, so they must use store_true;
# with store_false they could never be switched on.
parser.add_option("-r", "--random-check-order",
                  action="store_true", dest="random", default=False,
                  help="Check in random order (usefull for md5sum check on DVD")
parser.add_option("-e", "--extra-files",
                  action="store_true", dest="extra", default=False,
                  help="Find extra files in pool directory")
parser.add_option("-q", "--quiet",
                  action="store_false", dest="verbose", default=True,
                  help="don't print status messages to stdout")

# TODO: add text to --help:
# Examples:   -f http://debian.nsu.ru/ubuntu
#             -f ftp://ftp.lug.ro/ubuntu
#             -f file:/mnt/drive/ubuntu

(opts, args) = parser.parse_args()
if len(args) != 1:
    parser.error("incorrect number of arguments")

base = args[0]
# Fail with a usage error instead of an IOError traceback from gzip.open().
if not os.path.isdir(base):
    parser.error("repository directory does not exist: %s" % base)

arch = opts.arch
sections = opts.sect.split(',')
dists = opts.dist.split(',')


# ---------------------------------------------------------------------------
# Packages.gz parsing
# ---------------------------------------------------------------------------

# The three stanza fields needed to verify a file.  The pattern is used with
# match(), which anchors at the start of the line, so "Installed-Size:" is
# not mistaken for "Size:".
_RE_FIELD = re.compile(r'(Filename|Size|MD5sum): (.+)')


def _add_entry(entries, fields):
    """Append (filename, size, md5sum) to entries if the stanza is complete."""
    if 'Filename' in fields and 'Size' in fields and 'MD5sum' in fields:
        entries.append((fields['Filename'],
                        int(fields['Size']),
                        fields['MD5sum']))


def _parse_packages(packages_file):
    """Return a list of (filename, size, md5sum) tuples from a Packages.gz.

    Fields are collected per stanza (stanzas are separated by blank lines),
    so the result does not depend on the order of the fields and values
    never leak from one stanza into the next.  Stanzas missing any of the
    three fields are skipped.  Raises IOError if the file cannot be read
    and ValueError if a Size field is not an integer.
    """
    entries = []
    fields = {}
    fp = gzip.open(packages_file, "r")
    try:
        for line in fp:
            if not line.strip():
                # A blank line terminates the current stanza.
                _add_entry(entries, fields)
                fields = {}
                continue
            match = _RE_FIELD.match(line)
            if match is not None:
                fields[match.group(1)] = match.group(2)
        # The last stanza is not necessarily followed by a blank line.
        _add_entry(entries, fields)
    finally:
        fp.close()
    return entries


# Set of (relative filename, size in bytes, md5 hex digest) for every package
# listed in the selected dists/sections.
files_on_server = sets.Set()

for dist in dists:
    for sect in sections:
        packages_file = (base + "/dists/" + dist + '/' + sect + "/" +
                         arch + "/Packages.gz")
        if opts.verbose:
            print("Parse %s" % packages_file)
        for entry in _parse_packages(packages_file):
            files_on_server.add(entry)


def file_md5(fname):
    '''Return the hexadecimal MD5 digest of the file named fname.

    The file is read in chunks, so large packages are never loaded into
    memory in one piece.  Raises IOError if the file cannot be opened or read.
    '''
    m = md5.new()
    f = open(fname, 'rb')
    try:
        while True:
            d = f.read(8096)
            if not d:
                break
            m.update(d)
    finally:
        f.close()
    return m.hexdigest()


class Progress:
    """Print progress in percents, one line per 5% milestone.

    Construct with the total number of steps and call step() once at the
    start of each step.  A line is printed when the number of steps already
    finished reaches a new milestone; with few steps the skipped milestones
    are not printed, so the value never exceeds 100.
    """

    def __init__(self, _step_n):
        self.step_pers = 5      # milestone width, in percent
        self.total = _step_n    # total number of steps expected
        self.n = 0              # steps started so far
        self.m = 0              # index of the next milestone to report

    def step(self):
        done = self.n           # steps finished before this one
        self.n = self.n + 1
        if self.total <= 0:
            return
        # Integer arithmetic only: done/total >= m*step_pers/100
        if done * 100 >= self.m * self.step_pers * self.total:
            # Jump straight to the highest milestone already reached.
            self.m = (done * 100) // (self.step_pers * self.total)
            print("Progress %d %%" % (self.m * self.step_pers))
            self.m = self.m + 1


if opts.verbose:
    print("Start checking...")
# ---------------------------------------------------------------------------
# Verify every file listed in the Packages indexes, optionally re-download
# broken ones from the --fix-from mirror, then print a summary.
# Each element of files_on_server is (relative filename, size, md5 digest).
# The process exits with status 1 if any file is still broken at the end.
#
# NOTE: every print below is written as print("...") with exactly one
# pre-formatted string argument.  Under the Python 2 print statement this
# produces the same output as the unparenthesised form.
# ---------------------------------------------------------------------------

errors = 0
bytes_total = 0
bytes_broken = 0
bytes_fixed = 0

# Float divisor: with integer division the "%.1f" figures below would
# always be printed with a ".0" fraction.
bytes_per_mb = 1024.0 * 1024.0

pr = Progress(len(files_on_server))
for f in files_on_server:
    pr.step()
    fn = base + '/' + f[0]
    bytes_total = bytes_total + f[1]

    broken = False
    if not os.path.exists(fn):
        print("File not exists: %s" % fn)
        broken = True
    else:
        actual_size = os.path.getsize(fn)
        if actual_size != f[1]:
            print("Sizes missmatch %s: must be %d but it %d"
                  % (fn, f[1], actual_size))
            broken = True
        elif opts.checkmd and file_md5(fn) != f[2]:
            # The MD5 pass only runs when the size already matches.
            print("MD5 missmatch %s: must be '%s'" % (fn, f[2]))
            broken = True

    if broken and opts.fromurl is not None:
        target_dir = os.path.dirname(fn)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        u = opts.fromurl + '/' + f[0]
        print("Download from  %s" % u)
        try:
            urllib.urlretrieve(u, fn)
        except IOError:
            # One unreachable file must not abort the whole check; report
            # it and let the digest test below count it as still broken.
            print("Download failed: %s" % sys.exc_info()[1])
        # The digest is verified after a download even with --no-md5sums.
        if os.path.exists(fn) and file_md5(fn) == f[2]:
            print("After download md5sum is OK")
            bytes_fixed = bytes_fixed + f[1]
            broken = False
        else:
            print("Still broken after download")

    if broken:
        errors = errors + 1
        bytes_broken = bytes_broken + f[1]

if opts.verbose:
    print("There is %d total and %d broken packages in repository."
          % (len(files_on_server), errors))
    print("Total packages size %.1fMb." % (bytes_total / bytes_per_mb))

if opts.verbose and errors > 0:
    print("Broken packages size %.1fMb." % (bytes_broken / bytes_per_mb))

if opts.fromurl is not None:
    print("%.1fMb was fixed from %s"
          % (bytes_fixed / bytes_per_mb, opts.fromurl))

if opts.fromurl is None and errors > 0:
    print("Hint: use '--fix-from' for fix repository from other mirror")

if errors > 0:
    sys.exit(1)

if opts.verbose:
    print("Your repository seems OK")

if opts.extra:
    files_in_pool = sets.Set()
    # TODO: find -type f, compare two sets and print difference