Skip to content
Snippets Groups Projects
clean-betas.py 3.66 KiB
#!/usr/bin/env python

# Cleans up old beta conda packages to avoid storing unused packages

import os
import re
import sys

from distutils.version import StrictVersion


# Matches the file name of a conda *beta* package and splits it into parts:
#
#   <name>-<version>-<hash>_<build><extension>
#
# e.g. ``bob.sp-2.0.11b0-py27h21b2d43_7.tar.bz2``.
#
# * name: anything without whitespace (may itself contain dashes and dots)
# * version: three dot-separated numbers immediately followed by a
#   pre-release tag (``a``, ``b`` or ``c`` plus a number), e.g. ``2.0.11b0``
# * hash: ``h`` followed by 7 hexadecimal digits, optionally prefixed by a
#   python tag such as ``py27``
# * build: the build number
# * extension: always ``.tar.bz2``
#
# Named groups require the ``(?P<name>...)`` syntax.
package_regex = re.compile(
    r'(?P<name>\S+)'
    r'-(?P<version>\d+\.\d+\.\d+[abc]\d+)'
    r'-(?P<hash>(py\d\d)?h[0-9a-f]{7})'
    r'_(?P<build>\d+)'
    r'(?P<extension>\.tar\.bz2)'
    )


def check_regex():
  """Self-test for ``package_regex``

  Checks that a set of known beta package file names is matched and split
  into the expected name, version, hash, build and extension, and that a set
  of non-beta package file names is rejected. Raises ``AssertionError`` on
  the first mismatch.
  """


  def _check(s, name, version, _hash, build, ext):
    """Asserts that ``s`` matches and that each named group is as expected"""
    m = package_regex.match(s)
    assert m is not None, 'no match for %s' % s
    assert m.group('name') == name
    assert m.group('version') == version
    assert m.group('hash') == _hash
    assert m.group('build') == build
    assert m.group('extension') == ext


  # This regexp must match the following examples:
  _check('docs-2018.02.21b0-h7a51f39_0.tar.bz2', 'docs', '2018.02.21b0',
      'h7a51f39', '0', '.tar.bz2')

  _check('docs-with-dashes-2018.02.21b0-h7a51f39_0.tar.bz2',
      'docs-with-dashes', '2018.02.21b0', 'h7a51f39', '0', '.tar.bz2')

  _check('bob.sp-2.0.11b0-py27h21b2d43_7.tar.bz2', 'bob.sp', '2.0.11b0',
      'py27h21b2d43', '7', '.tar.bz2')

  _check('gridtk-1.5.1b0-py27h361992c_4.tar.bz2', 'gridtk', '1.5.1b0',
      'py27h361992c', '4', '.tar.bz2')

  _check('bob.measure-2.5.0b0-py36h81a6768_11.tar.bz2', 'bob.measure',
      '2.5.0b0', 'py36h81a6768', '11', '.tar.bz2')

  _check('bob.ip.caffe_extractor-1.1.2b0-py27hc65a447_0.tar.bz2',
      'bob.ip.caffe_extractor', '1.1.2b0', 'py27hc65a447', '0', '.tar.bz2')

  _check('bob.db.msu_mfsd_mod-2.2.4b0-py27h2410e3f_2.tar.bz2',
      'bob.db.msu_mfsd_mod', '2.2.4b0', 'py27h2410e3f', '2', '.tar.bz2')

  _check('bob-3.0.1b0-py27h2dcd9c5_5.tar.bz2', 'bob', '3.0.1b0',
      'py27h2dcd9c5', '5', '.tar.bz2')

  # This regexp must **not** match the following examples (no pre-release
  # tag in the version and/or no ``h<hash>`` build string)
  assert package_regex.match('zc.buildout-2.10.0-py27_0.tar.bz2') is None
  assert package_regex.match('speexdsp-1.2rc3-h5bbff6d_0.tar.bz2') is None
  assert package_regex.match('pkgtools-0.7.3-py27_0.tar.bz2') is None
  assert package_regex.match('opencv-3.1.0-np111py27_4.tar.bz2') is None
  assert package_regex.match('keras-gpu-2.0.8-py27_0.tar.bz2') is None
  assert package_regex.match('bob-3.0.0-np113py36_0.tar.bz2') is None
  assert package_regex.match('bob.xyz.bla-2.0.12-np113py27_0.tar.bz2') is None


def main(scandir, dry_run):
  """Prunes superseded beta packages found under ``scandir``

  Walks ``scandir`` recursively (symbolic links are not followed), groups
  every file whose name matches ``package_regex`` by package name and, for
  each package name, removes all files except the one with the highest
  ``(version, build)`` pair. Files named ``repodata.json*`` are skipped
  silently; other non-matching files are reported and left alone.

  Parameters:

    scandir (str): Root directory to scan

    dry_run (bool): If true, only prints what would be removed - no file is
      actually deleted

  """

  betas = dict()

  # use the ``scandir`` argument, not sys.argv, so callers control the root
  for (path, _dirnames, filenames) in os.walk(scandir, followlinks=False):

    for f in filenames:
      if f.startswith('repodata.json'): continue
      fullpath = os.path.join(path, f)
      m = package_regex.match(f)
      if m is None:
        print('ignoring `%s\' (does not match)' % fullpath)
        continue

      try:
        version = StrictVersion(m.group('version'))
      except ValueError:
        # StrictVersion only understands "a" and "b" pre-release tags; do not
        # abort the whole scan (nor delete anything) for a version it rejects
        print('ignoring `%s\' (cannot parse version)' % fullpath)
        continue

      # got a beta package since it matches our regex, insert it into our
      # list of packages to evaluate
      betas.setdefault(m.group('name'), []).append((
        version,
        int(m.group('build')),
        fullpath,
        ))

  # one file per package name is kept, all the others are pruned
  count = sum(len(k) for k in betas.values()) - len(betas)
  print('end of scan: prunning %d packages' % count)

  for name in sorted(betas):
    print('packages for %s (%d):' % (name, len(betas[name])))
    # tuples sort by version first, then build number: the last one is newest
    sorted_packages = sorted(betas[name])
    for _version, _build, path in sorted_packages[:-1]:
      print('unlink %s' % path)
      if not dry_run: os.unlink(path)
    # print only the path: formatting the whole tuple with one %s would fail
    print('keeping %s' % sorted_packages[-1][2])


if __name__ == '__main__':

  # Command-line entry point: takes the directory to scan and, optionally,
  # the literal word 'run' to switch off dry-run mode.
  args = sys.argv[1:]
  wants_help = bool(args) and args[0] in ('-h', '--help')

  if wants_help or len(args) not in (1, 2):
    print("usage: %s <dir> [run]" % sys.argv[0])
    print("When no command line argument is specified, this script is executed in dry-run mode (i.e., it does not actually delete files). Add option 'run' to actually delete the files.")
    sys.exit(1)

  # the regex self-test is disabled by default:
  #check_regex()

  # files are only deleted when the second argument is exactly 'run'
  really_delete = len(args) == 2 and args[1] == 'run'
  main(args[0], not really_delete)