Commit 680c9f0b authored by Amir MOHAMMADI's avatar Amir MOHAMMADI

Merge branch 'repodata-patches' into 'master'

Implement hot-fix to repo indexing

Closes #56 and #53

See merge request !164
parents 1e52af87 9a780d46
Pipeline #41583 passed with stages
in 19 minutes and 47 seconds
......@@ -104,6 +104,9 @@ def base_deploy(dry_run):
dry_run=dry_run,
)
logger.info("Removing %s after successful deployment", k)
os.unlink(k)
@ci.command(
epilog="""
......
repodata-patches
python-gitlab
untokenize
docformatter
BSD 3-clause license
Copyright (c) 2020, Idiap research institute
Copyright (c) 2015-2019, conda-forge
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import json
import os
import sys
from collections import defaultdict
from os.path import isdir
from os.path import join
import requests
import tqdm
from packaging.version import parse as parse_version
# Root URL of the conda channel whose index is being patched.
BASE_URL = "http://www.idiap.ch/software/bob/conda"
# Platform subdirectories of the channel that get patch instructions.
SUBDIRS = (
    "noarch",
    "linux-64",
    "osx-64",
)
# Per-subdir removal tables; currently empty for every platform.
# The test suite checks these end up as a subset of the "remove" list.
REMOVALS = {"osx-64": {}, "linux-64": {}, "noarch": {}}
# Dependency-pin comparison operators.
# NOTE(review): OPERATORS appears unused in this file — confirm before removing.
OPERATORS = ["==", ">=", "<=", ">", "<", "!="]
def _gen_patch_instructions(index, new_index, packages_key):
instructions = {
"patch_instructions_version": 1,
packages_key: defaultdict(dict),
"revoke": [],
"remove": [],
}
# diff all items in the index and put any differences in the instructions
for fn in index:
assert fn in new_index
# replace any old keys
for key in index[fn]:
assert key in new_index[fn], (key, index[fn], new_index[fn])
if index[fn][key] != new_index[fn][key]:
instructions[packages_key][fn][key] = new_index[fn][key]
# add any new keys
for key in new_index[fn]:
if key not in index[fn]:
instructions[packages_key][fn][key] = new_index[fn][key]
return instructions
def _gen_new_index(repodata, packages_key):
    """Make any changes to the index by adjusting the values directly.

    This function returns the new index with the adjustments.
    Finally, the new and old indices are then diff'ed to produce the repo
    data patches.
    """
    patched_index = copy.deepcopy(repodata[packages_key])
    for record in patched_index.values():
        # bob.bio.base <=4.1.0 does not work with numpy >=1.18
        if (
            record["name"] == "bob.bio.base"
            and parse_version(record["version"]) <= parse_version("4.1.0")
        ):
            record["depends"].append("numpy <1.18")
    return patched_index
def gen_new_index_and_patch_instructions(repodata):
    """Build one combined instruction dict covering both package indices.

    Runs the index adjustment and diff for the legacy ``"packages"`` index
    and the newer ``"packages.conda"`` index, merging the ``revoke`` and
    ``remove`` lists of the two passes (deduplicated).
    """
    combined = {}
    for packages_key in ("packages", "packages.conda"):
        new_index = _gen_new_index(repodata, packages_key)
        inst = _gen_patch_instructions(repodata[packages_key], new_index, packages_key)
        if not combined:
            # first pass seeds the whole structure
            combined.update(inst)
        else:
            combined[packages_key] = inst[packages_key]
            # union the bookkeeping lists, dropping duplicates
            for list_key in ("revoke", "remove"):
                combined[list_key] = list(
                    set(combined[list_key]) | set(inst[list_key])
                )
    return combined
def main():
    """Download repodata for every subdir and emit its patch instructions.

    Output files are written under ``$PREFIX`` (falling back to ``./tmp``)
    so the conda build step bundles the JSON files into the package.
    """
    # Step 1. Collect initial repodata for all subdirs.
    repodatas = {}
    for subdir in tqdm.tqdm(SUBDIRS, desc="Downloading repodata"):
        repodata_url = "/".join((BASE_URL, subdir, "repodata_from_packages.json"))
        response = requests.get(repodata_url)
        response.raise_for_status()
        repodatas[subdir] = response.json()

    # Step 2. Create all patch instructions.
    prefix_dir = os.getenv("PREFIX", "tmp")
    for subdir in SUBDIRS:
        out_dir = join(prefix_dir, subdir)
        if not isdir(out_dir):
            os.makedirs(out_dir)
        # Step 2a+2b. Generate a new index and diff it against the old one.
        instructions = gen_new_index_and_patch_instructions(repodatas[subdir])
        # Step 2c. Output this to $PREFIX so that we bundle the JSON files.
        out_path = join(out_dir, "patch_instructions.json")
        with open(out_path, "w") as fh:
            json.dump(
                instructions, fh, indent=2, sort_keys=True, separators=(",", ": ")
            )
# Script entry point: propagate main()'s return value as the exit status
# (main() returns None, which sys.exit treats as success).
if __name__ == "__main__":
    sys.exit(main())
# Conda recipe for the repodata-patches hot-fix package.
# The version is a UTC build timestamp, so each rebuild supersedes the last.
{% set version = datetime.datetime.utcnow().strftime('%Y%m%d.%H.%M.%S') %}
package:
  name: repodata-patches
  version: {{ version }}
source:
  path: .
build:
  # noarch/generic: a single platform-independent package of data files
  noarch: generic
  number: 0
  script:
    - python gen_patch_json.py
requirements:
  build:
    - python 3.*
    - packaging
    - requests
    - tqdm
test:
  files:
    - test_gen_patch_json.py
    - gen_patch_json.py
    - get_license_family.py
  requires:
    - python 3.*
    - packaging
    - requests
    - tqdm
    - pytest
  commands:
    # verify the build script produced instructions for every subdir
    {% for subdir in ("noarch", "linux-64", "osx-64") %}
    - test -e $PREFIX/{{ subdir }}/patch_instructions.json
    {% endfor %}
    - pytest -vv test_gen_patch_json.py
about:
  summary: generate tweaks to index metadata, hosted separately from anaconda.org index
  home: https://github.com/conda-forge/conda-forge-repodata-patches-feedstock
  license: CC-PDDC
extra:
  recipe-maintainers:
    - amohammadi
Forked from: https://github.com/conda-forge/conda-forge-repodata-patches-feedstock
This scheme generates one file per subdir, ``patch_instructions.json``. This file has entries
```
instructions = {
"patch_instructions_version": 1,
"packages": defaultdict(dict),
"revoke": [],
"remove": [],
}
```
``remove`` are lists of filenames that will not show up in the index but may still be downloadable with a direct URL to the file.
``packages`` is a dictionary, where keys are package filenames. Values are dictionaries similar to the contents of each package in repodata.json. Any values provided in ``packages`` here overwrite the values in repodata.json. Any value set to None is removed.
A tool downloads this package when it sees updates to it, and applies the ``patch_instructions.json``
to the repodata of the channel
The ``show_diff.py`` script in this directory can be used to test out
modifications to ``gen_patch_json.py``. This script shows the difference
between the package records currently available on anaconda.org/conda-forge and those
produced from the patch instructions produced by ``gen_patch_json.py``.
Usage is:
```
usage: show_diff.py [-h] [--subdirs [SUBDIRS [SUBDIRS ...]]] [--use-cache]
show repodata changes from the current gen_patch_json
optional arguments:
-h, --help show this help message and exit
--subdirs [SUBDIRS [SUBDIRS ...]]
subdir(s) show, default is all
--use-cache use cached repodata files, rather than downloading
them
```
Repodata is cached in a ``cache`` directory in the current directory or in the
path specified by the ``CACHE_DIR`` environment variable.
Typically ``show_diff.py`` is run without any argument to download the
necessary repodata followed by repeated calls to ``show_diff.py --use-cache``
to test out changes to the ``gen_patch_json.py`` script.
#!/usr/bin/env python
import bz2
import difflib
import json
import os
import urllib
from conda_build.index import _apply_instructions
from gen_patch_json import BASE_URL
from gen_patch_json import SUBDIRS
from gen_patch_json import gen_new_index_and_patch_instructions
# Directory for cached repodata downloads; defaults to ./cache next to this
# script, override with the $CACHE_DIR environment variable.
CACHE_DIR = os.environ.get(
    "CACHE_DIR", os.path.join(os.path.dirname(os.path.abspath(__file__)), "cache")
)
def show_record_diffs(subdir, ref_repodata, new_repodata):
    """Print a unified diff of every package record that changed.

    Compares records between the reference repodata and the patched
    repodata for both package indices; identical records produce no output.
    """
    for packages_key in ("packages", "packages.conda"):
        for fn, ref_pkg in ref_repodata[packages_key].items():
            new_pkg = new_repodata[packages_key][fn]
            # license_family gets added for new packages, ignore it in the diff
            ref_pkg.pop("license_family", None)
            new_pkg.pop("license_family", None)
            if ref_pkg != new_pkg:
                print(f"{subdir}::{fn}")
                ref_lines = json.dumps(ref_pkg, indent=2).splitlines()
                new_lines = json.dumps(new_pkg, indent=2).splitlines()
                diff = difflib.unified_diff(ref_lines, new_lines, n=0, lineterm="")
                for line in diff:
                    # drop the file headers and hunk markers, keep +/- lines
                    if not line.startswith(("+++", "---", "@@")):
                        print(line)
def do_subdir(subdir, raw_repodata_path, ref_repodata_path):
    """Apply the current patch instructions to the cached raw repodata and
    print the record diffs against the cached reference repodata."""
    with bz2.open(raw_repodata_path) as fh:
        raw_repodata = json.load(fh)
    with bz2.open(ref_repodata_path) as fh:
        ref_repodata = json.load(fh)
    patched_repodata = _apply_instructions(
        subdir, raw_repodata, gen_new_index_and_patch_instructions(raw_repodata)
    )
    show_record_diffs(subdir, ref_repodata, patched_repodata)
def download_subdir(subdir, raw_repodata_path, ref_repodata_path):
    """Download both repodata files for *subdir* into the given cache paths.

    Fetches the raw (pre-patch) ``repodata_from_packages.json.bz2`` and the
    published (patched) ``repodata.json.bz2`` from the channel.
    """
    # The file-level ``import urllib`` does not guarantee the
    # ``urllib.request`` submodule is loaded; import it explicitly so
    # ``urlretrieve`` is always available.
    import urllib.request

    raw_url = f"{BASE_URL}/{subdir}/repodata_from_packages.json.bz2"
    print("Downloading:", raw_url)
    urllib.request.urlretrieve(raw_url, raw_repodata_path)
    ref_url = f"{BASE_URL}/{subdir}/repodata.json.bz2"
    print("Downloading:", ref_url)
    urllib.request.urlretrieve(ref_url, ref_repodata_path)
# Command-line driver: diff the current patch instructions against the
# published repodata for the requested subdirs, caching downloads on disk.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(
        description="show repodata changes from the current gen_patch_json"
    )
    parser.add_argument(
        "--subdirs", nargs="*", default=None, help="subdir(s) show, default is all"
    )
    parser.add_argument(
        "--use-cache",
        action="store_true",
        help="use cached repodata files, rather than downloading them",
    )
    args = parser.parse_args()
    # no --subdirs given: process every known platform subdir
    if args.subdirs is None:
        subdirs = SUBDIRS
    else:
        subdirs = args.subdirs
    for subdir in subdirs:
        subdir_dir = os.path.join(CACHE_DIR, subdir)
        if not os.path.exists(subdir_dir):
            os.makedirs(subdir_dir)
        raw_repodata_path = os.path.join(subdir_dir, "repodata_from_packages.json.bz2")
        ref_repodata_path = os.path.join(subdir_dir, "repodata.json.bz2")
        # refresh the cache unless the user explicitly asked to reuse it
        if not args.use_cache:
            download_subdir(subdir, raw_repodata_path, ref_repodata_path)
        do_subdir(subdir, raw_repodata_path, ref_repodata_path)
from gen_patch_json import REMOVALS
from gen_patch_json import _gen_patch_instructions
def test_gen_patch_instructions():
    """_gen_patch_instructions reports changed and added keys, nothing else."""
    index = {
        "a": {"depends": ["c", "d"], "features": "d"},
        "b": {"nane": "blah"},
        "c": {},
    }
    new_index = {
        "a": {"depends": ["c", "d", "e"], "features": None},
        "b": {"nane": "blah"},
        "c": {"addthis": "yes"},
    }
    inst = _gen_patch_instructions(index, new_index, "packages")
    assert inst["patch_instructions_version"] == 1
    # the bookkeeping entries are always present
    for key in ("revoke", "remove", "packages"):
        assert key in inst
    # only "a" (changed values) and "c" (new key) appear; "b" is unchanged
    expected = {
        "a": {"depends": ["c", "d", "e"], "features": None},
        "c": {"addthis": "yes"},
    }
    assert inst["packages"] == expected
    assert set(REMOVALS["osx-64"]) <= set(inst["remove"])
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment