Commit 3a065d15
authored 5 years ago by André Anjos

[mirror] Implement patch and whitelist support

parent 82c5897d
Pipeline #34665 passed, 5 years ago (stages: build, deploy)
Showing 2 changed files with 96 additions and 7 deletions:

  bob/devtools/mirror.py          (+61 −7)
  bob/devtools/scripts/mirror.py  (+35 −0)
bob/devtools/mirror.py (+61 −7)
@@ -48,8 +48,8 @@ def _download(url, target_directory):
     download_filename = os.path.join(target_directory, target_filename)
     with open(download_filename, 'w+b') as tf:
         ret = requests.get(url, stream=True)
-        logger.debug('Saving to %s (%s bytes)', download_filename,
-                ret.headers['Content-length'])
+        size = ret.headers.get('Content-length', '??')
+        logger.debug('Saving to %s (%s bytes)', download_filename, size)
         for data in ret.iter_content(chunk_size):
             tf.write(data)
     file_size = os.path.getsize(download_filename)
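Both logging call sites changed in this file move from indexing `ret.headers['Content-length']` to `ret.headers.get('Content-length', '??')`: the header is optional (it is absent, for instance, under chunked transfer encoding), and direct indexing would abort the download with a KeyError. A minimal sketch of the difference, against a hypothetical URL:

    import requests

    # Hypothetical endpoint; any server that omits Content-length (e.g. with
    # chunked transfer encoding) would exercise the fallback.
    r = requests.get('https://example.com/some-file', stream=True)

    # r.headers['Content-length'] raises KeyError when the header is missing;
    # .get() degrades to a '??' placeholder so the log line cannot fail:
    size = r.headers.get('Content-length', '??')
    print('downloading %s bytes' % size)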
@@ -96,16 +96,17 @@ def get_json(channel, platform, name):
     url = channel + '/' + platform + '/' + name
     logger.debug('[checking] %s...', url)
     r = requests.get(url, allow_redirects=True, stream=True)
-    logger.info('[download] %s (%s bytes)...', url,
-            r.headers['Content-length'])
+    size = r.headers.get('Content-length', '??')
+    logger.info('[download] %s (%s bytes)...', url, size)
     if name.endswith('.bz2'):
         # just in case transport encoding was applied
         r.raw.decode_content = True
         data = bz2.decompress(r.raw.read())
-    else:
-        data = r.read()
-    return json.loads(data)
+        return json.loads(data)
+
+    # else, just decodes the response
+    return r.json()


 def get_local_contents(path, arch):
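Besides the safer size logging, this hunk short-circuits the `.bz2` branch and lets requests decode plain JSON responses via `r.json()`, replacing the old `data = r.read()` line (requests.Response has no `read()` method, so that fallback path could never have worked). A hedged usage sketch, with conda's public defaults channel used purely as an example:

    from bob.devtools.mirror import get_json  # module path taken from this diff

    # Hypothetical call; channel/platform/name mirror the function signature.
    repodata = get_json('https://repo.anaconda.com/pkgs/main', 'linux-64',
                        'repodata.json.bz2')
    print('%d packages listed' % len(repodata.get('packages', {})))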
@@ -141,6 +142,15 @@ def blacklist_filter(packages, globs):
     return packages - to_remove


+def whitelist_filter(packages, globs):
+    """Filters **in** the input package set with the glob list"""
+
+    to_keep = set()
+    for k in globs:
+        to_keep |= set(fnmatch.filter(packages, k))
+    return to_keep
+
+
 def download_packages(packages, repodata, channel_url, dest_dir, arch, dry_run):
     """Downloads remote packages to a download directory"""
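The new `whitelist_filter` is the mirror image of the existing `blacklist_filter`: it keeps only the names matching at least one shell-style glob, via `fnmatch.filter`. A hedged sketch with invented package filenames:

    from bob.devtools.mirror import whitelist_filter  # added by this commit

    # Hypothetical package set, as produced elsewhere by get_local_contents().
    packages = {'bob-1.0-py36_0.tar.bz2', 'numpy-1.18.1-py36_0.tar.bz2'}
    kept = whitelist_filter(packages, ['bob-*'])
    # kept == {'bob-1.0-py36_0.tar.bz2'}; everything else is filtered out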
@@ -215,8 +225,9 @@ def download_packages(packages, repodata, channel_url, dest_dir, arch, dry_run):
         if not dry_run:
             logger.debug('[checking: %d/%d] %s', k, total, url)
             r = requests.get(url, stream=True, allow_redirects=True)
+            size = r.headers.get('Content-length', '??')
             logger.info('[download: %d/%d] %s -> %s (%s bytes)', k,
-                    total, url, temp_dest, r.headers['Content-length'])
+                    total, url, temp_dest, size)
             open(temp_dest, 'wb').write(r.raw.read())

             # verify that checksum matches
@@ -279,3 +290,46 @@ def remove_packages(packages, dest_dir, arch, dry_run):
         logger.info('[remove: %d/%d] %s', k, total, path)
         if not dry_run:
             os.unlink(path)
+
+
+def _cleanup_json(data, packages):
+    """Cleans-up the contents of conda JSON looking at existing packages"""
+
+    # only keys to clean-up here, othere keys remain unchanged
+    for key in ('packages', 'packages.conda'):
+        if key not in data: continue
+        data[key] = dict((k, v) for k, v in data[key].items() if k in packages)
+
+    return data
+
+
+def _save_json(data, dest_dir, arch, name):
+    """Saves contents of conda JSON"""
+
+    destfile = os.path.join(dest_dir, arch, name)
+    with open(destfile, 'w') as outfile:
+        json.dump(data, outfile, ensure_ascii=True, indent=2)
+    return destfile
+
+
+def copy_and_clean_json(url, dest_dir, arch, name):
+    """Copies and cleans conda JSON file"""
+
+    data = get_json(url, arch, name)
+    packages = get_local_contents(dest_dir, arch)
+    data = _cleanup_json(data, packages)
+    return _save_json(data, dest_dir, arch, name)
+
+
+def copy_and_clean_patch(url, dest_dir, arch, name):
+    """Copies and cleans conda JSON file"""
+
+    data = get_json(url, arch, name)
+    packages = get_local_contents(dest_dir, arch)
+    data = _cleanup_json(data, packages)
+
+    # cleanup specific patch_instructions.json fields
+    for key in ["remove", "revoke"]:
+        data[key] = [k for k in data[key] if k in packages]
+
+    return _save_json(data, dest_dir, arch, name)
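`_cleanup_json` prunes the `packages` and `packages.conda` maps of a repodata-style dictionary down to filenames actually present in the local mirror, leaving every other key untouched; `copy_and_clean_patch` additionally prunes the `remove` and `revoke` lists specific to patch_instructions.json. An illustrative miniature (the dictionary content below is invented):

    from bob.devtools.mirror import _cleanup_json  # private helper from above

    data = {
        'info': {'subdir': 'linux-64'},
        'packages': {
            'bob-1.0-py36_0.tar.bz2': {'md5': 'aaaa'},
            'gone-2.0-py36_0.tar.bz2': {'md5': 'bbbb'},
        },
    }
    cleaned = _cleanup_json(data, {'bob-1.0-py36_0.tar.bz2'})
    # cleaned['packages'] keeps only the bob-* entry; 'info' is unchanged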
bob/devtools/scripts/mirror.py (+35 −0)
@@ -14,8 +14,10 @@ from ..mirror import (
     get_local_contents,
     load_glob_list,
     blacklist_filter,
+    whitelist_filter,
     download_packages,
     remove_packages,
+    copy_and_clean_patch,
     )

 from ..log import verbosity_option, get_logger, echo_info, echo_warning
@@ -51,6 +53,15 @@ Examples:
     help="A file containing a list of globs to exclude from local " \
         "mirroring, one per line",
 )
+@click.option(
+    "-w",
+    "--whitelist",
+    type=click.Path(exists=True, dir_okay=False, file_okay=True,
+        readable=True, resolve_path=True),
+    help="A file containing a list of globs to include at local " \
+        "mirroring, one per line.  This is considered *after* " \
+        "the blacklisting.  It is here just for testing purposes",
+)
 @click.option(
     "-m",
     "--check-md5/--no-check-md5",
@@ -72,15 +83,25 @@ Examples:
     readable=True, writable=True, resolve_path=True),
     help="A directory where to store temporary files",
 )
+@click.option(
+    "-p",
+    "--patch/--no-patch",
+    default=False,
+    help="If set, then consider we are mirroring the defaults channel "
+    "where a patch_instructions.json exists and must be downloaded and "
+    "prunned so the mirror works adequately",
+)
 @verbosity_option()
 @bdt.raise_on_error
 def mirror(
     channel_url,
     dest_dir,
     blacklist,
+    whitelist,
     check_md5,
     dry_run,
     tmpdir,
+    patch,
 ):
     """Mirrors a conda channel to a particular local destination"""
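Putting the two new options together, an invocation could look like the line below. The `bdt mirror` entry-point name and the positional argument order are assumptions inferred from this script's signature, not shown in this diff:

    bdt mirror -vv --patch --whitelist=whitelist.txt \
        https://repo.anaconda.com/pkgs/main /srv/conda-mirror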
@@ -133,6 +154,10 @@ def mirror(
     to_download = blacklist_filter(remote_packages - local_packages,
             globs_to_remove)

+    if whitelist is not None and os.path.exists(whitelist):
+        globs_to_consider = set(load_glob_list(whitelist))
+        to_download = whitelist_filter(to_download, globs_to_consider)
+
     # in the local packages, subset those that we no longer need, be it
     # because they have been removed from the remote repository, or because
     # we decided to blacklist them.
@@ -157,6 +182,16 @@ def mirror(
         echo_info("Mirror at %s/%s is up-to-date w.r.t. blacklist. " \
             "No packages to be removed." % (dest_dir, arch))

+    if patch:
+        # download/cleanup patch instructions, otherwise conda installs may
+        # go crazy.  Do this before the indexing, that will use that file
+        # to do its magic.
+        patch_file = 'patch_instructions.json'
+        name = copy_and_clean_patch(channel_url, dest_dir, arch, patch_file)
+        echo_info("Cleaned copy of %s/%s/%s installed at %s" % (
+            channel_url, arch, patch_file, name))
+
     # re-indexes the channel to produce a conda-compatible setup
     echo_info("Re-indexing %s..." % dest_dir)
     if not dry_run:
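On the defaults channel, patch_instructions.json carries repodata hotfixes that the subsequent indexing step folds into the generated repodata.json; copying it verbatim would reference packages the mirror never fetched, hence the pruning before re-indexing. A hedged sketch of the call this new branch performs:

    from bob.devtools.mirror import copy_and_clean_patch

    # Hypothetical arguments; mirror() passes its own channel_url/dest_dir/arch.
    dest = copy_and_clean_patch('https://repo.anaconda.com/pkgs/main',
                                '/srv/conda-mirror', 'linux-64',
                                'patch_instructions.json')
    # dest is the path of the saved copy, with its 'packages',
    # 'packages.conda', 'remove' and 'revoke' entries reduced to the
    # filenames that actually exist in the local mirror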