Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
bob.devtools
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
bob
bob.devtools
Commits
4bb0e089
Commit
4bb0e089
authored
5 years ago
by
André Anjos
Browse files
Options
Downloads
Patches
Plain Diff
[mirror] Own implementation of conda channel mirroring
parent
bd288e10
Branches
Branches containing commit
Tags
Tags containing commit
1 merge request
!123
Own implementation of conda channel mirroring
Pipeline
#34578
passed
5 years ago
Stage: build
Changes
3
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
bob/devtools/mirror.py
+260
-0
260 additions, 0 deletions
bob/devtools/mirror.py
bob/devtools/scripts/mirror.py
+151
-0
151 additions, 0 deletions
bob/devtools/scripts/mirror.py
setup.py
+1
-0
1 addition, 0 deletions
setup.py
with
412 additions
and
0 deletions
bob/devtools/mirror.py
0 → 100644
+
260
−
0
View file @
4bb0e089
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
'''
Mirroring functionality for conda channels
Some constructs are bluntly copied from
https://github.com/valassis-digital-media/conda-mirror
'''
import
os
import
bz2
import
json
import
hashlib
import
fnmatch
import
tempfile
import
requests
from
.log
import
get_logger
# Module-level logger, obtained from the package's ``get_logger`` helper using
# this module's ``__name__``; used by every function below for progress output.
logger = get_logger(__name__)
def _download(url, target_directory):
    """
    Download `url` to `target_directory`

    The local file is named after the last ``/``-separated component of
    `url`.  The body is streamed to disk in small chunks so that large files
    are never held in memory at once.

    Parameters
    ----------
    url : str
        The url to download
    target_directory : str
        The path to a directory where `url` should be downloaded

    Returns
    -------
    file_size: int
        The size in bytes of the file that was downloaded

    Raises
    ------
    requests.HTTPError
        If the server answers with an error (4xx/5xx) status code
    """
    chunk_size = 1024  # 1KB chunks
    logger.info("Download %s -> %s", url, target_directory)

    # the destination file carries the name of the last URL component
    target_filename = url.split('/')[-1]
    download_filename = os.path.join(target_directory, target_filename)

    # the response is used as a context manager so the connection is always
    # released, even if writing to disk fails
    with requests.get(url, stream=True) as ret:
        # fail early instead of saving an HTML error page as if it was data
        ret.raise_for_status()

        # Content-length is optional (e.g. chunked transfer encoding)
        logger.debug(
            'Saving to %s (%s bytes)',
            download_filename,
            ret.headers.get('Content-length', 'unknown'),
        )

        # only create the local file once we know the request succeeded
        with open(download_filename, 'w+b') as tf:
            for data in ret.iter_content(chunk_size):
                tf.write(data)

    # measured after the file is closed, so buffered data is accounted for
    return os.path.getsize(download_filename)
def
_list_conda_packages
(
local_dir
):
"""
List the conda packages (*.tar.bz2 or *.conda files) in `local_dir`
Parameters
----------
local_dir : str
Some local directory with (hopefully) some conda packages in it
Returns
-------
list
List of conda packages in `local_dir`
"""
contents
=
os
.
listdir
(
local_dir
)
return
fnmatch
.
filter
(
contents
,
"
*.conda
"
)
+
\
fnmatch
.
filter
(
contents
,
"
*.tar.bz2
"
)
def get_json(channel, platform, name):
    """
    Get a JSON file for a channel/platform combo on conda channel

    Parameters
    ----------
    channel : str
        Complete channel URL
    platform : {'linux-64', 'osx-64', 'noarch'}
        The platform of interest
    name : str
        The name of the file to retrieve.  If the name ends in '.bz2', then it
        is auto-decompressed

    Returns
    -------
    repodata : dict
        contents of repodata.json

    Raises
    ------
    requests.HTTPError
        If the server answers with an error (4xx/5xx) status code
    """
    url = channel + '/' + platform + '/' + name
    logger.debug('[checking] %s...', url)

    # stream=True lets us log the announced size before the body is fetched;
    # the context manager guarantees the connection is released afterwards
    with requests.get(url, allow_redirects=True, stream=True) as r:
        # do not try to parse an error page as (compressed) JSON
        r.raise_for_status()

        # Content-length is optional (e.g. chunked transfer encoding)
        logger.info(
            '[download] %s (%s bytes)...',
            url,
            r.headers.get('Content-length', 'unknown'),
        )

        # ``content`` reads the whole body and transparently undoes any
        # transport (gzip/deflate) encoding.  ``requests.Response`` has no
        # ``read()`` method, so this is also the right call for plain JSON.
        data = r.content

    if name.endswith('.bz2'):
        data = bz2.decompress(data)

    return json.loads(data)
def get_local_contents(path, arch):
    """
    Collects the package file names present in ``<path>/<arch>``

    Parameters
    ----------
    path : str
        Root directory of the local mirror
    arch : str
        Name of the sub-directory (platform) to inspect

    Returns
    -------
    set
        Names of the ``*.tar.bz2`` and ``*.conda`` files found directly inside
        ``<path>/<arch>``.  An empty set is returned if that path does not
        exist.
    """
    subdir = os.path.join(path, arch)

    if os.path.exists(subdir):
        # path exists, lists currently available packages
        logger.info('Listing package contents of %s...', subdir)
        entries = os.listdir(subdir)
        return {
            entry
            for pattern in ('*.tar.bz2', '*.conda')
            for entry in fnmatch.filter(entries, pattern)
        }

    return set()
def load_glob_list(path):
    """
    Loads a list of globs from a configuration file

    Each line is stripped of surrounding whitespace.  Empty lines are
    excluded, and so are lines whose first character is ``#`` (comments) or
    ``-``.

    Parameters
    ----------
    path : str
        Path to the text file to read, one glob per line

    Returns
    -------
    list of str
        The globs, in the order they appear in the file
    """
    # the context manager makes sure the file handle is closed once read
    with open(path, "rt") as f:
        stripped = [line.strip() for line in f]

    return [k for k in stripped if k and not k.startswith(("#", "-"))]
def blacklist_filter(packages, globs):
    """
    Drops from ``packages`` every name matched by at least one glob

    Parameters
    ----------
    packages : set of str
        Package file names to be filtered
    globs : iterable of str
        :py:mod:`fnmatch`-style patterns describing the names to exclude

    Returns
    -------
    set of str
        A new set with the elements of ``packages`` that match none of the
        patterns in ``globs``
    """
    unwanted = set().union(
        *(fnmatch.filter(packages, pattern) for pattern in globs)
    )
    return packages - unwanted
def download_packages(packages, repodata, channel_url, dest_dir, arch, dry_run):
    """
    Downloads remote packages to a download directory

    Packages are downloaded first to a temporary directory, then validated
    according to the expected sha256/md5 sum and then moved, one by one, to the
    destination directory.  An error is raised if the package cannot be
    correctly downloaded.

    Parameters
    ----------
    packages : list of str
        List of packages to download from the remote channel
    repodata: dict
        A dictionary containing the remote repodata.json contents.  Entries
        for ``*.tar.bz2`` files are read from its ``packages`` section, all
        others from its ``packages.conda`` section.
    channel_url: str
        The complete channel URL
    dest_dir: str
        The local directory where the channel is being mirrored
    arch: str
        The current architecture which we are mirroring
    dry_run: bool
        A boolean flag indicating if this is just a dry-run (simulation),
        flagging so we don't really do anything (set to ``True``).

    Raises
    ------
    KeyError
        If a package has no entry, or neither a ``sha256`` nor an ``md5``
        checksum, in ``repodata``
    requests.HTTPError
        If the server answers with an error (4xx/5xx) status code
    AssertionError
        If the checksum of a downloaded file does not match the expected one
    """
    # function-scope import: the temporary directory may live on a different
    # filesystem than ``dest_dir``, a case ``os.rename`` cannot handle
    import shutil

    chunk_size = 128 * 1024

    def _checksum(filename, algorithm):
        """Hex digest of ``filename`` for the named hashlib algorithm"""
        h = hashlib.new(algorithm)
        # a single reusable buffer avoids one allocation per block read
        mv = memoryview(bytearray(chunk_size))
        with open(filename, 'rb', buffering=0) as f:
            for n in iter(lambda: f.readinto(mv), 0):
                h.update(mv[:n])
        return h.hexdigest()

    def _expected_hash(name):
        """Checksum published for ``name``: sha256 if available, else md5"""
        section = 'packages' if name.endswith('.tar.bz2') else 'packages.conda'
        info = repodata[section][name]
        # the md5 entry is only looked up when there is no sha256, and always
        # in the same section the package itself is listed in
        if 'sha256' in info:
            return info['sha256']
        return info['md5']

    # download files into temporary directory, that is removed by the end of
    # the procedure, or if something bad occurs
    with tempfile.TemporaryDirectory() as download_dir:

        total = len(packages)
        # counting starts at 1 to produce correct order on printouts
        for k, p in enumerate(packages, start=1):

            # checksum to verify; an md5 hex digest is 32 characters long,
            # anything else is taken to be sha256
            expected_hash = _expected_hash(p)
            algorithm = 'md5' if len(expected_hash) == 32 else 'sha256'

            # download package to file in our temporary directory
            url = channel_url + '/' + arch + '/' + p
            temp_dest = os.path.join(download_dir, p)
            logger.info('[download: %d/%d] %s -> %s', k, total, url, temp_dest)

            if not dry_run:
                logger.debug('[checking: %d/%d] %s', k, total, url)
                with requests.get(
                    url, stream=True, allow_redirects=True
                ) as r:
                    # do not store an error page as if it was a package
                    r.raise_for_status()
                    # Content-length is optional (chunked transfer encoding)
                    logger.info(
                        '[download: %d/%d] %s -> %s (%s bytes)',
                        k,
                        total,
                        url,
                        temp_dest,
                        r.headers.get('Content-length', 'unknown'),
                    )
                    # stream to disk so that large packages are never fully
                    # held in memory, and the file handle is always closed
                    with open(temp_dest, 'wb') as f:
                        for chunk in r.iter_content(chunk_size):
                            f.write(chunk)

            # verify that checksum matches
            logger.info(
                '[verify: %d/%d] %s(%s) == %s?',
                k,
                total,
                algorithm,
                temp_dest,
                expected_hash,
            )

            if not dry_run:
                actual_hash = _checksum(temp_dest, algorithm)
                if actual_hash != expected_hash:
                    # raised explicitly (instead of using ``assert``) so that
                    # validation still happens when python runs with ``-O``
                    raise AssertionError(
                        'Checksum of locally downloaded version of %s does '
                        'not match (actual:%r != %r:expected)'
                        % (url, actual_hash, expected_hash)
                    )

            # move
            local_dest = os.path.join(dest_dir, arch, p)
            logger.info(
                '[move: %d/%d] %s -> %s', k, total, temp_dest, local_dest
            )

            # check local directory is available before moving
            dirname = os.path.dirname(local_dest)
            if not os.path.exists(dirname):
                logger.info('[mkdir] %s', dirname)
                if not dry_run:
                    os.makedirs(dirname)

            if not dry_run:
                # works like ``os.rename`` when possible and falls back to
                # copy + delete when crossing filesystem boundaries
                shutil.move(temp_dest, local_dest)
def remove_packages(packages, dest_dir, arch, dry_run):
    """
    Deletes package files from the local mirror

    Parameters
    ----------
    packages : iterable of str
        File names (with a known length) to delete from ``<dest_dir>/<arch>``
    dest_dir : str
        The local directory where the channel is being mirrored
    arch : str
        The sub-directory (platform) the packages belong to
    dry_run : bool
        If set, only logs what would be deleted, without touching any file
    """
    count = len(packages)

    # numbering starts at 1 so that printouts read "1/N" ... "N/N"
    for index, name in enumerate(packages, start=1):
        target = os.path.join(dest_dir, arch, name)
        logger.info('[remove: %d/%d] %s', index, count, target)
        if dry_run:
            continue
        os.unlink(target)
This diff is collapsed.
Click to expand it.
bob/devtools/scripts/mirror.py
0 → 100644
+
151
−
0
View file @
4bb0e089
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
import
os
import
click
import
conda_build.api
from
.
import
bdt
from
..mirror
import
(
get_json
,
get_local_contents
,
load_glob_list
,
blacklist_filter
,
download_packages
,
remove_packages
,
)
from
..log
import
verbosity_option
,
get_logger
,
echo_info
,
echo_warning
# Module-level logger, obtained from the package's ``get_logger`` helper using
# this module's ``__name__``; used by the ``mirror`` command for log output.
logger = get_logger(__name__)
@click.command(
    epilog="""
Examples:

  1. Mirrors a conda channel:

\b
     $ bdt mirror -vv https://www.idiap.ch/software/bob/label/beta

"""
)
@click.argument(
    "channel-url",
    required=True,
)
@click.argument(
    "dest-dir",
    type=click.Path(
        exists=False,
        dir_okay=True,
        file_okay=False,
        writable=True,
        readable=True,
        resolve_path=True,
    ),
    required=True,
)
@click.option(
    "-b",
    "--blacklist",
    type=click.Path(
        exists=True,
        dir_okay=False,
        file_okay=True,
        readable=True,
        resolve_path=True,
    ),
    help="A file containing a list of globs to exclude from local "
    "mirroring, one per line",
)
@click.option(
    "-m",
    "--check-md5/--no-check-md5",
    default=False,
    help="If set, then check MD5 sums of all packages during conda-index",
)
@click.option(
    "-d",
    "--dry-run/--no-dry-run",
    default=False,
    help="Only goes through the actions, but does not execute them "
    "(combine with the verbosity flags - e.g. ``-vvv``) to enable "
    "printing to help you understand what will be done",
)
@verbosity_option()
@bdt.raise_on_error
def mirror(
    channel_url,
    dest_dir,
    blacklist,
    check_md5,
    dry_run,
):
    """Mirrors a conda channel to a particular local destination

    This command is capable of completely mirroring a valid conda channel,
    excluding packages that you may not be interested on via globs.  It works
    to minimize channel usage by first downloading the channel repository data
    (in compressed format), analysing what is available locally and what is
    available on the channel, and only downloading the missing files.
    """

    # if we are in a dry-run mode, let's let it be known
    if dry_run:
        # ``warning`` is used since ``warn`` is a deprecated alias on standard
        # ``logging`` loggers
        logger.warning("!!!! DRY RUN MODE !!!!")
        logger.warning("Nothing will be really mirrored")

    # sub-directories (platforms) of the channel that get mirrored
    DEFAULT_SUBDIRS = ['noarch', 'linux-64', 'osx-64']

    noarch = os.path.join(dest_dir, 'noarch')
    if not os.path.exists(noarch):  # first time
        # calls conda index to create basic infrastructure
        logger.info("Creating conda channel at %s...", dest_dir)
        if not dry_run:
            conda_build.api.update_index(
                [dest_dir], subdir=DEFAULT_SUBDIRS, progress=False
            )

    # the blacklist does not depend on the sub-directory: load it only once
    if blacklist is not None and os.path.exists(blacklist):
        globs_to_remove = set(load_glob_list(blacklist))
    else:
        globs_to_remove = set()

    for arch in DEFAULT_SUBDIRS:

        remote_repodata = get_json(channel_url, arch, 'repodata.json.bz2')

        # both package formats (.tar.bz2 and .conda) count as remote packages
        remote_packages = set(remote_repodata.get('packages', {})) | set(
            remote_repodata.get('packages.conda', {})
        )
        logger.info(
            '%d packages available in remote index', len(remote_packages)
        )

        local_packages = get_local_contents(dest_dir, arch)
        logger.info(
            '%d packages available in local mirror', len(local_packages)
        )

        # in the remote packages, subset those that need to be downloaded
        # according to our own interest
        to_download = blacklist_filter(
            remote_packages - local_packages, globs_to_remove
        )

        # in the local packages, subset those that we no longer need, be it
        # because they have been removed from the remote repository, or because
        # we decided to blacklist them.
        disappeared_remotely = local_packages - remote_packages
        to_keep = blacklist_filter(local_packages, globs_to_remove)
        to_delete_locally = (local_packages - to_keep) | disappeared_remotely

        # execute the transaction
        if to_download:
            download_packages(
                to_download,
                remote_repodata,
                channel_url,
                dest_dir,
                arch,
                dry_run,
            )
        else:
            echo_info(
                "Mirror at %s/%s is up-to-date w.r.t. %s/%s. "
                "No packages to download."
                % (dest_dir, arch, channel_url, arch)
            )

        if to_delete_locally:
            echo_warning(
                "%d packages will be removed at %s/%s"
                % (len(to_delete_locally), dest_dir, arch)
            )
            remove_packages(to_delete_locally, dest_dir, arch, dry_run)
        else:
            echo_info(
                "Mirror at %s/%s is up-to-date w.r.t. blacklist. "
                "No packages to be removed." % (dest_dir, arch)
            )

    # re-indexes the channel to produce a conda-compatible setup
    echo_info("Re-indexing %s..." % dest_dir)
    if not dry_run:
        conda_build.api.update_index(
            [dest_dir], check_md5=check_md5, progress=True
        )
This diff is collapsed.
Click to expand it.
setup.py
+
1
−
0
View file @
4bb0e089
...
@@ -48,6 +48,7 @@ setup(
...
@@ -48,6 +48,7 @@ setup(
'
dumpsphinx = bob.devtools.scripts.dumpsphinx:dumpsphinx
'
,
'
dumpsphinx = bob.devtools.scripts.dumpsphinx:dumpsphinx
'
,
'
create = bob.devtools.scripts.create:create
'
,
'
create = bob.devtools.scripts.create:create
'
,
'
build = bob.devtools.scripts.build:build
'
,
'
build = bob.devtools.scripts.build:build
'
,
'
mirror = bob.devtools.scripts.mirror:mirror
'
,
'
rebuild = bob.devtools.scripts.rebuild:rebuild
'
,
'
rebuild = bob.devtools.scripts.rebuild:rebuild
'
,
'
test = bob.devtools.scripts.test:test
'
,
'
test = bob.devtools.scripts.test:test
'
,
'
caupdate = bob.devtools.scripts.caupdate:caupdate
'
,
'
caupdate = bob.devtools.scripts.caupdate:caupdate
'
,
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment