download.py 2.88 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>

import os
import logging
logger = logging.getLogger(__name__)


def _unzip(zip_file, directory):
    """Extract every member of a zip archive into ``directory``.

    Parameters
    ----------
    zip_file : str
        The zip archive to read; handed as-is to ``zipfile.ZipFile``.

    directory : str
        Destination handed to ``ZipFile.extractall``.
    """
    from zipfile import ZipFile

    archive = ZipFile(zip_file)
    # The context manager closes the archive even when extraction fails
    with archive:
        archive.extractall(directory)


def _untar(tar_file, directory, mode):
    """Uncompress a compressed tar archive or a single compressed file.

    Parameters
    ----------
    tar_file : str
        Path of the compressed file.

    directory : str
        Where the members of a tar archive are extracted. It is not used for
        single (non-tar) compressed files: those are written next to
        ``tar_file``, with the compression suffix removed.

    mode : str
        Compression scheme without the leading dot (``"gz"`` or ``"bz2"``).

    Raises
    ------
    ValueError
        If the file is not a tar archive and ``mode`` is not supported.
    """
    # Only the file name is inspected, so a ".tar" that happens to be part of
    # a parent directory name does not trigger the tar branch
    if ".tar" in os.path.basename(tar_file):
        import tarfile
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; only use this with archives from trusted sources.
        with tarfile.open(name=tar_file, mode='r:' + mode) as t:
            t.extractall(directory)
        return

    # Single compressed file: pick the matching stream reader
    if mode == "bz2":
        from bz2 import BZ2File as reader
    elif mode == "gz":
        from gzip import GzipFile as reader
    else:
        raise ValueError("It was not possible to extract {0}".format(tar_file))

    from shutil import copyfileobj

    # "data.txt.bz2" -> "data.txt"
    target = os.path.splitext(tar_file)[0]

    # Stream in chunks and close both handles deterministically
    with reader(tar_file) as compressed:
        with open(target, 'wb') as out:
            copyfileobj(compressed, out)
29

30
def download_file(url, out_file):
    """Downloads a file from a given url

    Parameters
    ----------
    url : str
        The url to download from.

    out_file : str
        Where to save the file.
    """
    from contextlib import closing
    from shutil import copyfileobj

    try:
        # python3 location of urlopen
        from urllib.request import urlopen
    except ImportError:
        # python2 fallback
        from urllib2 import urlopen

    # The url is opened BEFORE ``out_file`` is created, so a url that cannot
    # be opened does not leave an empty file behind. ``closing`` is used
    # because the python2 response object is not a context manager.
    with closing(urlopen(url)) as response:
        with open(out_file, 'wb') as f:
            # Stream in chunks instead of holding the whole body in memory
            copyfileobj(response, f)


def download_and_unzip(urls, filename):
    """
    Download a file from a given URL list, save it somewhere and unzip/untar if necessary

    Example:
       download_and_unzip(["https://mytesturl.co/my_file_example.tar.bz2"], filename="~/my_file_example.tar.bz2")


    Parameters
    ----------

      urls: list
        List containing all the URLs.
        The function will try to download them in order and stops at the
        first one that succeeds. A single ``str`` is also accepted.

      filename: str
        File name (full path) where the downloaded file will be written and uncompressed.
        A leading ``~`` is expanded to the user's home directory.

    Raises
    ------

      RuntimeError
        If none of the URLs could be downloaded and ``filename`` does not
        already exist on disk.

    """

    # open() does not expand "~" by itself, and the documented example uses it
    filename = os.path.expanduser(filename)

    # Just testing if string and wrap it in a list if it's the case
    if isinstance(urls, str):
        urls = [urls]

    for url in urls:
        logger.info("Downloading from %s ...", url)
        try:
            download_file(url, filename)
            break
        except Exception:
            # Best effort: report the failure and fall through to the next url
            logger.warning(
                "Could not download from the %s url", url, exc_info=True)
    else:  # else is for the for loop: reached only when no download succeeded
        # A file already present at ``filename`` is still uncompressed below
        if not os.path.isfile(filename):
            raise RuntimeError("Could not download the file.")

    # Uncompressing if it is the case
    ext = os.path.splitext(filename)[-1].lower()
    target_dir = os.path.dirname(filename)

    if ext == ".zip":
        logger.info("Unziping in %s", filename)
        _unzip(filename, target_dir)

    elif ext in (".gz", ".bz2"):
        logger.info("Untar/gzip in %s", filename)
        # ".gz" -> "gz", ".bz2" -> "bz2"
        _untar(filename, target_dir, mode=ext[1:])