diff --git a/.azure-pipelines/azure-pipelines-linux.yml b/.azure-pipelines/azure-pipelines-linux.yml
index 8edf22abbb59c7a3d03a985474eb2da0d848a50e..756b0258f64ca8ee02b3b15e497ca4cd5f7a4b97 100755
--- a/.azure-pipelines/azure-pipelines-linux.yml
+++ b/.azure-pipelines/azure-pipelines-linux.yml
@@ -8,16 +8,64 @@ jobs:
     vmImage: ubuntu-latest
   strategy:
     matrix:
-      linux_64_python3.7.____cpython:
-        CONFIG: linux_64_python3.7.____cpython
+      linux_64_cuda_compiler_version10.2cudnn7python3.7.____cpython:
+        CONFIG: linux_64_cuda_compiler_version10.2cudnn7python3.7.____cpython
+        UPLOAD_PACKAGES: 'True'
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:10.2
+      linux_64_cuda_compiler_version10.2cudnn7python3.8.____cpython:
+        CONFIG: linux_64_cuda_compiler_version10.2cudnn7python3.8.____cpython
+        UPLOAD_PACKAGES: 'True'
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:10.2
+      linux_64_cuda_compiler_version10.2cudnn7python3.9.____cpython:
+        CONFIG: linux_64_cuda_compiler_version10.2cudnn7python3.9.____cpython
+        UPLOAD_PACKAGES: 'True'
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:10.2
+      linux_64_cuda_compiler_version11.0cudnn8python3.7.____cpython:
+        CONFIG: linux_64_cuda_compiler_version11.0cudnn8python3.7.____cpython
+        UPLOAD_PACKAGES: 'True'
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:11.0
+      linux_64_cuda_compiler_version11.0cudnn8python3.8.____cpython:
+        CONFIG: linux_64_cuda_compiler_version11.0cudnn8python3.8.____cpython
+        UPLOAD_PACKAGES: 'True'
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:11.0
+      linux_64_cuda_compiler_version11.0cudnn8python3.9.____cpython:
+        CONFIG: linux_64_cuda_compiler_version11.0cudnn8python3.9.____cpython
+        UPLOAD_PACKAGES: 'True'
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:11.0
+      linux_64_cuda_compiler_version11.1cudnn8python3.7.____cpython:
+        CONFIG: linux_64_cuda_compiler_version11.1cudnn8python3.7.____cpython
+        UPLOAD_PACKAGES: 'True'
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:11.1
+      linux_64_cuda_compiler_version11.1cudnn8python3.8.____cpython:
+        CONFIG: linux_64_cuda_compiler_version11.1cudnn8python3.8.____cpython
+        UPLOAD_PACKAGES: 'True'
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:11.1
+      linux_64_cuda_compiler_version11.1cudnn8python3.9.____cpython:
+        CONFIG: linux_64_cuda_compiler_version11.1cudnn8python3.9.____cpython
+        UPLOAD_PACKAGES: 'True'
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:11.1
+      linux_64_cuda_compiler_version11.2cudnn8python3.7.____cpython:
+        CONFIG: linux_64_cuda_compiler_version11.2cudnn8python3.7.____cpython
+        UPLOAD_PACKAGES: 'True'
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:11.2
+      linux_64_cuda_compiler_version11.2cudnn8python3.8.____cpython:
+        CONFIG: linux_64_cuda_compiler_version11.2cudnn8python3.8.____cpython
+        UPLOAD_PACKAGES: 'True'
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:11.2
+      linux_64_cuda_compiler_version11.2cudnn8python3.9.____cpython:
+        CONFIG: linux_64_cuda_compiler_version11.2cudnn8python3.9.____cpython
+        UPLOAD_PACKAGES: 'True'
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:11.2
+      linux_64_cuda_compiler_versionNonecudnnundefinedpython3.7.____cpython:
+        CONFIG: linux_64_cuda_compiler_versionNonecudnnundefinedpython3.7.____cpython
         UPLOAD_PACKAGES: 'True'
         DOCKER_IMAGE: quay.io/condaforge/linux-anvil-comp7
-      linux_64_python3.8.____cpython:
-        CONFIG: linux_64_python3.8.____cpython
+      linux_64_cuda_compiler_versionNonecudnnundefinedpython3.8.____cpython:
+        CONFIG: linux_64_cuda_compiler_versionNonecudnnundefinedpython3.8.____cpython
         UPLOAD_PACKAGES: 'True'
         DOCKER_IMAGE: quay.io/condaforge/linux-anvil-comp7
-      linux_64_python3.9.____cpython:
-        CONFIG: linux_64_python3.9.____cpython
+      linux_64_cuda_compiler_versionNonecudnnundefinedpython3.9.____cpython:
+        CONFIG: linux_64_cuda_compiler_versionNonecudnnundefinedpython3.9.____cpython
         UPLOAD_PACKAGES: 'True'
         DOCKER_IMAGE: quay.io/condaforge/linux-anvil-comp7
   timeoutInMinutes: 360
diff --git a/.ci_support/linux_64_cuda_compiler_version10.2cudnn7python3.7.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_version10.2cudnn7python3.7.____cpython.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ff37df2b56190bf93d3716057ad1a92c5467f60f
--- /dev/null
+++ b/.ci_support/linux_64_cuda_compiler_version10.2cudnn7python3.7.____cpython.yaml
@@ -0,0 +1,71 @@
+c_compiler:
+- gcc
+c_compiler_version:
+- '7'
+cdt_name:
+- cos6
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- '10.2'
+cudnn:
+- '7'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '7'
+docker_image:
+- quay.io/condaforge/linux-anvil-cuda:10.2
+giflib:
+- '5.2'
+grpc_cpp:
+- '1.40'
+icu:
+- '69'
+jpeg:
+- '9'
+libcurl:
+- '7'
+libpng:
+- '1.6'
+libprotobuf:
+- '3.18'
+nccl:
+- '2'
+openssl:
+- 1.1.1
+pin_run_as_build:
+  jpeg:
+    max_pin: x
+  libcurl:
+    max_pin: x
+  libpng:
+    max_pin: x.x
+  python:
+    min_pin: x.x
+    max_pin: x.x
+  sqlite:
+    max_pin: x
+  zlib:
+    max_pin: x.x
+python:
+- 3.7.* *_cpython
+snappy:
+- '1'
+sqlite:
+- '3'
+target_platform:
+- linux-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+- - cdt_name
+  - cuda_compiler_version
+  - cudnn
+  - docker_image
+zlib:
+- '1.2'
diff --git a/.ci_support/linux_64_cuda_compiler_version10.2cudnn7python3.8.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_version10.2cudnn7python3.8.____cpython.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..72dd7ba2148be80e4c8b53313ba2c7d31bb5e36a
--- /dev/null
+++ b/.ci_support/linux_64_cuda_compiler_version10.2cudnn7python3.8.____cpython.yaml
@@ -0,0 +1,71 @@
+c_compiler:
+- gcc
+c_compiler_version:
+- '7'
+cdt_name:
+- cos6
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- '10.2'
+cudnn:
+- '7'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '7'
+docker_image:
+- quay.io/condaforge/linux-anvil-cuda:10.2
+giflib:
+- '5.2'
+grpc_cpp:
+- '1.40'
+icu:
+- '69'
+jpeg:
+- '9'
+libcurl:
+- '7'
+libpng:
+- '1.6'
+libprotobuf:
+- '3.18'
+nccl:
+- '2'
+openssl:
+- 1.1.1
+pin_run_as_build:
+  jpeg:
+    max_pin: x
+  libcurl:
+    max_pin: x
+  libpng:
+    max_pin: x.x
+  python:
+    min_pin: x.x
+    max_pin: x.x
+  sqlite:
+    max_pin: x
+  zlib:
+    max_pin: x.x
+python:
+- 3.8.* *_cpython
+snappy:
+- '1'
+sqlite:
+- '3'
+target_platform:
+- linux-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+- - cdt_name
+  - cuda_compiler_version
+  - cudnn
+  - docker_image
+zlib:
+- '1.2'
diff --git a/.ci_support/linux_64_cuda_compiler_version10.2cudnn7python3.9.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_version10.2cudnn7python3.9.____cpython.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0d9cbe47ccaa439f9b8de50e1d235a77edd9c062
--- /dev/null
+++ b/.ci_support/linux_64_cuda_compiler_version10.2cudnn7python3.9.____cpython.yaml
@@ -0,0 +1,71 @@
+c_compiler:
+- gcc
+c_compiler_version:
+- '7'
+cdt_name:
+- cos6
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- '10.2'
+cudnn:
+- '7'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '7'
+docker_image:
+- quay.io/condaforge/linux-anvil-cuda:10.2
+giflib:
+- '5.2'
+grpc_cpp:
+- '1.40'
+icu:
+- '69'
+jpeg:
+- '9'
+libcurl:
+- '7'
+libpng:
+- '1.6'
+libprotobuf:
+- '3.18'
+nccl:
+- '2'
+openssl:
+- 1.1.1
+pin_run_as_build:
+  jpeg:
+    max_pin: x
+  libcurl:
+    max_pin: x
+  libpng:
+    max_pin: x.x
+  python:
+    min_pin: x.x
+    max_pin: x.x
+  sqlite:
+    max_pin: x
+  zlib:
+    max_pin: x.x
+python:
+- 3.9.* *_cpython
+snappy:
+- '1'
+sqlite:
+- '3'
+target_platform:
+- linux-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+- - cdt_name
+  - cuda_compiler_version
+  - cudnn
+  - docker_image
+zlib:
+- '1.2'
diff --git a/.ci_support/linux_64_cuda_compiler_version11.0cudnn8python3.7.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_version11.0cudnn8python3.7.____cpython.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..004bdc212ab111776150f47f23b8588653d1f39f
--- /dev/null
+++ b/.ci_support/linux_64_cuda_compiler_version11.0cudnn8python3.7.____cpython.yaml
@@ -0,0 +1,71 @@
+c_compiler:
+- gcc
+c_compiler_version:
+- '7'
+cdt_name:
+- cos7
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- '11.0'
+cudnn:
+- '8'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '7'
+docker_image:
+- quay.io/condaforge/linux-anvil-cuda:11.0
+giflib:
+- '5.2'
+grpc_cpp:
+- '1.40'
+icu:
+- '69'
+jpeg:
+- '9'
+libcurl:
+- '7'
+libpng:
+- '1.6'
+libprotobuf:
+- '3.18'
+nccl:
+- '2'
+openssl:
+- 1.1.1
+pin_run_as_build:
+  jpeg:
+    max_pin: x
+  libcurl:
+    max_pin: x
+  libpng:
+    max_pin: x.x
+  python:
+    min_pin: x.x
+    max_pin: x.x
+  sqlite:
+    max_pin: x
+  zlib:
+    max_pin: x.x
+python:
+- 3.7.* *_cpython
+snappy:
+- '1'
+sqlite:
+- '3'
+target_platform:
+- linux-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+- - cdt_name
+  - cuda_compiler_version
+  - cudnn
+  - docker_image
+zlib:
+- '1.2'
diff --git a/.ci_support/linux_64_cuda_compiler_version11.0cudnn8python3.8.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_version11.0cudnn8python3.8.____cpython.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7a7a7a8db07dcc561aa17c20d7ec94fe2b455a0f
--- /dev/null
+++ b/.ci_support/linux_64_cuda_compiler_version11.0cudnn8python3.8.____cpython.yaml
@@ -0,0 +1,71 @@
+c_compiler:
+- gcc
+c_compiler_version:
+- '7'
+cdt_name:
+- cos7
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- '11.0'
+cudnn:
+- '8'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '7'
+docker_image:
+- quay.io/condaforge/linux-anvil-cuda:11.0
+giflib:
+- '5.2'
+grpc_cpp:
+- '1.40'
+icu:
+- '69'
+jpeg:
+- '9'
+libcurl:
+- '7'
+libpng:
+- '1.6'
+libprotobuf:
+- '3.18'
+nccl:
+- '2'
+openssl:
+- 1.1.1
+pin_run_as_build:
+  jpeg:
+    max_pin: x
+  libcurl:
+    max_pin: x
+  libpng:
+    max_pin: x.x
+  python:
+    min_pin: x.x
+    max_pin: x.x
+  sqlite:
+    max_pin: x
+  zlib:
+    max_pin: x.x
+python:
+- 3.8.* *_cpython
+snappy:
+- '1'
+sqlite:
+- '3'
+target_platform:
+- linux-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+- - cdt_name
+  - cuda_compiler_version
+  - cudnn
+  - docker_image
+zlib:
+- '1.2'
diff --git a/.ci_support/linux_64_cuda_compiler_version11.0cudnn8python3.9.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_version11.0cudnn8python3.9.____cpython.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cf2dd1557397e3e789c72b2d44be0d8b4da424fe
--- /dev/null
+++ b/.ci_support/linux_64_cuda_compiler_version11.0cudnn8python3.9.____cpython.yaml
@@ -0,0 +1,71 @@
+c_compiler:
+- gcc
+c_compiler_version:
+- '7'
+cdt_name:
+- cos7
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- '11.0'
+cudnn:
+- '8'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '7'
+docker_image:
+- quay.io/condaforge/linux-anvil-cuda:11.0
+giflib:
+- '5.2'
+grpc_cpp:
+- '1.40'
+icu:
+- '69'
+jpeg:
+- '9'
+libcurl:
+- '7'
+libpng:
+- '1.6'
+libprotobuf:
+- '3.18'
+nccl:
+- '2'
+openssl:
+- 1.1.1
+pin_run_as_build:
+  jpeg:
+    max_pin: x
+  libcurl:
+    max_pin: x
+  libpng:
+    max_pin: x.x
+  python:
+    min_pin: x.x
+    max_pin: x.x
+  sqlite:
+    max_pin: x
+  zlib:
+    max_pin: x.x
+python:
+- 3.9.* *_cpython
+snappy:
+- '1'
+sqlite:
+- '3'
+target_platform:
+- linux-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+- - cdt_name
+  - cuda_compiler_version
+  - cudnn
+  - docker_image
+zlib:
+- '1.2'
diff --git a/.ci_support/linux_64_cuda_compiler_version11.1cudnn8python3.7.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_version11.1cudnn8python3.7.____cpython.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7b229592291e607bb1d19af10789f0ce8e8a2160
--- /dev/null
+++ b/.ci_support/linux_64_cuda_compiler_version11.1cudnn8python3.7.____cpython.yaml
@@ -0,0 +1,71 @@
+c_compiler:
+- gcc
+c_compiler_version:
+- '7'
+cdt_name:
+- cos7
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- '11.1'
+cudnn:
+- '8'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '7'
+docker_image:
+- quay.io/condaforge/linux-anvil-cuda:11.1
+giflib:
+- '5.2'
+grpc_cpp:
+- '1.40'
+icu:
+- '69'
+jpeg:
+- '9'
+libcurl:
+- '7'
+libpng:
+- '1.6'
+libprotobuf:
+- '3.18'
+nccl:
+- '2'
+openssl:
+- 1.1.1
+pin_run_as_build:
+  jpeg:
+    max_pin: x
+  libcurl:
+    max_pin: x
+  libpng:
+    max_pin: x.x
+  python:
+    min_pin: x.x
+    max_pin: x.x
+  sqlite:
+    max_pin: x
+  zlib:
+    max_pin: x.x
+python:
+- 3.7.* *_cpython
+snappy:
+- '1'
+sqlite:
+- '3'
+target_platform:
+- linux-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+- - cdt_name
+  - cuda_compiler_version
+  - cudnn
+  - docker_image
+zlib:
+- '1.2'
diff --git a/.ci_support/linux_64_cuda_compiler_version11.1cudnn8python3.8.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_version11.1cudnn8python3.8.____cpython.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a95a65729ca43f21a65b2f6b862a7062d287bcc3
--- /dev/null
+++ b/.ci_support/linux_64_cuda_compiler_version11.1cudnn8python3.8.____cpython.yaml
@@ -0,0 +1,71 @@
+c_compiler:
+- gcc
+c_compiler_version:
+- '7'
+cdt_name:
+- cos7
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- '11.1'
+cudnn:
+- '8'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '7'
+docker_image:
+- quay.io/condaforge/linux-anvil-cuda:11.1
+giflib:
+- '5.2'
+grpc_cpp:
+- '1.40'
+icu:
+- '69'
+jpeg:
+- '9'
+libcurl:
+- '7'
+libpng:
+- '1.6'
+libprotobuf:
+- '3.18'
+nccl:
+- '2'
+openssl:
+- 1.1.1
+pin_run_as_build:
+  jpeg:
+    max_pin: x
+  libcurl:
+    max_pin: x
+  libpng:
+    max_pin: x.x
+  python:
+    min_pin: x.x
+    max_pin: x.x
+  sqlite:
+    max_pin: x
+  zlib:
+    max_pin: x.x
+python:
+- 3.8.* *_cpython
+snappy:
+- '1'
+sqlite:
+- '3'
+target_platform:
+- linux-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+- - cdt_name
+  - cuda_compiler_version
+  - cudnn
+  - docker_image
+zlib:
+- '1.2'
diff --git a/.ci_support/linux_64_cuda_compiler_version11.1cudnn8python3.9.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_version11.1cudnn8python3.9.____cpython.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4765191deaad1af12dfa2adbd0988e0a56cf8de4
--- /dev/null
+++ b/.ci_support/linux_64_cuda_compiler_version11.1cudnn8python3.9.____cpython.yaml
@@ -0,0 +1,71 @@
+c_compiler:
+- gcc
+c_compiler_version:
+- '7'
+cdt_name:
+- cos7
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- '11.1'
+cudnn:
+- '8'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '7'
+docker_image:
+- quay.io/condaforge/linux-anvil-cuda:11.1
+giflib:
+- '5.2'
+grpc_cpp:
+- '1.40'
+icu:
+- '69'
+jpeg:
+- '9'
+libcurl:
+- '7'
+libpng:
+- '1.6'
+libprotobuf:
+- '3.18'
+nccl:
+- '2'
+openssl:
+- 1.1.1
+pin_run_as_build:
+  jpeg:
+    max_pin: x
+  libcurl:
+    max_pin: x
+  libpng:
+    max_pin: x.x
+  python:
+    min_pin: x.x
+    max_pin: x.x
+  sqlite:
+    max_pin: x
+  zlib:
+    max_pin: x.x
+python:
+- 3.9.* *_cpython
+snappy:
+- '1'
+sqlite:
+- '3'
+target_platform:
+- linux-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+- - cdt_name
+  - cuda_compiler_version
+  - cudnn
+  - docker_image
+zlib:
+- '1.2'
diff --git a/.ci_support/linux_64_cuda_compiler_version11.2cudnn8python3.7.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_version11.2cudnn8python3.7.____cpython.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ccc10e36d8f6e8a4ab6f583b01e10ac8e0bdfcdd
--- /dev/null
+++ b/.ci_support/linux_64_cuda_compiler_version11.2cudnn8python3.7.____cpython.yaml
@@ -0,0 +1,71 @@
+c_compiler:
+- gcc
+c_compiler_version:
+- '7'
+cdt_name:
+- cos7
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- '11.2'
+cudnn:
+- '8'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '7'
+docker_image:
+- quay.io/condaforge/linux-anvil-cuda:11.2
+giflib:
+- '5.2'
+grpc_cpp:
+- '1.40'
+icu:
+- '69'
+jpeg:
+- '9'
+libcurl:
+- '7'
+libpng:
+- '1.6'
+libprotobuf:
+- '3.18'
+nccl:
+- '2'
+openssl:
+- 1.1.1
+pin_run_as_build:
+  jpeg:
+    max_pin: x
+  libcurl:
+    max_pin: x
+  libpng:
+    max_pin: x.x
+  python:
+    min_pin: x.x
+    max_pin: x.x
+  sqlite:
+    max_pin: x
+  zlib:
+    max_pin: x.x
+python:
+- 3.7.* *_cpython
+snappy:
+- '1'
+sqlite:
+- '3'
+target_platform:
+- linux-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+- - cdt_name
+  - cuda_compiler_version
+  - cudnn
+  - docker_image
+zlib:
+- '1.2'
diff --git a/.ci_support/linux_64_cuda_compiler_version11.2cudnn8python3.8.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_version11.2cudnn8python3.8.____cpython.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ecac70a74b3991ae93ad32c6d08f658bab16358d
--- /dev/null
+++ b/.ci_support/linux_64_cuda_compiler_version11.2cudnn8python3.8.____cpython.yaml
@@ -0,0 +1,71 @@
+c_compiler:
+- gcc
+c_compiler_version:
+- '7'
+cdt_name:
+- cos7
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- '11.2'
+cudnn:
+- '8'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '7'
+docker_image:
+- quay.io/condaforge/linux-anvil-cuda:11.2
+giflib:
+- '5.2'
+grpc_cpp:
+- '1.40'
+icu:
+- '69'
+jpeg:
+- '9'
+libcurl:
+- '7'
+libpng:
+- '1.6'
+libprotobuf:
+- '3.18'
+nccl:
+- '2'
+openssl:
+- 1.1.1
+pin_run_as_build:
+  jpeg:
+    max_pin: x
+  libcurl:
+    max_pin: x
+  libpng:
+    max_pin: x.x
+  python:
+    min_pin: x.x
+    max_pin: x.x
+  sqlite:
+    max_pin: x
+  zlib:
+    max_pin: x.x
+python:
+- 3.8.* *_cpython
+snappy:
+- '1'
+sqlite:
+- '3'
+target_platform:
+- linux-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+- - cdt_name
+  - cuda_compiler_version
+  - cudnn
+  - docker_image
+zlib:
+- '1.2'
diff --git a/.ci_support/linux_64_cuda_compiler_version11.2cudnn8python3.9.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_version11.2cudnn8python3.9.____cpython.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..04fab325cbed9dc58f11ba3e4c1d26f4c5e42b3f
--- /dev/null
+++ b/.ci_support/linux_64_cuda_compiler_version11.2cudnn8python3.9.____cpython.yaml
@@ -0,0 +1,71 @@
+c_compiler:
+- gcc
+c_compiler_version:
+- '7'
+cdt_name:
+- cos7
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- '11.2'
+cudnn:
+- '8'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '7'
+docker_image:
+- quay.io/condaforge/linux-anvil-cuda:11.2
+giflib:
+- '5.2'
+grpc_cpp:
+- '1.40'
+icu:
+- '69'
+jpeg:
+- '9'
+libcurl:
+- '7'
+libpng:
+- '1.6'
+libprotobuf:
+- '3.18'
+nccl:
+- '2'
+openssl:
+- 1.1.1
+pin_run_as_build:
+  jpeg:
+    max_pin: x
+  libcurl:
+    max_pin: x
+  libpng:
+    max_pin: x.x
+  python:
+    min_pin: x.x
+    max_pin: x.x
+  sqlite:
+    max_pin: x
+  zlib:
+    max_pin: x.x
+python:
+- 3.9.* *_cpython
+snappy:
+- '1'
+sqlite:
+- '3'
+target_platform:
+- linux-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+- - cdt_name
+  - cuda_compiler_version
+  - cudnn
+  - docker_image
+zlib:
+- '1.2'
diff --git a/.ci_support/linux_64_python3.7.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_versionNonecudnnundefinedpython3.7.____cpython.yaml
similarity index 81%
rename from .ci_support/linux_64_python3.7.____cpython.yaml
rename to .ci_support/linux_64_cuda_compiler_versionNonecudnnundefinedpython3.7.____cpython.yaml
index ace0608b03c08127793a5048bd1415b6f0ad792a..e893dce0eb98590c31a84090111288e3aeec735e 100644
--- a/.ci_support/linux_64_python3.7.____cpython.yaml
+++ b/.ci_support/linux_64_cuda_compiler_versionNonecudnnundefinedpython3.7.____cpython.yaml
@@ -1,17 +1,23 @@
 c_compiler:
 - gcc
 c_compiler_version:
-- '9'
+- '7'
 cdt_name:
 - cos6
 channel_sources:
 - conda-forge
 channel_targets:
-- amir183 main
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- None
+cudnn:
+- undefined
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '9'
+- '7'
 docker_image:
 - quay.io/condaforge/linux-anvil-comp7
 giflib:
@@ -19,7 +25,7 @@ giflib:
 grpc_cpp:
 - '1.40'
 icu:
-- '68'
+- '69'
 jpeg:
 - '9'
 libcurl:
@@ -28,6 +34,8 @@ libpng:
 - '1.6'
 libprotobuf:
 - '3.18'
+nccl:
+- '2'
 openssl:
 - 1.1.1
 pin_run_as_build:
@@ -56,6 +64,8 @@ zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
 - - cdt_name
+  - cuda_compiler_version
+  - cudnn
   - docker_image
 zlib:
 - '1.2'
diff --git a/.ci_support/linux_64_python3.8.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_versionNonecudnnundefinedpython3.8.____cpython.yaml
similarity index 81%
rename from .ci_support/linux_64_python3.8.____cpython.yaml
rename to .ci_support/linux_64_cuda_compiler_versionNonecudnnundefinedpython3.8.____cpython.yaml
index 81beed1358474ebfe50b7f023bcb6275c2f5771b..45b5eaf7dd312b0ac49be8e54558ec949fac8cab 100644
--- a/.ci_support/linux_64_python3.8.____cpython.yaml
+++ b/.ci_support/linux_64_cuda_compiler_versionNonecudnnundefinedpython3.8.____cpython.yaml
@@ -1,17 +1,23 @@
 c_compiler:
 - gcc
 c_compiler_version:
-- '9'
+- '7'
 cdt_name:
 - cos6
 channel_sources:
 - conda-forge
 channel_targets:
-- amir183 main
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- None
+cudnn:
+- undefined
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '9'
+- '7'
 docker_image:
 - quay.io/condaforge/linux-anvil-comp7
 giflib:
@@ -19,7 +25,7 @@ giflib:
 grpc_cpp:
 - '1.40'
 icu:
-- '68'
+- '69'
 jpeg:
 - '9'
 libcurl:
@@ -28,6 +34,8 @@ libpng:
 - '1.6'
 libprotobuf:
 - '3.18'
+nccl:
+- '2'
 openssl:
 - 1.1.1
 pin_run_as_build:
@@ -56,6 +64,8 @@ zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
 - - cdt_name
+  - cuda_compiler_version
+  - cudnn
   - docker_image
 zlib:
 - '1.2'
diff --git a/.ci_support/linux_64_python3.9.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_versionNonecudnnundefinedpython3.9.____cpython.yaml
similarity index 81%
rename from .ci_support/linux_64_python3.9.____cpython.yaml
rename to .ci_support/linux_64_cuda_compiler_versionNonecudnnundefinedpython3.9.____cpython.yaml
index 2adeb64a7d84618e11981d55c0b301bdb855d74b..aebea15402a44c6ef82c0375b65b346f3386f703 100644
--- a/.ci_support/linux_64_python3.9.____cpython.yaml
+++ b/.ci_support/linux_64_cuda_compiler_versionNonecudnnundefinedpython3.9.____cpython.yaml
@@ -1,17 +1,23 @@
 c_compiler:
 - gcc
 c_compiler_version:
-- '9'
+- '7'
 cdt_name:
 - cos6
 channel_sources:
 - conda-forge
 channel_targets:
-- amir183 main
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- None
+cudnn:
+- undefined
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '9'
+- '7'
 docker_image:
 - quay.io/condaforge/linux-anvil-comp7
 giflib:
@@ -19,7 +25,7 @@ giflib:
 grpc_cpp:
 - '1.40'
 icu:
-- '68'
+- '69'
 jpeg:
 - '9'
 libcurl:
@@ -28,6 +34,8 @@ libpng:
 - '1.6'
 libprotobuf:
 - '3.18'
+nccl:
+- '2'
 openssl:
 - 1.1.1
 pin_run_as_build:
@@ -56,6 +64,8 @@ zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
 - - cdt_name
+  - cuda_compiler_version
+  - cudnn
   - docker_image
 zlib:
 - '1.2'
diff --git a/.ci_support/migrations/cuda110.yaml b/.ci_support/migrations/cuda110.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..823f31d1a4f8fe27925c0b309a9f940dbff01b6c
--- /dev/null
+++ b/.ci_support/migrations/cuda110.yaml
@@ -0,0 +1,81 @@
+migrator_ts: 1601612527
+__migrator:
+  kind:
+    version
+  migration_number:
+    1
+  build_number:
+    1
+  override_cbc_keys:
+    - cuda_compiler_stub
+  ordering:
+    cxx_compiler_version:
+      - 9
+      - 8
+      - 7
+    c_compiler_version:
+      - 9
+      - 8
+      - 7
+    docker_image:
+      - quay.io/condaforge/linux-anvil-comp7        # [os.environ.get("BUILD_PLATFORM") == "linux-64"]
+      - quay.io/condaforge/linux-anvil-cos7-x86_64  # [os.environ.get("BUILD_PLATFORM") == "linux-64"]
+      - quay.io/condaforge/linux-anvil-aarch64      # [os.environ.get("BUILD_PLATFORM") == "linux-aarch64"]
+      - quay.io/condaforge/linux-anvil-ppc64le      # [os.environ.get("BUILD_PLATFORM") == "linux-ppc64le"]
+      - quay.io/condaforge/linux-anvil-armv7l       # [os.environ.get("BUILD_PLATFORM") == "linux-armv7l"]
+      - quay.io/condaforge/linux-anvil-cuda:9.2     # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"]
+      - quay.io/condaforge/linux-anvil-cos7-cuda:9.2     # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"]
+      - quay.io/condaforge/linux-anvil-cuda:10.0    # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"]
+      - quay.io/condaforge/linux-anvil-cos7-cuda:10.0    # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"]
+      - quay.io/condaforge/linux-anvil-cuda:10.1    # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"]
+      - quay.io/condaforge/linux-anvil-cos7-cuda:10.1    # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"]
+      - quay.io/condaforge/linux-anvil-cuda:10.2    # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"]
+      - quay.io/condaforge/linux-anvil-cos7-cuda:10.2    # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"]
+      - quay.io/condaforge/linux-anvil-cuda:11.0    # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"]
+      - quay.io/condaforge/linux-anvil-cuda:11.1    # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"]
+      - quay.io/condaforge/linux-anvil-cuda:11.2    # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"]
+
+cuda_compiler_version:
+  - None
+  - 10.2                       # [linux64]
+  - 11.0                       # [linux64]
+
+c_compiler_version:     # [linux]
+  - 7                   # [linux64 or aarch64]
+  - 8                   # [ppc64le]
+cxx_compiler_version:   # [linux]
+  - 7                   # [linux64 or aarch64]
+  - 8                   # [ppc64le]
+
+cudnn:
+  - undefined
+  - 7                   # [linux64]
+  - 8                   # [linux64]
+
+cdt_name:  # [linux]
+  - cos6   # [linux64 and os.environ.get("DEFAULT_LINUX_VERSION", "cos6") == "cos6"]
+  - cos7   # [linux64 and os.environ.get("DEFAULT_LINUX_VERSION", "cos6") == "cos7"]
+  - cos7   # [linux and aarch64]
+  - cos7   # [linux and ppc64le]
+  - cos7   # [linux and armv7l]
+
+  - cos6   # [linux64 and os.environ.get("DEFAULT_LINUX_VERSION", "cos6") == "cos6"]
+  - cos7   # [linux64 and os.environ.get("DEFAULT_LINUX_VERSION", "cos6") == "cos7"]
+  - cos7   # [linux64]
+
+docker_image:                                   # [os.environ.get("BUILD_PLATFORM", "").startswith("linux-")]
+  - quay.io/condaforge/linux-anvil-comp7        # [os.environ.get("BUILD_PLATFORM") == "linux-64" and os.environ.get("DEFAULT_LINUX_VERSION", "cos6") == "cos6"]
+  - quay.io/condaforge/linux-anvil-cos7-x86_64  # [os.environ.get("BUILD_PLATFORM") == "linux-64" and os.environ.get("DEFAULT_LINUX_VERSION", "cos6") == "cos7"]
+  - quay.io/condaforge/linux-anvil-aarch64      # [os.environ.get("BUILD_PLATFORM") == "linux-aarch64"]
+  - quay.io/condaforge/linux-anvil-ppc64le      # [os.environ.get("BUILD_PLATFORM") == "linux-ppc64le"]
+  - quay.io/condaforge/linux-anvil-armv7l       # [os.environ.get("BUILD_PLATFORM") == "linux-armv7l"]
+
+  - quay.io/condaforge/linux-anvil-cuda:10.2        # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64" and os.environ.get("DEFAULT_LINUX_VERSION", "cos6") == "cos6"]
+  - quay.io/condaforge/linux-anvil-cos7-cuda:10.2   # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64" and os.environ.get("DEFAULT_LINUX_VERSION", "cos6") == "cos7"]
+  - quay.io/condaforge/linux-anvil-cuda:11.0    # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"]
+
+zip_keys:
+  - - cudnn                      # [linux64]
+    - cuda_compiler_version      # [linux64]
+    - docker_image               # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"]
+    - cdt_name                   # [linux64]
diff --git a/.ci_support/migrations/cuda111_112.yaml b/.ci_support/migrations/cuda111_112.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7752337978a705bcb8ef4fdd8f740e78a948dc89
--- /dev/null
+++ b/.ci_support/migrations/cuda111_112.yaml
@@ -0,0 +1,29 @@
+migrator_ts: 1611736740
+__migrator:
+  kind:
+    version
+  migration_number:
+    1
+  build_number:
+    1
+  override_cbc_keys:
+    - cuda_compiler_stub
+  operation: key_add
+  check_solvable: false
+  primary_key: cuda_compiler_version
+
+cuda_compiler_version:         # [linux64 or win]
+  - 11.1                       # [linux64 or win]
+  - 11.2                       # [linux64 or win]
+
+cudnn:                  # [linux64 or win]
+  - 8                   # [linux64 or win]
+  - 8                   # [linux64 or win]
+
+cdt_name:  # [linux]
+  - cos7   # [linux64]
+  - cos7   # [linux64]
+
+docker_image:                                   # [os.environ.get("BUILD_PLATFORM", "").startswith("linux-")]
+  - quay.io/condaforge/linux-anvil-cuda:11.1    # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"]
+  - quay.io/condaforge/linux-anvil-cuda:11.2    # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"]
diff --git a/.ci_support/migrations/icu69.yaml b/.ci_support/migrations/icu69.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0cfadd8c5849381d456739a0be090dc3a499c38f
--- /dev/null
+++ b/.ci_support/migrations/icu69.yaml
@@ -0,0 +1,7 @@
+__migrator:
+  build_number: 1
+  kind: version
+  migration_number: 1
+icu:
+- '69'
+migrator_ts: 1635419000.7351162
diff --git a/.ci_support/osx_64_python3.7.____cpython.yaml b/.ci_support/osx_64_python3.7.____cpython.yaml
index fdd58ddf23d3ebb5f3235d4a329ce8e6b6e152b0..446372054543b905070e69b8582c8c2a4e74f965 100644
--- a/.ci_support/osx_64_python3.7.____cpython.yaml
+++ b/.ci_support/osx_64_python3.7.____cpython.yaml
@@ -9,7 +9,9 @@ c_compiler_version:
 channel_sources:
 - conda-forge
 channel_targets:
-- amir183 main
+- conda-forge main
+cuda_compiler_version:
+- None
 cxx_compiler:
 - clangxx
 cxx_compiler_version:
@@ -19,7 +21,7 @@ giflib:
 grpc_cpp:
 - '1.40'
 icu:
-- '68'
+- '69'
 jpeg:
 - '9'
 libcurl:
diff --git a/.ci_support/osx_64_python3.8.____cpython.yaml b/.ci_support/osx_64_python3.8.____cpython.yaml
index 3d922a893068811619484c7c4d68529b74fd745e..78c3d7dc32de0fff9dd35cfd0f5bcd8f7b5a91df 100644
--- a/.ci_support/osx_64_python3.8.____cpython.yaml
+++ b/.ci_support/osx_64_python3.8.____cpython.yaml
@@ -9,7 +9,9 @@ c_compiler_version:
 channel_sources:
 - conda-forge
 channel_targets:
-- amir183 main
+- conda-forge main
+cuda_compiler_version:
+- None
 cxx_compiler:
 - clangxx
 cxx_compiler_version:
@@ -19,7 +21,7 @@ giflib:
 grpc_cpp:
 - '1.40'
 icu:
-- '68'
+- '69'
 jpeg:
 - '9'
 libcurl:
diff --git a/.ci_support/osx_64_python3.9.____cpython.yaml b/.ci_support/osx_64_python3.9.____cpython.yaml
index e77ab7881eb13f1e1c4a4c5050b9bafc36044754..31a18d08d95e8723b8e209ae064452a206f6cbc7 100644
--- a/.ci_support/osx_64_python3.9.____cpython.yaml
+++ b/.ci_support/osx_64_python3.9.____cpython.yaml
@@ -9,7 +9,9 @@ c_compiler_version:
 channel_sources:
 - conda-forge
 channel_targets:
-- amir183 main
+- conda-forge main
+cuda_compiler_version:
+- None
 cxx_compiler:
 - clangxx
 cxx_compiler_version:
@@ -19,7 +21,7 @@ giflib:
 grpc_cpp:
 - '1.40'
 icu:
-- '68'
+- '69'
 jpeg:
 - '9'
 libcurl:
diff --git a/.ci_support/osx_arm64_python3.8.____cpython.yaml b/.ci_support/osx_arm64_python3.8.____cpython.yaml
index 98338e6d554c136e98103212a5d8adea6b3e0cc9..7d712cb372c265f37a14c9475c21228745d111b9 100644
--- a/.ci_support/osx_arm64_python3.8.____cpython.yaml
+++ b/.ci_support/osx_arm64_python3.8.____cpython.yaml
@@ -7,7 +7,9 @@ c_compiler_version:
 channel_sources:
 - conda-forge
 channel_targets:
-- amir183 main
+- conda-forge main
+cuda_compiler_version:
+- None
 cxx_compiler:
 - clangxx
 cxx_compiler_version:
@@ -17,7 +19,7 @@ giflib:
 grpc_cpp:
 - '1.40'
 icu:
-- '68'
+- '69'
 jpeg:
 - '9'
 libcurl:
diff --git a/.ci_support/osx_arm64_python3.9.____cpython.yaml b/.ci_support/osx_arm64_python3.9.____cpython.yaml
index 5cdc8112d986b88fe0e2ba3776c8727ac7987f4a..fb2c1f38eff2f07a160fa235ef76564e16790327 100644
--- a/.ci_support/osx_arm64_python3.9.____cpython.yaml
+++ b/.ci_support/osx_arm64_python3.9.____cpython.yaml
@@ -7,7 +7,9 @@ c_compiler_version:
 channel_sources:
 - conda-forge
 channel_targets:
-- amir183 main
+- conda-forge main
+cuda_compiler_version:
+- None
 cxx_compiler:
 - clangxx
 cxx_compiler_version:
@@ -17,7 +19,7 @@ giflib:
 grpc_cpp:
 - '1.40'
 icu:
-- '68'
+- '69'
 jpeg:
 - '9'
 libcurl:
diff --git a/.gitattributes b/.gitattributes
index 9060b272b25719c02fac7599ddf8a2c30d192016..ce52713a1b5ad32e1cb679d5cc9117a510f7aa1b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -18,6 +18,7 @@ bld.bat text eol=crlf
 .gitignore linguist-generated=true
 .travis.yml linguist-generated=true
 .scripts/* linguist-generated=true
+.woodpecker.yml linguist-generated=true
 LICENSE.txt linguist-generated=true
 README.md linguist-generated=true
 azure-pipelines.yml linguist-generated=true
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 0f10de3ac3e30df170c5f8ef7a5388e40cad7ff5..a7bda41b18893cfce9d4146ce685e63894c0f19c 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1 +1 @@
-* @farhantejani @ghego @gilbertfrancois @h-vetinari @hajapy @hmaarrfk @jschueller @njzjz @waitingkuo @xhochy
\ No newline at end of file
+* @farhantejani @ghego @gilbertfrancois @h-vetinari @hajapy @hmaarrfk @jschueller @njzjz @waitingkuo @wolfv @xhochy
\ No newline at end of file
diff --git a/.scripts/build_steps.sh b/.scripts/build_steps.sh
index 23868ed8c6b218d4f153ddcc21af3324e551ac00..a85ae38b451cc1c9c039ba80bd25b2fd341e584e 100755
--- a/.scripts/build_steps.sh
+++ b/.scripts/build_steps.sh
@@ -39,6 +39,7 @@ source run_conda_forge_build_setup
 make_build_number "${FEEDSTOCK_ROOT}" "${RECIPE_ROOT}" "${CONFIG_FILE}"
 
 
+
 ( endgroup "Configuring conda" ) 2> /dev/null
 
 if [[ "${BUILD_WITH_CONDA_DEBUG:-0}" == 1 ]]; then
diff --git a/.scripts/run_docker_build.sh b/.scripts/run_docker_build.sh
index 9a61caf79eefbfe7cd381c9c7915ef3a59741ee7..0c6515fce78935fc1230c29499f6563b4f83d163 100755
--- a/.scripts/run_docker_build.sh
+++ b/.scripts/run_docker_build.sh
@@ -76,6 +76,7 @@ fi
 
 export UPLOAD_PACKAGES="${UPLOAD_PACKAGES:-True}"
 export IS_PR_BUILD="${IS_PR_BUILD:-False}"
+docker pull "${DOCKER_IMAGE}"
 docker run ${DOCKER_RUN_ARGS} \
            -v "${RECIPE_ROOT}":/home/conda/recipe_root:rw,z,delegated \
            -v "${FEEDSTOCK_ROOT}":/home/conda/feedstock_root:rw,z,delegated \
@@ -93,9 +94,9 @@ docker run ${DOCKER_RUN_ARGS} \
            -e BINSTAR_TOKEN \
            -e FEEDSTOCK_TOKEN \
            -e STAGING_BINSTAR_TOKEN \
-           $DOCKER_IMAGE \
+           "${DOCKER_IMAGE}" \
            bash \
-           /home/conda/feedstock_root/${PROVIDER_DIR}/build_steps.sh
+           "/home/conda/feedstock_root/${PROVIDER_DIR}/build_steps.sh"
 
 # verify that the end of the script was reached
 test -f "$DONE_CANARY"
diff --git a/.scripts/run_osx_build.sh b/.scripts/run_osx_build.sh
index d4f04eab0ce28f6d3b02ab6b8ffebc0af7819cdc..d6de1234fc34155bb19a8e89904134121c569fe2 100755
--- a/.scripts/run_osx_build.sh
+++ b/.scripts/run_osx_build.sh
@@ -11,20 +11,21 @@ MINIFORGE_HOME=${MINIFORGE_HOME:-${HOME}/miniforge3}
 MINIFORGE_URL="https://github.com/conda-forge/miniforge/releases/latest/download"
 MINIFORGE_FILE="Miniforge3-MacOSX-$(uname -m).sh"
 curl -L -O "${MINIFORGE_URL}/${MINIFORGE_FILE}"
+rm -rf "${MINIFORGE_HOME}"
 bash $MINIFORGE_FILE -b -p ${MINIFORGE_HOME}
 
 ( endgroup "Installing a fresh version of Miniforge" ) 2> /dev/null
 
 ( startgroup "Configuring conda" ) 2> /dev/null
 
-BUILD_CMD=build
+GET_BOA=boa
+BUILD_CMD=mambabuild
 
 source ${MINIFORGE_HOME}/etc/profile.d/conda.sh
 conda activate base
 
 echo -e "\n\nInstalling conda-forge-ci-setup=3 and conda-build."
 conda install -n base --quiet --yes "conda-forge-ci-setup=3" conda-build pip ${GET_BOA:-}
-conda update --yes -c conda-forge/label/lief_dev -c conda-forge py-lief
 
 
 
@@ -63,8 +64,8 @@ validate_recipe_outputs "${FEEDSTOCK_NAME}"
 
 ( startgroup "Uploading packages" ) 2> /dev/null
 
-if [[ "${UPLOAD_PACKAGES}" != "False" ]]; then
+if [[ "${UPLOAD_PACKAGES}" != "False" ]] && [[ "${IS_PR_BUILD}" == "False" ]]; then
   upload_package --validate --feedstock-name="${FEEDSTOCK_NAME}" ./ ./recipe ./.ci_support/${CONFIG}.yaml
 fi
 
-( endgroup "Uploading packages" ) 2> /dev/null
+( endgroup "Uploading packages" ) 2> /dev/null
\ No newline at end of file
diff --git a/README.md b/README.md
index fb0f1a1dfa2b505f2a938c6d16c27fde0b758969..3d3a56c0ed5c853b34a8d7a5ab514b4b9fa138d0 100644
--- a/README.md
+++ b/README.md
@@ -36,24 +36,108 @@ Current build status
         <table>
           <thead><tr><th>Variant</th><th>Status</th></tr></thead>
           <tbody><tr>
-              <td>linux_64_python3.7.____cpython</td>
+              <td>linux_64_cuda_compiler_version10.2cudnn7python3.7.____cpython</td>
               <td>
                 <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=4385&branchName=master">
-                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/tensorflow-feedstock?branchName=master&jobName=linux&configuration=linux_64_python3.7.____cpython" alt="variant">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/tensorflow-feedstock?branchName=master&jobName=linux&configuration=linux_64_cuda_compiler_version10.2cudnn7python3.7.____cpython" alt="variant">
                 </a>
               </td>
             </tr><tr>
-              <td>linux_64_python3.8.____cpython</td>
+              <td>linux_64_cuda_compiler_version10.2cudnn7python3.8.____cpython</td>
               <td>
                 <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=4385&branchName=master">
-                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/tensorflow-feedstock?branchName=master&jobName=linux&configuration=linux_64_python3.8.____cpython" alt="variant">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/tensorflow-feedstock?branchName=master&jobName=linux&configuration=linux_64_cuda_compiler_version10.2cudnn7python3.8.____cpython" alt="variant">
                 </a>
               </td>
             </tr><tr>
-              <td>linux_64_python3.9.____cpython</td>
+              <td>linux_64_cuda_compiler_version10.2cudnn7python3.9.____cpython</td>
               <td>
                 <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=4385&branchName=master">
-                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/tensorflow-feedstock?branchName=master&jobName=linux&configuration=linux_64_python3.9.____cpython" alt="variant">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/tensorflow-feedstock?branchName=master&jobName=linux&configuration=linux_64_cuda_compiler_version10.2cudnn7python3.9.____cpython" alt="variant">
+                </a>
+              </td>
+            </tr><tr>
+              <td>linux_64_cuda_compiler_version11.0cudnn8python3.7.____cpython</td>
+              <td>
+                <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=4385&branchName=master">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/tensorflow-feedstock?branchName=master&jobName=linux&configuration=linux_64_cuda_compiler_version11.0cudnn8python3.7.____cpython" alt="variant">
+                </a>
+              </td>
+            </tr><tr>
+              <td>linux_64_cuda_compiler_version11.0cudnn8python3.8.____cpython</td>
+              <td>
+                <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=4385&branchName=master">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/tensorflow-feedstock?branchName=master&jobName=linux&configuration=linux_64_cuda_compiler_version11.0cudnn8python3.8.____cpython" alt="variant">
+                </a>
+              </td>
+            </tr><tr>
+              <td>linux_64_cuda_compiler_version11.0cudnn8python3.9.____cpython</td>
+              <td>
+                <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=4385&branchName=master">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/tensorflow-feedstock?branchName=master&jobName=linux&configuration=linux_64_cuda_compiler_version11.0cudnn8python3.9.____cpython" alt="variant">
+                </a>
+              </td>
+            </tr><tr>
+              <td>linux_64_cuda_compiler_version11.1cudnn8python3.7.____cpython</td>
+              <td>
+                <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=4385&branchName=master">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/tensorflow-feedstock?branchName=master&jobName=linux&configuration=linux_64_cuda_compiler_version11.1cudnn8python3.7.____cpython" alt="variant">
+                </a>
+              </td>
+            </tr><tr>
+              <td>linux_64_cuda_compiler_version11.1cudnn8python3.8.____cpython</td>
+              <td>
+                <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=4385&branchName=master">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/tensorflow-feedstock?branchName=master&jobName=linux&configuration=linux_64_cuda_compiler_version11.1cudnn8python3.8.____cpython" alt="variant">
+                </a>
+              </td>
+            </tr><tr>
+              <td>linux_64_cuda_compiler_version11.1cudnn8python3.9.____cpython</td>
+              <td>
+                <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=4385&branchName=master">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/tensorflow-feedstock?branchName=master&jobName=linux&configuration=linux_64_cuda_compiler_version11.1cudnn8python3.9.____cpython" alt="variant">
+                </a>
+              </td>
+            </tr><tr>
+              <td>linux_64_cuda_compiler_version11.2cudnn8python3.7.____cpython</td>
+              <td>
+                <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=4385&branchName=master">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/tensorflow-feedstock?branchName=master&jobName=linux&configuration=linux_64_cuda_compiler_version11.2cudnn8python3.7.____cpython" alt="variant">
+                </a>
+              </td>
+            </tr><tr>
+              <td>linux_64_cuda_compiler_version11.2cudnn8python3.8.____cpython</td>
+              <td>
+                <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=4385&branchName=master">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/tensorflow-feedstock?branchName=master&jobName=linux&configuration=linux_64_cuda_compiler_version11.2cudnn8python3.8.____cpython" alt="variant">
+                </a>
+              </td>
+            </tr><tr>
+              <td>linux_64_cuda_compiler_version11.2cudnn8python3.9.____cpython</td>
+              <td>
+                <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=4385&branchName=master">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/tensorflow-feedstock?branchName=master&jobName=linux&configuration=linux_64_cuda_compiler_version11.2cudnn8python3.9.____cpython" alt="variant">
+                </a>
+              </td>
+            </tr><tr>
+              <td>linux_64_cuda_compiler_versionNonecudnnundefinedpython3.7.____cpython</td>
+              <td>
+                <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=4385&branchName=master">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/tensorflow-feedstock?branchName=master&jobName=linux&configuration=linux_64_cuda_compiler_versionNonecudnnundefinedpython3.7.____cpython" alt="variant">
+                </a>
+              </td>
+            </tr><tr>
+              <td>linux_64_cuda_compiler_versionNonecudnnundefinedpython3.8.____cpython</td>
+              <td>
+                <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=4385&branchName=master">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/tensorflow-feedstock?branchName=master&jobName=linux&configuration=linux_64_cuda_compiler_versionNonecudnnundefinedpython3.8.____cpython" alt="variant">
+                </a>
+              </td>
+            </tr><tr>
+              <td>linux_64_cuda_compiler_versionNonecudnnundefinedpython3.9.____cpython</td>
+              <td>
+                <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=4385&branchName=master">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/tensorflow-feedstock?branchName=master&jobName=linux&configuration=linux_64_cuda_compiler_versionNonecudnnundefinedpython3.9.____cpython" alt="variant">
                 </a>
               </td>
             </tr><tr>
@@ -108,7 +192,9 @@ Current release info
 | [![Conda Recipe](https://img.shields.io/badge/recipe-libtensorflow_cc-green.svg)](https://anaconda.org/conda-forge/libtensorflow_cc) | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/libtensorflow_cc.svg)](https://anaconda.org/conda-forge/libtensorflow_cc) | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/libtensorflow_cc.svg)](https://anaconda.org/conda-forge/libtensorflow_cc) | [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/libtensorflow_cc.svg)](https://anaconda.org/conda-forge/libtensorflow_cc) |
 | [![Conda Recipe](https://img.shields.io/badge/recipe-tensorflow-green.svg)](https://anaconda.org/conda-forge/tensorflow) | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/tensorflow.svg)](https://anaconda.org/conda-forge/tensorflow) | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/tensorflow.svg)](https://anaconda.org/conda-forge/tensorflow) | [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/tensorflow.svg)](https://anaconda.org/conda-forge/tensorflow) |
 | [![Conda Recipe](https://img.shields.io/badge/recipe-tensorflow--base-green.svg)](https://anaconda.org/conda-forge/tensorflow-base) | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/tensorflow-base.svg)](https://anaconda.org/conda-forge/tensorflow-base) | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/tensorflow-base.svg)](https://anaconda.org/conda-forge/tensorflow-base) | [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/tensorflow-base.svg)](https://anaconda.org/conda-forge/tensorflow-base) |
+| [![Conda Recipe](https://img.shields.io/badge/recipe-tensorflow--cpu-green.svg)](https://anaconda.org/conda-forge/tensorflow-cpu) | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/tensorflow-cpu.svg)](https://anaconda.org/conda-forge/tensorflow-cpu) | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/tensorflow-cpu.svg)](https://anaconda.org/conda-forge/tensorflow-cpu) | [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/tensorflow-cpu.svg)](https://anaconda.org/conda-forge/tensorflow-cpu) |
 | [![Conda Recipe](https://img.shields.io/badge/recipe-tensorflow--estimator-green.svg)](https://anaconda.org/conda-forge/tensorflow-estimator) | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/tensorflow-estimator.svg)](https://anaconda.org/conda-forge/tensorflow-estimator) | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/tensorflow-estimator.svg)](https://anaconda.org/conda-forge/tensorflow-estimator) | [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/tensorflow-estimator.svg)](https://anaconda.org/conda-forge/tensorflow-estimator) |
+| [![Conda Recipe](https://img.shields.io/badge/recipe-tensorflow--gpu-green.svg)](https://anaconda.org/conda-forge/tensorflow-gpu) | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/tensorflow-gpu.svg)](https://anaconda.org/conda-forge/tensorflow-gpu) | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/tensorflow-gpu.svg)](https://anaconda.org/conda-forge/tensorflow-gpu) | [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/tensorflow-gpu.svg)](https://anaconda.org/conda-forge/tensorflow-gpu) |
 
 Installing tensorflow
 =====================
@@ -120,10 +206,10 @@ conda config --add channels conda-forge
 conda config --set channel_priority strict
 ```
 
-Once the `conda-forge` channel has been enabled, `libtensorflow, libtensorflow_cc, tensorflow, tensorflow-base, tensorflow-estimator` can be installed with:
+Once the `conda-forge` channel has been enabled, `libtensorflow, libtensorflow_cc, tensorflow, tensorflow-base, tensorflow-cpu, tensorflow-estimator, tensorflow-gpu` can be installed with:
 
 ```
-conda install libtensorflow libtensorflow_cc tensorflow tensorflow-base tensorflow-estimator
+conda install libtensorflow libtensorflow_cc tensorflow tensorflow-base tensorflow-cpu tensorflow-estimator tensorflow-gpu
 ```
 
 It is possible to list all of the versions of `libtensorflow` available on your platform with:
@@ -136,7 +222,8 @@ conda search libtensorflow --channel conda-forge
 About conda-forge
 =================
 
-[![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](http://numfocus.org)
+[![Powered by
+NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](https://numfocus.org)
 
 conda-forge is a community-led conda channel of installable packages.
 In order to provide high-quality builds, the process has been automated into the
@@ -205,5 +292,6 @@ Feedstock Maintainers
 * [@jschueller](https://github.com/jschueller/)
 * [@njzjz](https://github.com/njzjz/)
 * [@waitingkuo](https://github.com/waitingkuo/)
+* [@wolfv](https://github.com/wolfv/)
 * [@xhochy](https://github.com/xhochy/)
 
diff --git a/build-locally.py b/build-locally.py
index 3df30a401406087c4e3666bdffea04f5dacca0f3..8b7434893256bdb05ab5c105b1442d4752526f5a 100755
--- a/build-locally.py
+++ b/build-locally.py
@@ -12,6 +12,8 @@ import platform
 
 def setup_environment(ns):
     os.environ["CONFIG"] = ns.config
+    os.environ["UPLOAD_PACKAGES"] = "False"
+    os.environ["IS_PR_BUILD"] = "True"
     if ns.debug:
         os.environ["BUILD_WITH_CONDA_DEBUG"] = "1"
         if ns.output_id:
@@ -20,6 +22,10 @@ def setup_environment(ns):
         os.environ["MINIFORGE_HOME"] = os.path.join(
             os.path.dirname(__file__), "miniforge3"
         )
+    if "OSX_SDK_DIR" not in os.environ:
+        os.environ["OSX_SDK_DIR"] = os.path.join(
+            os.path.dirname(__file__), "SDKs"
+        )
 
 
 def run_docker_build(ns):
diff --git a/recipe/build.sh b/recipe/build.sh
index f0504dc967c0d5d1d5b368d72d58f046e93f0e61..873502e7da58927baeca4bfd1ac72f0651dd39fb 100644
--- a/recipe/build.sh
+++ b/recipe/build.sh
@@ -110,6 +110,37 @@ export TF_CONFIGURE_IOS=0
 sed -i -e "/PROTOBUF_INCLUDE_PATH/c\ " .bazelrc
 sed -i -e "/PREFIX/c\ " .bazelrc
 
+
+# CUDA-enabled variants only (cuda_compiler_version is the string "None" otherwise).
+if [[ ${cuda_compiler_version:-None} != "None" ]]; then
+    export GCC_HOST_COMPILER_PATH="${GCC}"
+    export GCC_HOST_COMPILER_PREFIX="$(dirname "${GCC}")"
+
+    export TF_CUDA_PATHS="${PREFIX},${CUDA_HOME}"
+    export TF_NEED_CUDA=1
+    export TF_CUDA_VERSION="${cuda_compiler_version}"
+    export TF_CUDNN_VERSION="${cudnn}"
+    export TF_NCCL_VERSION=$(pkg-config nccl --modversion | grep -Po '\d+\.\d+')
+
+    export LDFLAGS="${LDFLAGS//-Wl,-z,now/-Wl,-z,lazy}"
+    export CC_OPT_FLAGS="-march=nocona -mtune=haswell"
+
+    # The compute capabilities to build for depend on the CUDA toolkit version.
+    if [[ ${cuda_compiler_version} == 10.* ]]; then
+        export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_50,sm_60,sm_62,sm_70,sm_72,sm_75,compute_75
+    elif [[ ${cuda_compiler_version} == 11.0* ]]; then
+        export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_50,sm_60,sm_62,sm_70,sm_72,sm_75,sm_80,compute_80
+    elif [[ ${cuda_compiler_version} == 11.1 || ${cuda_compiler_version} == 11.2 ]]; then
+        export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_50,sm_60,sm_62,sm_70,sm_72,sm_75,sm_80,sm_86,compute_86
+    else
+        echo "unsupported cuda version." >&2
+        exit 1
+    fi
+fi
+
+bazel clean --expunge
+bazel shutdown
+
 ./configure
 
 # build using bazel
diff --git a/recipe/custom_toolchain/cc_toolchain_config.bzl b/recipe/custom_toolchain/cc_toolchain_config.bzl
index a312d0750b90d070fabb0dbadfdde4480e4dbcff..9494c4a89c6f41c67d168b40be51fc99e6ab09f3 100644
--- a/recipe/custom_toolchain/cc_toolchain_config.bzl
+++ b/recipe/custom_toolchain/cc_toolchain_config.bzl
@@ -18,7 +18,7 @@ def _impl(ctx):
     tool_paths = [
         tool_path(
             name = "gcc",
-            path = "${GCC}",
+            path = "${BAZEL_TOOLCHAIN_GCC}",
         ),
         tool_path(
             name = "ld",
@@ -304,15 +304,18 @@ def _impl(ctx):
     else:
         cxx_builtin_include_directories = [
             "${CONDA_BUILD_SYSROOT}/usr/include",
-	    "${BUILD_PREFIX}/lib/gcc/${HOST}/${COMPILER_VERSION}",
-	    "${BUILD_PREFIX}/${HOST}/include/c++/${COMPILER_VERSION}",
+            "${BUILD_PREFIX}/lib/gcc/${HOST}/${COMPILER_VERSION}",
+            "${BUILD_PREFIX}/${HOST}/include/c++/${COMPILER_VERSION}",
             "${PREFIX}/include",
         ]
 
+        if (len("${CUDA_HOME}")):
+            cxx_builtin_include_directories.append("${CUDA_HOME}/include")
+
     return cc_common.create_cc_toolchain_config_info(
         ctx = ctx,
         toolchain_identifier = "local",
-	host_system_name = "local",
+        host_system_name = "local",
         #host_system_name = "TARGET_CPU",
         target_system_name = "TARGET_SYSTEM",
         target_cpu = "TARGET_CPU",
diff --git a/recipe/custom_toolchain/crosstool_wrapper_driver_is_not_gcc b/recipe/custom_toolchain/crosstool_wrapper_driver_is_not_gcc
new file mode 100755
index 0000000000000000000000000000000000000000..86cac91a1a61647d9fcf3f31c446584fa0505093
--- /dev/null
+++ b/recipe/custom_toolchain/crosstool_wrapper_driver_is_not_gcc
@@ -0,0 +1,283 @@
+#!/usr/bin/env python
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Crosstool wrapper for compiling CUDA programs.
+
+SYNOPSIS:
+  crosstool_wrapper_is_not_gcc [options passed in by cc_library()
+                                or cc_binary() rule]
+
+DESCRIPTION:
+  This script is expected to be called by the cc_library() or cc_binary() bazel
+  rules. When the option "-x cuda" is present in the list of arguments passed
+  to this script, it invokes the nvcc CUDA compiler. Most arguments are passed
+  as is as a string to --compiler-options of nvcc. When "-x cuda" is not
+  present, this wrapper invokes hybrid_driver_is_not_gcc with the input
+  arguments as is.
+"""
+
+# NOTE wolfv this file can be found here: https://raw.githubusercontent.com/tensorflow/tensorflow/master/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl
+
+from __future__ import print_function
+
+from argparse import ArgumentParser
+import os
+import subprocess
+import re
+import sys
+import pipes
+
+# Template values set by cuda_autoconf.
+CPU_COMPILER = "${GCC_COMPILER_PATH}"
+GCC_HOST_COMPILER_PATH = "${GCC_COMPILER_PATH}"
+
+NVCC_VERSION = '${CUDA_VERSION}'
+NVCC_PATH = '${CUDA_HOME}/bin/nvcc'
+PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH)
+
+def Log(s):
+    sys.stdout.write('gpus/crosstool: {0}'.format(s) + '\n')
+
+
+def GetOptionValue(argv, option):
+    """Return every value supplied for `option` in argv as one flat list.
+
+    Args:
+      argv: A list of strings, possibly the argv passed to main().
+      option: The option to look up, including its leading '-'.
+
+    Returns:
+      The values in order of appearance, whether they followed a single
+      occurrence (-opt val1 val2) or several (-opt val1 -opt val2); an empty
+      list when the option is absent or was given no values.
+    """
+
+    parser = ArgumentParser()
+    parser.add_argument(option, nargs='*', action='append')
+    dest = option.lstrip('-').replace('-', '_')
+    namespace, _unused = parser.parse_known_args(argv)
+    occurrences = vars(namespace)[dest] if namespace else None
+    if not occurrences:
+        return []
+    return [value for group in occurrences for value in group]
+
+
+def GetHostCompilerOptions(argv):
+    """Collect the host-compiler options found in argv into one string.
+
+    Recognized: -isystem, -iquote, -g, -fno-canonical-system-headers,
+    -no-canonical-prefixes and --sysroot.
+
+    Args:
+      argv: A list of strings, possibly the argv passed to main().
+
+    Returns:
+      The string that can be used as the --compiler-options to nvcc.
+    """
+
+    parser = ArgumentParser()
+    for multi_valued in ('-isystem', '-iquote'):
+        parser.add_argument(multi_valued, nargs='*', action='append')
+    parser.add_argument('--sysroot', nargs=1)
+    parser.add_argument('-g', nargs='*', action='append')
+    for switch in ('-fno-canonical-system-headers', '-no-canonical-prefixes'):
+        parser.add_argument(switch, action='store_true')
+
+    parsed, _unused = parser.parse_known_args(argv)
+
+    pieces = []
+    for flag, groups in ((' -isystem ', parsed.isystem), (' -iquote ', parsed.iquote)):
+        if groups:
+            pieces.append(flag + flag.join(sum(groups, [])))
+    if parsed.g:
+        pieces.append(' -g' + ' -g'.join(sum(parsed.g, [])))
+    if parsed.fno_canonical_system_headers:
+        pieces.append(' -fno-canonical-system-headers')
+    if parsed.no_canonical_prefixes:
+        pieces.append(' -no-canonical-prefixes')
+    if parsed.sysroot:
+        pieces.append(' --sysroot ' + parsed.sysroot[0])
+    return ''.join(pieces)
+
+def _update_options(nvcc_options):
+    """Rename 'relaxed-constexpr' to 'expt-relaxed-constexpr' unless nvcc is 7.0."""
+    if NVCC_VERSION == "7.0":
+        return nvcc_options
+
+    renamed = {"relaxed-constexpr": "expt-relaxed-constexpr"}
+    return [renamed.get(name, name) for name in nvcc_options]
+
+def GetNvccOptions(argv):
+    """Gather every -nvcc_options value in argv into nvcc command-line flags.
+
+    Args:
+      argv: A list of strings, possibly the argv passed to main().
+
+    Returns:
+      The values, each prefixed with '--' and joined by single spaces, or ''
+      when no -nvcc_options value is present.
+    """
+
+    parser = ArgumentParser()
+    parser.add_argument('-nvcc_options', nargs='*', action='append')
+    parsed, _unused = parser.parse_known_args(argv)
+
+    if not parsed.nvcc_options:
+        return ''
+    flat = _update_options(sum(parsed.nvcc_options, []))
+    return ' '.join('--' + name for name in flat)
+
+def system(cmd):
+    """Run cmd through os.system() and decode its wait status.
+
+    Args:
+      cmd: The command.
+
+    Returns:
+      The exit code if the process exited with exit() or -signal
+      if the process was terminated by a signal.
+    """
+    status = os.system(cmd)
+    if not os.WIFEXITED(status):
+        # Not a normal exit: report the terminating signal as a negative number.
+        return -os.WTERMSIG(status)
+    return os.WEXITSTATUS(status)
+
+def InvokeNvcc(argv, log=False):
+    """Call nvcc with arguments assembled from argv.
+
+    Args:
+      argv: A list of strings, possibly the argv passed to main().
+      log: True if logging is requested.
+
+    Returns:
+      The exit status of the nvcc command, or 1 without running nvcc when argv
+      has no -c value or does not have exactly one -o value.
+    """
+
+    host_compiler_options = GetHostCompilerOptions(argv)
+    nvcc_compiler_options = GetNvccOptions(argv)
+    opt_option = GetOptionValue(argv, '-O')
+    m_options = GetOptionValue(argv, '-m')
+    m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']])
+    include_options = GetOptionValue(argv, '-I')
+    out_file = GetOptionValue(argv, '-o')
+    depfiles = GetOptionValue(argv, '-MF')
+    defines = GetOptionValue(argv, '-D')
+    defines = ''.join([' -D' + define for define in defines])
+    undefines = GetOptionValue(argv, '-U')
+    undefines = ''.join([' -U' + define for define in undefines])
+    std_options = GetOptionValue(argv, '-std')
+    # Supported -std flags as of CUDA 9.0. Only keep last to mimic gcc/clang.
+    nvcc_allowed_std_options = ["c++03", "c++11", "c++14"]
+    std_options = ''.join([' -std=' + define
+                           for define in std_options if define in nvcc_allowed_std_options][-1:])
+    fatbin_options = ''.join([' --fatbin-options=' + option
+                              for option in GetOptionValue(argv, '-Xcuda-fatbinary')])
+
+    # The list of source files get passed after the -c option. I don't know of
+    # any other reliable way to just get the list of source files to be compiled.
+    src_files = GetOptionValue(argv, '-c')
+
+    # Pass -w through from host to nvcc, but don't do anything fancier with
+    # warnings-related flags, since they're not necessarily the same across
+    # compilers.
+    warning_options = ' -w' if '-w' in argv else ''
+
+    if len(src_files) == 0:
+        return 1
+    if len(out_file) != 1:
+        return 1
+
+    opt = (' -O2' if (len(opt_option) > 0 and int(opt_option[0]) > 0)
+           else ' -g')
+
+    includes = (' -I ' + ' -I '.join(include_options)
+                if len(include_options) > 0
+                else '')
+
+    # Other options also start with -c, so keep only the values that look
+    # like C/C++ source files. Raw string: '\.' is not a valid str escape.
+    src_files = [f for f in src_files if
+                 re.search(r'\.cpp$|\.cc$|\.c$|\.cxx$|\.C$', f)]
+    srcs = ' '.join(src_files)
+    out = ' -o ' + out_file[0]
+
+    nvccopts = '-D_FORCE_INLINES '
+    for capability in GetOptionValue(argv, "--cuda-gpu-arch"):
+        capability = capability[len('sm_'):]
+        nvccopts += r'-gencode=arch=compute_%s,\"code=sm_%s\" ' % (capability,
+                                                                   capability)
+    for capability in GetOptionValue(argv, '--cuda-include-ptx'):
+        capability = capability[len('sm_'):]
+        nvccopts += r'-gencode=arch=compute_%s,\"code=compute_%s\" ' % (capability,
+                                                                        capability)
+    nvccopts += nvcc_compiler_options
+    nvccopts += undefines
+    nvccopts += defines
+    nvccopts += std_options
+    nvccopts += m_options
+    nvccopts += warning_options
+    nvccopts += fatbin_options
+
+    if depfiles:
+        # Generate the dependency file
+        depfile = depfiles[0]
+        cmd = (NVCC_PATH + ' ' + nvccopts +
+               ' --compiler-options "' + host_compiler_options + '"' +
+               ' --compiler-bindir=' + GCC_HOST_COMPILER_PATH +
+               ' -I .' +
+               ' -x cu ' + opt + includes + ' ' + srcs + ' -M -o ' + depfile)
+        if log: Log(cmd)
+        exit_status = system(cmd)
+        if exit_status != 0:
+            return exit_status
+
+    cmd = (NVCC_PATH + ' ' + nvccopts +
+           ' --compiler-options "' + host_compiler_options + ' -fPIC"' +
+           ' --compiler-bindir=' + GCC_HOST_COMPILER_PATH +
+           ' -I .' +
+           ' -x cu ' + opt + includes + ' -c ' + srcs + out)
+
+    # TODO(zhengxq): for some reason, 'gcc' needs this help to find 'as'.
+    # Need to investigate and fix.
+    cmd = 'PATH=' + PREFIX_DIR + ':$PATH ' + cmd
+    if log: Log(cmd)
+    return system(cmd)
+
+
+def main():
+    """Dispatch to nvcc for '-x cuda' invocations, else to the CPU compiler."""
+    parser = ArgumentParser()
+    parser.add_argument('-x', nargs=1)
+    parser.add_argument('--cuda_log', action='store_true')
+    known, remaining = parser.parse_known_args(sys.argv[1:])
+    verbose = known.cuda_log
+
+    if known.x and known.x[0] == 'cuda':
+        if verbose:
+            Log('-x cuda')
+            Log('using nvcc')
+        # Shell-quote every argument: InvokeNvcc builds a command string.
+        return InvokeNvcc([pipes.quote(arg) for arg in remaining], log=verbose)
+
+    # Not a CUDA compile. Forward the original argv rather than 'remaining',
+    # because parsing removed -x and the CPU compiler is sensitive to where -x
+    # sits among its arguments; only our own --cuda_log flag is dropped.
+    passthrough = [arg for arg in sys.argv[1:]
+                   if not arg.startswith('--cuda_log')]
+    return subprocess.call([CPU_COMPILER] + passthrough)
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/recipe/gen-bazel-toolchain.sh b/recipe/gen-bazel-toolchain.sh
index cee9fc7d41adb8297ef14a7155fdc04731b994b9..acdfea1f35b49288b6f27a183a234cdd092d186a 100755
--- a/recipe/gen-bazel-toolchain.sh
+++ b/recipe/gen-bazel-toolchain.sh
@@ -9,7 +9,10 @@ function apply_cc_template() {
   sed -ie "s:TARGET_PLATFORM:${target_platform}:" $1
   sed -ie "s:\${CONDA_BUILD_SYSROOT}:${CONDA_BUILD_SYSROOT}:" $1
   sed -ie "s:\${COMPILER_VERSION}:${BAZEL_TOOLCHAIN_COMPILER_VERSION:-}:" $1
-  sed -ie "s:\${GCC}:${BAZEL_TOOLCHAIN_GCC}:" $1
+  sed -ie "s:\${GCC_COMPILER_PATH}:${GCC:-}:" $1
+  sed -ie "s:\${BAZEL_TOOLCHAIN_GCC}:${BAZEL_TOOLCHAIN_GCC}:" $1
+  sed -ie "s:\${CUDA_VERSION}:${cuda_compiler_version:-}:" $1
+  sed -ie "s:\${CUDA_HOME}:${CUDA_HOME:-}:" $1
   sed -ie "s:\${PREFIX}:${PREFIX}:" $1
   sed -ie "s:\${BUILD_PREFIX}:${BUILD_PREFIX}:" $1
   sed -ie "s:\${LD}:${LD}:" $1
@@ -48,8 +51,14 @@ pushd custom_toolchain
     export BAZEL_TOOLCHAIN_COMPILER_VERSION=$(${CC} -v 2>&1|tail -n1|cut -d' ' -f3)
     export BAZEL_TOOLCHAIN_AR=$(basename ${AR})
     touch cc_wrapper.sh
+
     export BAZEL_TOOLCHAIN_LIBCXX="stdc++"
     export BAZEL_TOOLCHAIN_GCC="${GCC}"
+
+    # NVCC needs the crosstool wrapper; an unset version is treated as "None" (CPU)
+    if [[ ${cuda_compiler_version:-None} != "None" ]]; then
+      export BAZEL_TOOLCHAIN_GCC=crosstool_wrapper_driver_is_not_gcc
+    fi
   fi
 
   export TARGET_SYSTEM="${HOST}"
@@ -99,6 +108,7 @@ pushd custom_toolchain
 
   cp cc_toolchain_config.bzl cc_toolchain_build_config.bzl
   apply_cc_template cc_toolchain_config.bzl
+  apply_cc_template crosstool_wrapper_driver_is_not_gcc
   (
     if [[ "${build_platform}" != "${target_platform}" ]]; then
       if [[ "${target_platform}" == osx-* ]]; then
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 4407d7814337bc3a61f929214e2e2a2d77b0947b..d04ec426a69a895820cca2876e9d1b103a6e308d 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -21,7 +21,7 @@ source:
     folder: tensorflow-estimator
 
 build:
-  number: 2
+  number: 3
   skip: true  # [win]
 
 requirements:
@@ -31,6 +31,7 @@ requirements:
     - numpy 1.19.*                           # [build_platform != target_platform]
     - {{ compiler('c') }}
     - {{ compiler('cxx') }}
+    - {{ compiler('cuda') }}                 # [cuda_compiler_version != "None"]
     - bazel
     - bazel >=4.2.1  # [osx and arm64]
     - grpc-cpp
@@ -38,7 +39,13 @@ requirements:
     - nasm
     - sed
     - rsync  # [linux]
+    # realpath is not available from the docker image for cuda <= 10.2
+    # so we install coreutils here
+    - coreutils  # [cuda_compiler_version != "None"]
   host:
+    # GPU requirements for CUDA
+    - cudnn      # [cuda_compiler_version != "None"]
+    - nccl       # [cuda_compiler_version != "None"]
     # conda build requirements
     - python
     - pip
@@ -95,6 +102,8 @@ outputs:
     script: build_pkg.sh  # [not win]
     script: build_pkg.bat  # [win]
     build:
+      string: cuda{{ cuda_compiler_version | replace('.', '') }}py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}  # [cuda_compiler_version != "None"]
+      string: cpu_py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}  # [cuda_compiler_version == "None"]
       entry_points:
         - toco_from_protos = tensorflow.lite.toco.python.toco_from_protos:main
         - tflite_convert = tensorflow.lite.python.tflite_convert:main
@@ -109,7 +118,11 @@ outputs:
       build:
         - {{ compiler('c') }}
         - {{ compiler('cxx') }}
+        - {{ compiler('cuda') }}  # [cuda_compiler_version != "None"]
       host:
+        # GPU requirements
+        - cudnn                   # [cuda_compiler_version != "None"]
+        - nccl                    # [cuda_compiler_version != "None"]
         # conda build requirements
         - python
         - pip
@@ -187,11 +200,18 @@ outputs:
   - name: tensorflow-estimator
     script: build_estimator.sh   # [not win]
     script: build_estimator.bat  # [win]
+    build:
+      string: cuda{{ cuda_compiler_version | replace('.', '') }}py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}  # [cuda_compiler_version != "None"]
+      string: cpu_py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}  # [cuda_compiler_version == "None"]
     requirements:
       build:
         - {{ compiler('c') }}
         - {{ compiler('cxx') }}
-        - bazel 3
+        # Keep the cuda compiler here since it helps package solvers
+        # decide on the cuda variant
+        # https://github.com/conda-forge/tensorflow-feedstock/issues/162
+        - {{ compiler('cuda') }}                 # [cuda_compiler_version != "None"]
+        - bazel
         - bazel >=4.2.1  # [osx and arm64]
       host:
         - python
@@ -214,7 +234,24 @@ outputs:
 
 
   - name: tensorflow
+    build:
+      string: cuda{{ cuda_compiler_version | replace('.', '') }}py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}  # [cuda_compiler_version != "None"]
+      string: cpu_py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}  # [cuda_compiler_version == "None"]
+
+      # weigh down cpu implementation and give cuda preference
+      track_features:
+        - tensorflow-cpu          # [cuda_compiler_version == "None"]
+
     requirements:
+      build:
+        # Keep the other compilers here so as to help solve for the
+        # required version of libc
+        - {{ compiler('c') }}
+        - {{ compiler('cxx') }}
+        # Keep the cuda compiler here since it helps package solvers
+        # decide on the cuda variant
+        # https://github.com/conda-forge/tensorflow-feedstock/issues/162
+        - {{ compiler('cuda') }}  # [cuda_compiler_version != "None"]
       host:
         - python
         # This ensures that a consistent version of openssl is chosen between
@@ -238,11 +275,17 @@ outputs:
     script: cp_libtensorflow.sh
     build:
       skip: true  # [not linux]
+      string: cuda{{ cuda_compiler_version | replace('.', '') }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}  # [cuda_compiler_version != "None"]
+      string: cpu_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}  # [cuda_compiler_version == "None"]
     requirements:
       # build requirements needs to pick up the compiler run_exports
       build:
         - {{ compiler('c') }}
         - {{ compiler('cxx') }}
+        # Keep the cuda compiler here since it helps package solvers
+        # decide on the cuda variant
+        # https://github.com/conda-forge/tensorflow-feedstock/issues/162
+        - {{ compiler('cuda') }}  # [cuda_compiler_version != "None"]
       # host requirements to pick up run_exports
       host:
         - giflib
@@ -264,18 +307,24 @@ outputs:
       requires:
         - {{ compiler('c') }}
       commands:
-        - test -f $PREFIX/lib/libtensorflow.so  # [not win]
-        - ./test_libtensorflow.sh               # [not win]
+        - test -f $PREFIX/lib/libtensorflow.so  # [not win and cuda_compiler_version == "None"]
+        - ./test_libtensorflow.sh               # [not win and cuda_compiler_version == "None"]
 
   - name: libtensorflow_cc
     script: cp_libtensorflow_cc.sh
     build:
       skip: true  # [not linux]
+      string: cuda{{ cuda_compiler_version | replace('.', '') }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}  # [cuda_compiler_version != "None"]
+      string: cpu_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}  # [cuda_compiler_version == "None"]
     requirements:
       # build requirements needs to pick up the compiler run_exports
       build:
         - {{ compiler('c') }}
         - {{ compiler('cxx') }}
+        # Keep the cuda compiler here since it helps package solvers
+        # decide on the cuda variant
+        # https://github.com/conda-forge/tensorflow-feedstock/issues/162
+        - {{ compiler('cuda') }}  # [cuda_compiler_version != "None"]
       # host requirements to pick up run_exports
       host:
         - giflib
@@ -296,8 +345,25 @@ outputs:
       requires:
         - {{ compiler('cxx') }}
       commands:
-        - test -f $PREFIX/lib/libtensorflow_cc.so  # [not win]
-        - ./test_libtensorflow_cc.sh               # [not win]
+        - test -f $PREFIX/lib/libtensorflow_cc.so  # [not win and cuda_compiler_version == "None"]
+        - ./test_libtensorflow_cc.sh               # [not win and cuda_compiler_version == "None"]
+
+  # 2021/08/01, hmaarrfk
+  # While this seems like a roundabout way of defining the package name,
+  # it helps the linter avoid errors on a package not having tests.
+  {% set tensorflow_cpu_gpu = "tensorflow-cpu" %}   # [cuda_compiler_version == "None"]
+  {% set tensorflow_cpu_gpu = "tensorflow-gpu" %}   # [cuda_compiler_version != "None"]
+  - name: {{ tensorflow_cpu_gpu }}
+    build:
+      string: cuda{{ cuda_compiler_version | replace('.', '') }}py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}  # [cuda_compiler_version != "None"]
+      string: cpu_py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}                                      # [cuda_compiler_version == "None"]
+      detect_binary_files_with_prefix: False
+    requirements:
+      run:
+        - {{ pin_subpackage("tensorflow", exact=True) }}
+    test:
+      imports:
+        - tensorflow
 
 about:
   home: http://tensorflow.org/
@@ -326,3 +392,4 @@ extra:
     - waitingkuo
     - xhochy
     - hmaarrfk
+    - wolfv