Skip to content

Instantly share code, notes, and snippets.

@EdmondFrank
Created September 7, 2020 05:37
Show Gist options
  • Save EdmondFrank/25b2378a843443e5f01b15b13b6a1e70 to your computer and use it in GitHub Desktop.
Save EdmondFrank/25b2378a843443e5f01b15b13b6a1e70 to your computer and use it in GitHub Desktop.
Anaconda sync script for Nexus Repository
import groovy.json.JsonParserType
import groovy.json.JsonSlurper
import java.util.concurrent.ExecutorService
import java.util.concurrent.Executors
import java.util.concurrent.TimeUnit
import java.util.concurrent.atomic.AtomicLong
import org.sonatype.nexus.repository.storage.Asset
import org.sonatype.nexus.repository.storage.Query
import org.sonatype.nexus.repository.storage.StorageFacet
CONDA_CLOUD_BASE_URL = "https://conda.anaconda.org"
NEXUS_REPOSITORY = "http://127.0.0.1:8081/repository/conda"
WORKING_DIR = "/data/tmp"
REPOSITORY_NAME = 'conda'
WORKERS = 3
CONDA_CLOUD_REPOS = [
'conda-forge/linux-64', 'conda-forge/osx-64', 'conda-forge/win-64', 'conda-forge/noarch',
'msys2/linux-64', 'msys2/win-64', 'msys2/noarch', 'rapidsai/linux-64', 'rapidsai/noarch',
'bioconda/linux-64', 'bioconda/osx-64', 'bioconda/win-64', 'bioconda/noarch',
'menpo/linux-64', 'menpo/osx-64', 'menpo/win-64', 'menpo/win-32', 'menpo/noarch',
'pytorch/linux-64', 'pytorch/osx-64', 'pytorch/win-64', 'pytorch/win-32', 'pytorch/noarch',
'pytorch-test/linux-64', 'pytorch-test/osx-64', 'pytorch-test/win-64', 'pytorch-test/win-32', 'pytorch-test/noarch',
'stackless/linux-64', 'stackless/win-64', 'stackless/win-32', 'stackless/linux-32', 'stackless/osx-64', 'stackless/noarch',
'fermi/linux-64', 'fermi/osx-64', 'fermi/win-64', 'fermi/noarch',
'fastai/linux-64', 'fastai/osx-64', 'fastai/win-64', 'fastai/noarch',
'omnia/linux-64', 'omnia/osx-64', 'omnia/win-64', 'omnia/noarch',
'simpleitk/linux-64', 'simpleitk/linux-32', 'simpleitk/osx-64', 'simpleitk/win-64', 'simpleitk/win-32', 'simpleitk/noarch',
'caffe2/linux-64', 'caffe2/osx-64', 'caffe2/win-64', 'caffe2/noarch',
'plotly/linux-64', 'plotly/linux-32', 'plotly/osx-64', 'plotly/win-64', 'plotly/win-32', 'plotly/noarch',
'intel/linux-64', 'intel/linux-32', 'intel/osx-64', 'intel/win-64', 'intel/win-32', 'intel/noarch',
'auto/linux-64', 'auto/linux-32', 'auto/osx-64', 'auto/win-64', 'auto/win-32', 'auto/noarch',
'ursky/linux-64', 'ursky/osx-64', 'ursky/noarch',
'matsci/linux-64', 'matsci/osx-64', 'matsci/win-64', 'matsci/noarch',
'psi4/linux-64', 'psi4/osx-64', 'psi4/win-64', 'psi4/noarch',
'Paddle/linux-64', 'Paddle/linux-32', 'Paddle/osx-64', 'Paddle/win-64', 'Paddle/win-32', 'Paddle/noarch',
'deepmodeling/linux-64', 'deepmodeling/noarch',
'numba/linux-64', 'numba/linux-32', 'numba/osx-64', 'numba/win-64', 'numba/win-32', 'numba/noarch',
'numba/label/dev/win-64', 'numba/label/dev/noarch',
'pyviz/linux-64', 'pyviz/linux-32', 'pyviz/win-64', 'pyviz/win-32', 'pyviz/osx-64', 'pyviz/noarch',
'dglteam/linux-64', 'dglteam/win-64', 'dglteam/osx-64', 'dglteam/noarch',
'rdkit/linux-64', 'rdkit/win-64', 'rdkit/osx-64', 'rdkit/noarch',
'mordred-descriptor/linux-64', 'mordred-descriptor/win-64', 'mordred-descriptor/win-32', 'mordred-descriptor/osx-64', 'mordred-descriptor/noarch',
'ohmeta/linux-64', 'ohmeta/osx-64', 'ohmeta/noarch',
'c4aarch64/linux-aarch64', 'c4aarch64/noarch']
/**
 * Downloads {@code url} into the local file {@code filename}, following
 * redirects manually (any response carrying a {@code Location} header is
 * treated as a redirect hop).
 *
 * Redirects are followed by hand, with the connection's automatic
 * redirect handling switched off for HTTP connections.
 *
 * @param url      absolute URL to fetch
 * @param filename path of the local file to (over)write with the response body
 * @param proxy    optional proxy used for every hop; {@code null} means a direct connection
 * @throws IOException if the redirect chain exceeds the hop limit, or if
 *                     opening/reading the response or writing the file fails
 */
def fileDownload(String url, String filename, Proxy proxy=null) {
    // Upper bound on redirect hops so a redirect cycle cannot loop forever.
    final int maxRedirects = 20
    int hops = 0
    while (url) {
        URL current = new URL(url)
        def conn = proxy == null ? current.openConnection() : current.openConnection(proxy)
        boolean isHttp = conn instanceof HttpURLConnection
        if (isHttp) {
            // Only HTTP(S) connections expose this property; guarding keeps
            // other schemes (e.g. file:) usable.
            conn.instanceFollowRedirects = false
        }
        String location = conn.getHeaderField("Location")
        if (location) {
            if (++hops > maxRedirects) {
                throw new IOException("Too many redirects (> ${maxRedirects}) while downloading to ${filename}")
            }
            // Resolve against the current URL so relative Location values work.
            url = new URL(current, location).toString()
            if (isHttp) {
                // The redirect body is never read; release the connection now.
                conn.disconnect()
            }
        } else {
            url = null
            // Open the response first: an HTTP error then throws before the
            // target file is created/truncated. withStream closes the input
            // stream even when the copy fails.
            conn.inputStream.withStream { inp ->
                new File(filename).withOutputStream { out ->
                    out << inp
                }
            }
        }
    }
}
/**
 * Renders a byte count as a human-readable string by repeatedly dividing by
 * 1024 and attaching the matching magnitude prefix.
 *
 * @param num    the quantity to format
 * @param suffix text appended after the magnitude prefix (default 'iB')
 * @return the value with two decimals and grouping separators, followed by
 *         the prefix ('' through 'Z', or 'Y' once all of those are exhausted)
 *         and the suffix
 */
def sizeFmt(double num, String suffix='iB') {
    List<String> prefixes = ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']
    double scaled = num
    int idx = 0
    // Step up one prefix for as long as the magnitude does not fit below 1024.
    while (idx < prefixes.size() && !(Math.abs(scaled) < 1024)) {
        scaled /= 1024.0d
        idx++
    }
    String prefix = idx < prefixes.size() ? prefixes[idx] : 'Y'
    return String.format("%,.2f%s%s", scaled, prefix, suffix)
}
/**
 * Syncs one upstream conda repo: fetches its repodata.json, and for every
 * package whose MD5 is not a key of {@code existedAssets}, downloads the
 * package file through NEXUS_REPOSITORY (the upstream base URL in the package
 * URL is replaced by the Nexus URL) using a pool of WORKERS threads.
 * Presumably the point of fetching via Nexus is to make it store the package;
 * confirm against the repository configuration.
 *
 * @param repoUrl       upstream repo URL (base URL + channel/platform path)
 * @param localDir      directory the package files are written to (created if missing)
 * @param tmpdir        scratch directory that receives repodata.json
 * @param existedAssets map keyed by the MD5 of assets already present; must not be empty
 * @param args          options; a truthy {@code delete} removes each local
 *                      package file after its download attempt (success or failure)
 * @return total size in bytes (as a long, taken from the repodata {@code size}
 *         fields) of the packages downloaded successfully
 * @throws Exception if {@code existedAssets} is empty, or if fetching/parsing
 *                   repodata.json fails; per-package failures are only logged
 */
def syncRepo(String repoUrl, String localDir, File tmpdir, Map existedAssets, Map args) {
    if (existedAssets.size() == 0) {
        throw new Exception("loading existed assets failed!")
    }
    println("Starting syncing ${repoUrl} => ${localDir}")
    new File(localDir).mkdirs()

    // Download metadata from upstream. To go through a proxy, pass a
    // java.net.Proxy as the third argument of fileDownload.
    String repoDataUrl = repoUrl + '/repodata.json'
    String tmpRepoData = "${tmpdir}/repodata.json"
    fileDownload(repoDataUrl, tmpRepoData)

    def jsonSlurper = new JsonSlurper(type: JsonParserType.INDEX_OVERLAY)
    // JSON is UTF-8 by specification; do not depend on the platform charset.
    Map repodata = jsonSlurper.parseText(new File(tmpRepoData).getText('UTF-8'))
    Map packages = repodata.packages ?: [:]
    if ('packages.conda' in repodata) {
        packages += repodata['packages.conda']
    }

    // Updated concurrently by the pool threads, and the sum can exceed the
    // int range, hence an AtomicLong rather than a plain int.
    AtomicLong totalSize = new AtomicLong()
    ExecutorService pool = Executors.newFixedThreadPool(WORKERS)
    try {
        packages.each { filename, meta ->
            if (meta.md5 in existedAssets) {
                // Already present; nothing to do.
                return
            }
            pool.execute {
                log.info("new download: ${filename} with md5:${meta.md5}")
                String pkgUrl = "${repoUrl}/${filename}"
                String localFile = "${localDir}/${filename}"
                try {
                    // Request the package from Nexus instead of upstream.
                    fileDownload(
                        pkgUrl.replace(CONDA_CLOUD_BASE_URL, NEXUS_REPOSITORY),
                        localFile
                    )
                    totalSize.addAndGet((meta.size ?: 0) as long)
                } catch (Exception ex) {
                    log.info("failed to download: ${filename} ${ex}")
                } finally {
                    if (args.delete) {
                        new File(localFile).delete()
                    }
                }
            }
        }
    } finally {
        // Always stop accepting work so the pool threads can exit, even if
        // scheduling failed part-way through.
        pool.shutdown()
    }

    try {
        // Block until all queued downloads are done, without spinning a CPU core.
        pool.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS)
    } catch (InterruptedException ie) {
        pool.shutdownNow()
        Thread.currentThread().interrupt()
        throw ie
    }
    return totalSize.get()
}
// ---------------------------------------------------------------------------
// Script body: collect the MD5s of the assets already stored in the Nexus
// repository, sync every configured upstream repo, then log the total size.
// ---------------------------------------------------------------------------
def args = ["delete": true]
double statistics = 0

def targetRepo = repository.repositoryManager.get(REPOSITORY_NAME)
assert targetRepo: "Repository ${REPOSITORY_NAME} does not exist"
StorageFacet facet = targetRepo.facet(StorageFacet)
def storageTx = facet.txSupplier().get()

// Used as a set: key = MD5 of an existing asset, value = 1.
def existedAssets = [:]
try {
    storageTx.begin()
    def found = storageTx.browseAssets(storageTx.findBucket(targetRepo))
    if (found != null && !found.isEmpty()) {
        for (Asset asset in found) {
            def md5 = asset.attributes().child('checksum').get('md5')
            // Skip assets without an MD5 and MD5s that are already recorded.
            if (md5 == null || (md5 in existedAssets)) {
                continue
            }
            existedAssets.put(md5, 1)
        }
    }
} catch (Exception e) {
    log.info("Error: "+e)
} finally {
    storageTx.close()
}

for (repoPath in CONDA_CLOUD_REPOS) {
    String remoteUrl = "${CONDA_CLOUD_BASE_URL}/${repoPath}"
    String localDir = "${WORKING_DIR}/cloud/${repoPath}"
    // Fresh scratch directory per repo, removed again whatever the outcome.
    File tmpdir = File.createTempDir()
    try {
        statistics += syncRepo(remoteUrl, localDir, tmpdir, existedAssets, args)
    } catch (Exception ex) {
        log.info("Failed to sync repo: ${repoPath} ${ex}")
    } finally {
        tmpdir.deleteDir()
    }
}
log.info(sizeFmt(statistics))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment