Created
September 7, 2020 05:37
-
-
Save EdmondFrank/25b2378a843443e5f01b15b13b6a1e70 to your computer and use it in GitHub Desktop.
Anaconda sync script for Nexus Repository
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import groovy.json.JsonParserType
import groovy.json.JsonSlurper
import org.sonatype.nexus.repository.storage.Asset
import org.sonatype.nexus.repository.storage.Query
import org.sonatype.nexus.repository.storage.StorageFacet

import java.util.concurrent.ExecutorService
import java.util.concurrent.Executors
import java.util.concurrent.TimeUnit
import java.util.concurrent.atomic.AtomicLong
// Script-wide settings. They are assigned without `def` on purpose: undeclared
// variables live in the script binding, which is what lets the methods defined
// in this file (e.g. syncRepo) read them.
CONDA_CLOUD_BASE_URL = "https://conda.anaconda.org"
NEXUS_REPOSITORY = "http://127.0.0.1:8081/repository/conda"
WORKING_DIR = "/data/tmp"
REPOSITORY_NAME = 'conda'
WORKERS = 3

// Channels to synchronise, keyed by channel path with the platform
// sub-directories to visit for each. The map is flattened, in declaration
// order, into the list of "<channel>/<platform>" strings.
CONDA_CLOUD_REPOS = [
    'conda-forge'       : ['linux-64', 'osx-64', 'win-64', 'noarch'],
    'msys2'             : ['linux-64', 'win-64', 'noarch'],
    'rapidsai'          : ['linux-64', 'noarch'],
    'bioconda'          : ['linux-64', 'osx-64', 'win-64', 'noarch'],
    'menpo'             : ['linux-64', 'osx-64', 'win-64', 'win-32', 'noarch'],
    'pytorch'           : ['linux-64', 'osx-64', 'win-64', 'win-32', 'noarch'],
    'pytorch-test'      : ['linux-64', 'osx-64', 'win-64', 'win-32', 'noarch'],
    'stackless'         : ['linux-64', 'win-64', 'win-32', 'linux-32', 'osx-64', 'noarch'],
    'fermi'             : ['linux-64', 'osx-64', 'win-64', 'noarch'],
    'fastai'            : ['linux-64', 'osx-64', 'win-64', 'noarch'],
    'omnia'             : ['linux-64', 'osx-64', 'win-64', 'noarch'],
    'simpleitk'         : ['linux-64', 'linux-32', 'osx-64', 'win-64', 'win-32', 'noarch'],
    'caffe2'            : ['linux-64', 'osx-64', 'win-64', 'noarch'],
    'plotly'            : ['linux-64', 'linux-32', 'osx-64', 'win-64', 'win-32', 'noarch'],
    'intel'             : ['linux-64', 'linux-32', 'osx-64', 'win-64', 'win-32', 'noarch'],
    'auto'              : ['linux-64', 'linux-32', 'osx-64', 'win-64', 'win-32', 'noarch'],
    'ursky'             : ['linux-64', 'osx-64', 'noarch'],
    'matsci'            : ['linux-64', 'osx-64', 'win-64', 'noarch'],
    'psi4'              : ['linux-64', 'osx-64', 'win-64', 'noarch'],
    'Paddle'            : ['linux-64', 'linux-32', 'osx-64', 'win-64', 'win-32', 'noarch'],
    'deepmodeling'      : ['linux-64', 'noarch'],
    'numba'             : ['linux-64', 'linux-32', 'osx-64', 'win-64', 'win-32', 'noarch'],
    'numba/label/dev'   : ['win-64', 'noarch'],
    'pyviz'             : ['linux-64', 'linux-32', 'win-64', 'win-32', 'osx-64', 'noarch'],
    'dglteam'           : ['linux-64', 'win-64', 'osx-64', 'noarch'],
    'rdkit'             : ['linux-64', 'win-64', 'osx-64', 'noarch'],
    'mordred-descriptor': ['linux-64', 'win-64', 'win-32', 'osx-64', 'noarch'],
    'ohmeta'            : ['linux-64', 'osx-64', 'noarch'],
    'c4aarch64'         : ['linux-aarch64', 'noarch']
].collectMany { channel, platforms ->
    platforms.collect { platform -> channel + '/' + platform }
}
/**
 * Downloads {@code url} into the local file {@code filename}, following
 * redirects manually.
 *
 * Automatic redirect handling is switched off and the {@code Location}
 * response header is followed by hand on each hop. Whenever a response carries
 * a {@code Location} header it is treated as a redirect; otherwise the
 * response body is written to {@code filename}.
 *
 * @param url      absolute URL to fetch
 * @param filename path of the file to (over)write with the response body
 * @param proxy    optional proxy to open every connection through; {@code null}
 *                 means a direct connection
 * @return always {@code null}
 * @throws IOException if the server answers with an HTTP error status, the
 *         redirect chain is longer than 20 hops, or the transfer fails
 */
def fileDownload(String url, String filename, Proxy proxy=null) {
    final int maxRedirects = 20
    int redirects = 0
    while (url) {
        URL current = new URL(url)
        URLConnection conn = (proxy == null) ? current.openConnection() : current.openConnection(proxy)
        // Without timeouts a stalled peer would block the calling thread forever.
        conn.connectTimeout = 30000
        conn.readTimeout = 300000
        boolean isHttp = conn instanceof HttpURLConnection
        try {
            String location = null
            if (isHttp) {
                conn.instanceFollowRedirects = false
                int status = conn.responseCode
                location = conn.getHeaderField("Location")
                // Fail before the target file is created so no empty file is left behind.
                if (!location && status >= 400) {
                    throw new IOException("HTTP ${status} while downloading ${url}")
                }
            }
            if (location) {
                redirects++
                if (redirects > maxRedirects) {
                    throw new ProtocolException("Too many redirects (> ${maxRedirects}) while downloading ${url}")
                }
                // Location may be relative; resolve it against the URL that produced it.
                url = new URL(current, location).toString()
            } else {
                new File(filename).withOutputStream { out ->
                    // withStream closes the input even when the copy fails.
                    conn.inputStream.withStream { inp -> out << inp }
                }
                url = null
            }
        } finally {
            if (isHttp) {
                conn.disconnect()
            }
        }
    }
}
/**
 * Renders a byte count as a human-readable string using 1024-based prefixes.
 *
 * The value is divided by 1024 until its magnitude drops below 1024 or the
 * largest prefix ('Y') is reached, then formatted with two decimals and
 * grouping separators, followed by the prefix and {@code suffix}.
 *
 * @param num    the quantity to format
 * @param suffix unit text appended after the prefix (default 'iB')
 * @return the formatted string, e.g. 1536 -> "1.50KiB"
 */
def sizeFmt(double num, String suffix='iB') {
    List<String> prefixes = ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y']
    int last = prefixes.size() - 1
    double scaled = num
    int idx = 0
    // The negated "< 1024" test keeps NaN scaling all the way up to 'Y'.
    while (idx < last && !(Math.abs(scaled) < 1024)) {
        scaled = scaled / 1024.0d
        idx++
    }
    return String.format("%,.2f%s%s", scaled, prefixes[idx], suffix)
}
/**
 * Synchronises one upstream conda channel/platform directory.
 *
 * Downloads {@code repodata.json} from {@code repoUrl}, merges its
 * {@code packages} and {@code packages.conda} sections, and for every package
 * whose MD5 is not a key of {@code existedAssets} fetches the package on a
 * pool of {@code WORKERS} threads. The package URL is rewritten from
 * {@code CONDA_CLOUD_BASE_URL} to {@code NEXUS_REPOSITORY}, so the bytes are
 * requested from the Nexus repository rather than from upstream directly
 * (presumably to make Nexus cache them -- confirm against the repository setup).
 *
 * @param repoUrl       upstream URL of the channel/platform directory
 * @param localDir      directory the packages are downloaded into (created if missing)
 * @param tmpdir        scratch directory that receives {@code repodata.json}
 * @param existedAssets map keyed by the MD5 of assets that are already present
 * @param args          options; a truthy {@code delete} entry removes each
 *                      downloaded file once its download attempt has finished
 * @return total size in bytes (as declared in the repodata) of the packages
 *         that were downloaded successfully
 * @throws Exception if {@code existedAssets} is empty, or if the metadata
 *         download/parsing fails
 */
def syncRepo(String repoUrl, String localDir, File tmpdir, Map existedAssets, Map args) {
    if (existedAssets.size() == 0) {
        throw new Exception("loading existed assets failed!")
    }
    log.info("Starting syncing ${repoUrl} => ${localDir}")
    new File(localDir).mkdirs()

    // Download metadata from upstream.
    // To go through an HTTP proxy, pass one as the third argument, e.g.
    //   new Proxy(Proxy.Type.HTTP, new InetSocketAddress("127.0.0.1", 1081))
    String repoDataUrl = repoUrl + '/repodata.json'
    String tmpRepoData = "${tmpdir}/repodata.json"
    fileDownload(repoDataUrl, tmpRepoData)

    def jsonSlurper = new JsonSlurper(type: JsonParserType.INDEX_OVERLAY)
    // Decode explicitly as UTF-8 instead of relying on the platform default charset.
    Map repodata = jsonSlurper.parseText(new File(tmpRepoData).getText('UTF-8'))

    // Merge both package sections; either one may be absent from the metadata.
    Map packages = [:]
    if (repodata.packages) {
        packages.putAll(repodata.packages)
    }
    if (repodata['packages.conda']) {
        packages.putAll(repodata['packages.conda'])
    }

    // Updated from several worker threads, and the sum easily exceeds the int
    // range, hence an atomic long rather than a plain int.
    AtomicLong totalSize = new AtomicLong()
    ExecutorService pool = Executors.newFixedThreadPool(WORKERS)
    try {
        packages.each { filename, meta ->
            if (meta.md5 in existedAssets) {
                return // already present, nothing to do
            }
            pool.execute {
                log.info("new download: ${filename} with md5:${meta.md5}")
                String pkgUrl = "${repoUrl}/${filename}"
                String localFile = "${localDir}/${filename}"
                try {
                    fileDownload(
                        pkgUrl.replace(CONDA_CLOUD_BASE_URL, NEXUS_REPOSITORY),
                        localFile
                    )
                    totalSize.addAndGet((meta.size ?: 0) as long)
                } catch (Exception ex) {
                    log.info("failed to download: ${filename} ${ex}")
                } finally {
                    if (args.delete) {
                        new File(localFile).delete()
                    }
                }
            }
        }
    } finally {
        // Always release the worker threads, even if submitting a task failed.
        pool.shutdown()
    }

    // Block until every submitted download has finished, without spinning.
    try {
        while (!pool.awaitTermination(1, TimeUnit.MINUTES)) {
            // still downloading
        }
    } catch (InterruptedException ex) {
        pool.shutdownNow()
        Thread.currentThread().interrupt()
        throw ex
    }
    return totalSize.get()
}
// ---- Script entry point ----------------------------------------------------
// 1. Resolve the target Nexus repository.
// 2. Record the MD5 checksum of every asset it already holds.
// 3. Sync each configured upstream channel and log the total size downloaded.

def repo = repository.repositoryManager.get(REPOSITORY_NAME)
assert repo: "Repository ${REPOSITORY_NAME} does not exist"

// Used as a set: key = MD5 checksum, value = 1.
def existedAssets = [:]
StorageFacet storageFacet = repo.facet(StorageFacet)
def tx = storageFacet.txSupplier().get()
try {
    tx.begin()
    def assets = tx.browseAssets(tx.findBucket(repo))
    if (assets != null && !assets.isEmpty()) {
        for (Asset asset in assets) {
            def hash = asset.attributes().child('checksum').get('md5')
            if (hash != null) {
                existedAssets.putIfAbsent(hash, 1)
            }
        }
    }
} catch(Exception e) {
    // A failure here is only logged; existedAssets may then be empty or partial.
    log.info("Error: "+e)
} finally {
    tx.close()
}

def args = ["delete": true]
double statistics = 0
for (channelPath in CONDA_CLOUD_REPOS) {
    String remoteUrl = "${CONDA_CLOUD_BASE_URL}/${channelPath}"
    String localDir = "${WORKING_DIR}/cloud/${channelPath}"
    // Fresh scratch directory per channel; always removed afterwards.
    File tmpdir = File.createTempDir()
    try {
        statistics += syncRepo(remoteUrl, localDir, tmpdir, existedAssets, args)
    } catch(Exception ex) {
        log.info("Failed to sync repo: ${channelPath} ${ex}")
    } finally {
        tmpdir.deleteDir()
    }
}
log.info(sizeFmt(statistics))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment