-
-
Save datavudeja/00233a61cdd5003a258e4c4ec9fd2af9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Load a repodata JSON file into a single-table SQLite database.

The input is expected to be a mapping with a ``"packages"`` key whose value
maps package filenames to metadata dicts (this looks like conda-style
``repodata.json`` -- confirm against the actual input file).
"""
import json
import sqlite3

# Columns of the ``repodata`` table; ``filename`` is the primary key.
COLS = 'filename, build, build_number, depends, license, license_family, md5, name, sha256, size, subdir, timestamp, version'.split(', ')


def build_rows(repodata, cols=COLS):
    """Return one insertable dict per entry of ``repodata["packages"]``.

    Every column in *cols* is present in each row (``None`` when the package
    record lacks it).  ``filename`` is taken from the mapping key and
    ``depends`` is stored as a JSON-encoded list.  Keys of the package record
    that are not in *cols* are carried along unchanged; they are simply not
    bound by the insert statement.

    A record without a ``depends`` key is stored as ``"[]"`` instead of
    raising ``KeyError``.
    """
    rows = []
    for filename, pkg in repodata["packages"].items():
        row = dict.fromkeys(cols)
        row.update(pkg)
        row["filename"] = filename
        row["depends"] = json.dumps(pkg.get("depends", []))
        rows.append(row)
    return rows


def populate_db(db, rows, cols=COLS):
    """Create the ``repodata`` table in *db*, insert *rows*, index by name.

    *db* is an open ``sqlite3`` connection; the changes are committed before
    returning.  Raises ``sqlite3.OperationalError`` if the table already
    exists (same as re-running the original script on an existing file).
    """
    db.execute("create table repodata ({}, primary key (filename))".format(','.join(cols)))
    # Alternative that was left disabled: insert a single row per package
    # name carrying the union of all its builds' dependencies.
    # all_deps = {}
    # for row in rows:
    #     all_deps.setdefault(row["name"], set()).update(row["depends"])
    # for row in rows:
    #     if row["name"] in all_deps:
    #         db.executemany("insert into repodata ({}) values ({})".format(','.join(COLS), ','.join(':'+c for c in COLS)), [{
    #             **row, "depends": json.dumps(list(all_deps[row["name"]]))}])
    #         del all_deps[row["name"]]
    # Named placeholders (:col) are looked up in each row dict by key.
    db.executemany("insert into repodata ({}) values ({})".format(','.join(cols), ','.join(':'+c for c in cols)), rows)
    db.execute("create index idx_name on repodata (name)")
    db.commit()


def build_database(json_path="497deca9.json", db_path="497deca9.sqlite"):
    """Read *json_path* and write its packages into a new database at *db_path*."""
    # Context manager so the input file is closed deterministically.
    with open(json_path, encoding="utf-8") as fh:
        repodata = json.load(fh)
    db = sqlite3.connect(db_path)
    try:
        populate_db(db, build_rows(repodata))
        # Optional, disabled: transparent zstd compression of text columns
        # via the third-party sqlite_zstd extension.
        # import sqlite_zstd
        # db.enable_load_extension(True)
        # sqlite_zstd.load(db)
        # for col in 'build,depends,license,license_family,md5,sha256,subdir,version'.split(','):
        #     print('compressing',col)
        #     db.execute(f"""SELECT zstd_enable_transparent('{{"table": "repodata", "column": "{col}", "compression_level": 19, "dict_chooser": "''a''"}}')""")
        # db.execute("select zstd_incremental_maintenance(null, 1)")
        # db.execute("vacuum")
        # db.commit()
    finally:
        db.close()


if __name__ == "__main__":
    build_database()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import json | |
import sqlite3 | |
db = sqlite3.connect("497deca9.sqlite") | |
# db.enable_load_extension(True) | |
# import sqlite_zstd | |
# sqlite_zstd.load(db) | |
#want = {'pandas'} | |
want = set("_libgcc_mutex _openmp_mutex alabaster annotated-types anyio argon2-cffi argon2-cffi-bindings arrow asciitree asttokens async-lru attrs aws-c-auth aws-c-cal aws-c-common aws-c-compression aws-c-event-stream aws-c-http aws-c-io aws-c-mqtt aws-c-s3 aws-c-sdkutils aws-checksums aws-crt-cpp aws-sdk-cpp babel backcall backports backports.functools_lru_cache barmer_utils beautifulsoup4 blas bleach brotli brotli-bin brotli-python bzip2 c-ares ca-certificates cached-property cached_property certifi cffi cfgv charset-normalizer click colorama comm conda-env-lock contourpy coverage cycler debugpy decorator defusedxml dil_health_drg_fake_data distlib docutils drg-plausibility drg-simulation drg_external_data_1010 entrypoints exceptiongroup executing fasteners filelock fonttools fqdn freetype gflags glog greenlet identify idna imagesize importlib-metadata importlib_metadata importlib_resources iniconfig ipykernel ipython isoduration jedi jinja2 joblib json5 jsonpointer jsonschema jsonschema-specifications jsonschema-with-format-nongpl jupyter-lsp jupyter_client jupyter_core jupyter_events jupyter_server jupyter_server_terminals jupyterlab jupyterlab_pygments jupyterlab_server keyutils kiwisolver krb5 lcms2 ld_impl_linux-64 lerc libabseil libarrow libblas libbrotlicommon libbrotlidec libbrotlienc libcblas libcrc32c libcurl libdeflate libedit libev libevent libexpat libffi libgcc-ng libgfortran-ng libgfortran5 libgomp libgoogle-cloud libgrpc libiconv libjpeg-turbo liblapack libnghttp2 libnsl libnuma libopenblas libpng libprotobuf libsodium libsqlite libssh2 libstdcxx-ng libthrift libtiff libutf8proc libuuid libwebp-base libxcb libzlib lightgbm lz4-c make markupsafe matplotlib-base matplotlib-inline matplotlib-venn mistune msgpack-python multimethod munkres mypy_extensions nbclient nbconvert-core nbformat ncurses nest-asyncio nodeenv notebook notebook-shim numcodecs numpy numpydoc openblas openjpeg openssl orc overrides packaging pandas pandera-base pandocfilters parso 
patsy pbr pexpect pickleshare pillow pip pkgutil-resolve-name platformdirs pluggy polars pooch pre-commit prometheus_client prompt-toolkit prompt_toolkit psutil pthread-stubs ptyprocess pure_eval pyarrow pycparser pydantic pydantic-core pygments pyodbc pyparsing pysocks pytest pytest-cov python python-dateutil python-fastjsonschema python-json-logger python-tzdata python_abi pytz pyyaml pyzmq qc_drg_grouper_geos_binaries quantcore.ducttape re2 readline referencing requests rfc3339-validator rfc3986-validator rheia rpds-py ruamel.yaml ruamel.yaml.clib ruamel.yaml.jinja2 s2n scikit-learn scipy seaborn seaborn-base send2trash setuptools setuptools-scm six snappy sniffio snowballstemmer soupsieve sphinx sphinx_rtd_theme sphinxcontrib-apidoc sphinxcontrib-applehelp sphinxcontrib-devhelp sphinxcontrib-htmlhelp sphinxcontrib-jquery sphinxcontrib-jsmath sphinxcontrib-qthelp sphinxcontrib-serializinghtml sqlalchemy stack_data statsmodels terminado themis-assets themis-assets-axa themis-assets-external-data themis-assets-meta themis-assets-model themis-assets-plausibility themis-assets-simulation threadpoolctl tinycss2 tk toml tomli tornado tqdm traitlets turbodbc typeguard typing-extensions typing_extensions typing_inspect typing_utils tzdata ukkonen unixodbc uri-template urllib3 virtualenv wcwidth webcolors webencodings websocket-client wheel wrapt xorg-libxau xorg-libxdmcp xz yaml zarr zeromq zipp zstandard zstd".split()) | |
versions = {} | |
start = time.perf_counter() | |
while want: | |
pkg = want.pop() | |
versions[pkg] = [] | |
for version, build, depends in db.execute("select version, build, depends from repodata where name = ?", [pkg]).fetchall(): | |
versions[pkg].append((version, build)) | |
for d in (d.split()[0] for d in json.loads(depends)): | |
if d not in versions: | |
want.add(d) | |
duration = time.perf_counter() - start | |
print("Took", duration, "s to reduce number of builds from", db.execute("select count(*) from repodata").fetchone()[0], "to", sum(map(len, versions.values()))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment