Created
November 7, 2023 21:51
-
-
Save tsibley/e92c67d35491ca9a2c94df27bf934e7f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 5de6c904a6be1c775a576f6497e8a7a2c1f2f06b Mon Sep 17 00:00:00 2001 | |
From: James Hadfield <[email protected]> | |
Date: Mon, 25 Sep 2023 13:32:49 +1300 | |
Subject: [PATCH] Collect resources from core + staging buckets | |
This sets out the pattern for reading S3 inventories and turning them | |
into resource collections. The JSON output will ultimately be used by | |
nextstrain.org to both provide a listing of available resources and to | |
be queried by versioned dataset requests (in order to go from a | |
requested date to the corresponding S3 version IDs of the relevant | |
objects). | |
Eventually this flat JSON file may be replaced with a database, | |
but for now this is a simple way to introduce the functionality. The | |
collected resources JSON for core + staging is a ~3.2 MB JSON file | |
(gzipped). When naively loaded into node it increases the total size of | |
the allocated heap (V8) by ~60 MB (presumably this would be reduced by | |
mapping certain string constants to variables). | |
Currently only working for S3 buckets nextstrain-data and | |
nextstrain-staging. Narratives are not yet considered, in part because | |
they are not stored on S3. | |
Run `node resourceIndexer/main.js --help` for usage. AWS credentials | |
with permission to read s3://nextstrain-inventories will need to be set | |
in the usual way. | |
--- | |
.gitignore | 3 + | |
package-lock.json | 494 ++++++++++++++++++++++++++++++- | |
package.json | 4 +- | |
resourceIndexer/constants.js | 35 +++ | |
resourceIndexer/coreStagingS3.js | 294 ++++++++++++++++++ | |
resourceIndexer/errors.js | 1 + | |
resourceIndexer/inventory.js | 255 ++++++++++++++++ | |
resourceIndexer/logger.js | 10 + | |
resourceIndexer/main.js | 113 +++++++ | |
9 files changed, 1196 insertions(+), 13 deletions(-) | |
create mode 100644 resourceIndexer/constants.js | |
create mode 100644 resourceIndexer/coreStagingS3.js | |
create mode 100644 resourceIndexer/errors.js | |
create mode 100644 resourceIndexer/inventory.js | |
create mode 100644 resourceIndexer/logger.js | |
create mode 100644 resourceIndexer/main.js | |
diff --git a/.gitignore b/.gitignore | |
index 8d0b1a69..564a660f 100644 | |
--- a/.gitignore | |
+++ b/.gitignore | |
@@ -17,6 +17,9 @@ | |
# local key-value storage (e.g. when Redis isn't configured) | |
/data/kv.db | |
+# data caches etc for development purposes | |
+/devData/ | |
+ | |
# Generated by scripts/collect-datasets.js | |
/data/datasets_influenza.json | |
/data/datasets_staging.json | |
diff --git a/package-lock.json b/package-lock.json | |
index 0d7be3de..e2d64630 100644 | |
--- a/package-lock.json | |
+++ b/package-lock.json | |
@@ -34,9 +34,11 @@ | |
"jszip": "^3.10.1", | |
"keyv": "^4.5.4", | |
"lodash.partition": "^4.6.0", | |
+ "luxon": "^3.4.3", | |
"make-fetch-happen": "^10.0.0", | |
"marked": "^0.7.0", | |
"mime": "^2.5.2", | |
+ "neat-csv": "^7.0.0", | |
"negotiator": "^0.6.2", | |
"node-fetch": "^2.6.0", | |
"passport": "^0.4.0", | |
@@ -45,6 +47,7 @@ | |
"proxy-agent": "^6.3.1", | |
"raw-body": "^2.4.2", | |
"session-file-store": "^1.3.1", | |
+ "winston": "^3.11.0", | |
"yaml-front-matter": "^4.0.0" | |
}, | |
"devDependencies": { | |
@@ -60,7 +63,6 @@ | |
"http-proxy-middleware": "^1.3.1", | |
"jest": "^27.5.1", | |
"jest-extended": "^1.1.0", | |
- "luxon": "^3.0.4", | |
"nodemon": "^2.0.22", | |
"request": "^2.88.2", | |
"start-server-and-test": "^1.11.4" | |
@@ -4563,6 +4565,24 @@ | |
"integrity": "sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==", | |
"dev": true | |
}, | |
+ "node_modules/@colors/colors": { | |
+ "version": "1.6.0", | |
+ "resolved": "https://registry.npmjs.org/@colors/colors/-/colors-1.6.0.tgz", | |
+ "integrity": "sha512-Ir+AOibqzrIsL6ajt3Rz3LskB7OiMVHqltZmspbW/TJuTVuyOMirVqAkjfY6JISiLHgyNqicAC8AyHHGzNd/dA==", | |
+ "engines": { | |
+ "node": ">=0.1.90" | |
+ } | |
+ }, | |
+ "node_modules/@dabh/diagnostics": { | |
+ "version": "2.0.3", | |
+ "resolved": "https://registry.npmjs.org/@dabh/diagnostics/-/diagnostics-2.0.3.tgz", | |
+ "integrity": "sha512-hrlQOIi7hAfzsMqlGSFyVucrx38O+j6wiGOf//H2ecvIEqYN4ADBSS2iLMh5UFyDunCNniUIPk/q3riFv45xRA==", | |
+ "dependencies": { | |
+ "colorspace": "1.1.x", | |
+ "enabled": "2.0.x", | |
+ "kuler": "^2.0.0" | |
+ } | |
+ }, | |
"node_modules/@eslint/eslintrc": { | |
"version": "1.3.2", | |
"resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-1.3.2.tgz", | |
@@ -8330,6 +8350,11 @@ | |
"integrity": "sha512-Hl219/BT5fLAaz6NDkSuhzasy49dwQS/DSdu4MdggFB8zcXv7vflBI3xp7FEmkmdDkBUI2bPUNeMttp2knYdxw==", | |
"dev": true | |
}, | |
+ "node_modules/@types/triple-beam": { | |
+ "version": "1.3.4", | |
+ "resolved": "https://registry.npmjs.org/@types/triple-beam/-/triple-beam-1.3.4.tgz", | |
+ "integrity": "sha512-HlJjF3wxV4R2VQkFpKe0YqJLilYNgtRtsqqZtby7RkVsSs+i+vbyzjtUwpFEdUCKcrGzCiEJE7F/0mKjh0sunA==" | |
+ }, | |
"node_modules/@types/ws": { | |
"version": "8.5.3", | |
"resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.5.3.tgz", | |
@@ -8836,6 +8861,11 @@ | |
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz", | |
"integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==" | |
}, | |
+ "node_modules/async": { | |
+ "version": "3.2.5", | |
+ "resolved": "https://registry.npmjs.org/async/-/async-3.2.5.tgz", | |
+ "integrity": "sha512-baNZyqaaLhyLVKm/DlvdW051MSgO6b8eVfIezl9E5PqWxFgzLm/wQntEW4zOytVburDEr0JlALEpdOFwvErLsg==" | |
+ }, | |
"node_modules/async-limiter": { | |
"version": "1.0.1", | |
"resolved": "https://registry.npmjs.org/async-limiter/-/async-limiter-1.0.1.tgz", | |
@@ -10250,6 +10280,15 @@ | |
"integrity": "sha512-iBPtljfCNcTKNAto0KEtDfZ3qzjJvqE3aTGZsbhjSBlorqpXJlaWWtPO35D+ZImoC3KWejX64o+yPGxhWSTzfg==", | |
"dev": true | |
}, | |
+ "node_modules/color": { | |
+ "version": "3.2.1", | |
+ "resolved": "https://registry.npmjs.org/color/-/color-3.2.1.tgz", | |
+ "integrity": "sha512-aBl7dZI9ENN6fUGC7mWpMTPNHmWUSNan9tuWN6ahh5ZLNk9baLJOnSMlrQkHcrfFgz2/RigjUVAjdx36VcemKA==", | |
+ "dependencies": { | |
+ "color-convert": "^1.9.3", | |
+ "color-string": "^1.6.0" | |
+ } | |
+ }, | |
"node_modules/color-convert": { | |
"version": "1.9.3", | |
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", | |
@@ -10263,6 +10302,15 @@ | |
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", | |
"integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=" | |
}, | |
+ "node_modules/color-string": { | |
+ "version": "1.9.1", | |
+ "resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.1.tgz", | |
+ "integrity": "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==", | |
+ "dependencies": { | |
+ "color-name": "^1.0.0", | |
+ "simple-swizzle": "^0.2.2" | |
+ } | |
+ }, | |
"node_modules/color-support": { | |
"version": "1.1.3", | |
"resolved": "https://registry.npmjs.org/color-support/-/color-support-1.1.3.tgz", | |
@@ -10271,6 +10319,15 @@ | |
"color-support": "bin.js" | |
} | |
}, | |
+ "node_modules/colorspace": { | |
+ "version": "1.1.4", | |
+ "resolved": "https://registry.npmjs.org/colorspace/-/colorspace-1.1.4.tgz", | |
+ "integrity": "sha512-BgvKJiuVu1igBUF2kEjRCZXol6wiiGbY5ipL/oVPwm0BL9sIpMIzM8IK7vwuxIIzOXMV3Ey5w+vxhm0rR/TN8w==", | |
+ "dependencies": { | |
+ "color": "^3.1.3", | |
+ "text-hex": "1.0.x" | |
+ } | |
+ }, | |
"node_modules/combined-stream": { | |
"version": "1.0.8", | |
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", | |
@@ -10476,6 +10533,28 @@ | |
"integrity": "sha512-b0tGHbfegbhPJpxpiBPU2sCkigAqtM9O121le6bbOlgyV+NyGyCmVfJ6QW9eRjz8CpNfWEOYBIMIGRYkLwsIYg==", | |
"dev": true | |
}, | |
+ "node_modules/csv-parser": { | |
+ "version": "3.0.0", | |
+ "resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.0.0.tgz", | |
+ "integrity": "sha512-s6OYSXAK3IdKqYO33y09jhypG/bSDHPuyCme/IdEHfWpLf/jKcpitVFyOC6UemgGk8v7Q5u2XE0vvwmanxhGlQ==", | |
+ "dependencies": { | |
+ "minimist": "^1.2.0" | |
+ }, | |
+ "bin": { | |
+ "csv-parser": "bin/csv-parser" | |
+ }, | |
+ "engines": { | |
+ "node": ">= 10" | |
+ } | |
+ }, | |
+ "node_modules/csv-parser/node_modules/minimist": { | |
+ "version": "1.2.8", | |
+ "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", | |
+ "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", | |
+ "funding": { | |
+ "url": "https://github.com/sponsors/ljharb" | |
+ } | |
+ }, | |
"node_modules/dashdash": { | |
"version": "1.14.1", | |
"resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz", | |
@@ -10802,6 +10881,11 @@ | |
"url": "https://github.com/sindresorhus/emittery?sponsor=1" | |
} | |
}, | |
+ "node_modules/enabled": { | |
+ "version": "2.0.0", | |
+ "resolved": "https://registry.npmjs.org/enabled/-/enabled-2.0.0.tgz", | |
+ "integrity": "sha512-AKrN98kuwOzMIdAizXGI86UFBoo26CL21UM763y1h/GMSJ4/OHU9k2YlsmBpyScFo/wbLzWQJBMCW4+IO3/+OQ==" | |
+ }, | |
"node_modules/encodeurl": { | |
"version": "1.0.2", | |
"resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz", | |
@@ -11858,6 +11942,11 @@ | |
"bser": "2.1.1" | |
} | |
}, | |
+ "node_modules/fecha": { | |
+ "version": "4.2.3", | |
+ "resolved": "https://registry.npmjs.org/fecha/-/fecha-4.2.3.tgz", | |
+ "integrity": "sha512-OP2IUU6HeYKJi3i0z4A19kHMQoLVs4Hc+DPqqxI2h/DPZHTm/vjsfC6P0b4jCMy14XizLBqvndQ+UilD7707Jw==" | |
+ }, | |
"node_modules/file-entry-cache": { | |
"version": "6.0.1", | |
"resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz", | |
@@ -12006,6 +12095,11 @@ | |
"integrity": "sha512-5nqDSxl8nn5BSNxyR3n4I6eDmbolI6WT+QqR547RwxQapgjQBmtktdP+HTBb/a/zLsbzERTONyUB5pefh5TtjQ==", | |
"dev": true | |
}, | |
+ "node_modules/fn.name": { | |
+ "version": "1.1.0", | |
+ "resolved": "https://registry.npmjs.org/fn.name/-/fn.name-1.1.0.tgz", | |
+ "integrity": "sha512-GRnmB5gPyJpAhTQdSZTSp9uaPSvl09KoYcMQtsB9rQoOmzs9dH6ffeccH+Z+cv6P68Hu5bC6JjRh4Ah/mHSNRw==" | |
+ }, | |
"node_modules/follow-redirects": { | |
"version": "1.5.10", | |
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.5.10.tgz", | |
@@ -12976,7 +13070,6 @@ | |
"version": "2.0.1", | |
"resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", | |
"integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", | |
- "dev": true, | |
"engines": { | |
"node": ">=8" | |
}, | |
@@ -15746,6 +15839,11 @@ | |
"node": ">=6" | |
} | |
}, | |
+ "node_modules/kuler": { | |
+ "version": "2.0.0", | |
+ "resolved": "https://registry.npmjs.org/kuler/-/kuler-2.0.0.tgz", | |
+ "integrity": "sha512-Xq9nH7KlWZmXAtodXDDRE7vs6DU1gTU8zYDHDiWLSip45Egwq3plLHzPn27NgvzL2r1LMPC1vdqh98sQxtqj4A==" | |
+ }, | |
"node_modules/lazy-ass": { | |
"version": "1.6.0", | |
"resolved": "https://registry.npmjs.org/lazy-ass/-/lazy-ass-1.6.0.tgz", | |
@@ -15866,6 +15964,27 @@ | |
"resolved": "https://registry.npmjs.org/lodash.partition/-/lodash.partition-4.6.0.tgz", | |
"integrity": "sha1-o45GtzRp4EILDaEhLmbUFL42S6Q=" | |
}, | |
+ "node_modules/logform": { | |
+ "version": "2.6.0", | |
+ "resolved": "https://registry.npmjs.org/logform/-/logform-2.6.0.tgz", | |
+ "integrity": "sha512-1ulHeNPp6k/LD8H91o7VYFBng5i1BDE7HoKxVbZiGFidS1Rj65qcywLxX+pVfAPoQJEjRdvKcusKwOupHCVOVQ==", | |
+ "dependencies": { | |
+ "@colors/colors": "1.6.0", | |
+ "@types/triple-beam": "^1.3.2", | |
+ "fecha": "^4.2.0", | |
+ "ms": "^2.1.1", | |
+ "safe-stable-stringify": "^2.3.1", | |
+ "triple-beam": "^1.3.0" | |
+ }, | |
+ "engines": { | |
+ "node": ">= 12.0.0" | |
+ } | |
+ }, | |
+ "node_modules/logform/node_modules/ms": { | |
+ "version": "2.1.3", | |
+ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", | |
+ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" | |
+ }, | |
"node_modules/lowercase-keys": { | |
"version": "1.0.1", | |
"resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-1.0.1.tgz", | |
@@ -15883,10 +16002,9 @@ | |
} | |
}, | |
"node_modules/luxon": { | |
- "version": "3.0.4", | |
- "resolved": "https://registry.npmjs.org/luxon/-/luxon-3.0.4.tgz", | |
- "integrity": "sha512-aV48rGUwP/Vydn8HT+5cdr26YYQiUZ42NM6ToMoaGKwYfWbfLeRkEu1wXWMHBZT6+KyLfcbbtVcoQFCbbPjKlw==", | |
- "dev": true, | |
+ "version": "3.4.3", | |
+ "resolved": "https://registry.npmjs.org/luxon/-/luxon-3.4.3.tgz", | |
+ "integrity": "sha512-tFWBiv3h7z+T/tDaoxA8rqTxy1CHV6gHS//QdaH4pulbq/JuBSGgQspQQqcgnwdAx6pNI7cmvz5Sv/addzHmUg==", | |
"engines": { | |
"node": ">=12" | |
} | |
@@ -16552,6 +16670,32 @@ | |
"integrity": "sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=", | |
"dev": true | |
}, | |
+ "node_modules/neat-csv": { | |
+ "version": "7.0.0", | |
+ "resolved": "https://registry.npmjs.org/neat-csv/-/neat-csv-7.0.0.tgz", | |
+ "integrity": "sha512-ZmiKZNkdqb6hrBU3lDHm52vWXs6CuFPfw6ZoJZNnY7IIpfA1fxM0UPPi+iQpqQo82qcLbsZPwLkQ1cdrMDtwwA==", | |
+ "dependencies": { | |
+ "csv-parser": "^3.0.0", | |
+ "get-stream": "^6.0.1" | |
+ }, | |
+ "engines": { | |
+ "node": "^12.20.0 || ^14.13.1 || >=16.0.0" | |
+ }, | |
+ "funding": { | |
+ "url": "https://github.com/sponsors/sindresorhus" | |
+ } | |
+ }, | |
+ "node_modules/neat-csv/node_modules/get-stream": { | |
+ "version": "6.0.1", | |
+ "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz", | |
+ "integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==", | |
+ "engines": { | |
+ "node": ">=10" | |
+ }, | |
+ "funding": { | |
+ "url": "https://github.com/sponsors/sindresorhus" | |
+ } | |
+ }, | |
"node_modules/negotiator": { | |
"version": "0.6.2", | |
"resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz", | |
@@ -17083,6 +17227,14 @@ | |
"wrappy": "1" | |
} | |
}, | |
+ "node_modules/one-time": { | |
+ "version": "1.0.0", | |
+ "resolved": "https://registry.npmjs.org/one-time/-/one-time-1.0.0.tgz", | |
+ "integrity": "sha512-5DXOiRKwuSEcQ/l0kGCF6Q3jcADFv5tSmRaJck/OqkVFcOzutB134KRSfF0xDrL39MNnqxbHBbUUcjZIhTgb2g==", | |
+ "dependencies": { | |
+ "fn.name": "1.x.x" | |
+ } | |
+ }, | |
"node_modules/onetime": { | |
"version": "5.1.2", | |
"resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz", | |
@@ -18199,6 +18351,14 @@ | |
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", | |
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" | |
}, | |
+ "node_modules/safe-stable-stringify": { | |
+ "version": "2.4.3", | |
+ "resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-2.4.3.tgz", | |
+ "integrity": "sha512-e2bDA2WJT0wxseVd4lsDP4+3ONX6HpMXQa1ZhFQ7SU+GjvORCmShbCMltrtIDfkYhVHrOcPtj+KhmDBdPdZD1g==", | |
+ "engines": { | |
+ "node": ">=10" | |
+ } | |
+ }, | |
"node_modules/safer-buffer": { | |
"version": "2.1.2", | |
"resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", | |
@@ -18396,6 +18556,19 @@ | |
"resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", | |
"integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==" | |
}, | |
+ "node_modules/simple-swizzle": { | |
+ "version": "0.2.2", | |
+ "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz", | |
+ "integrity": "sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==", | |
+ "dependencies": { | |
+ "is-arrayish": "^0.3.1" | |
+ } | |
+ }, | |
+ "node_modules/simple-swizzle/node_modules/is-arrayish": { | |
+ "version": "0.3.2", | |
+ "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz", | |
+ "integrity": "sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ==" | |
+ }, | |
"node_modules/simple-update-notifier": { | |
"version": "1.1.0", | |
"resolved": "https://registry.npmjs.org/simple-update-notifier/-/simple-update-notifier-1.1.0.tgz", | |
@@ -18639,6 +18812,14 @@ | |
"node": ">= 8" | |
} | |
}, | |
+ "node_modules/stack-trace": { | |
+ "version": "0.0.10", | |
+ "resolved": "https://registry.npmjs.org/stack-trace/-/stack-trace-0.0.10.tgz", | |
+ "integrity": "sha512-KGzahc7puUKkzyMt+IqAep+TVNbKP+k2Lmwhub39m1AsTSkaDutx56aDCo+HLDzf/D26BIHTJWNiTG1KAJiQCg==", | |
+ "engines": { | |
+ "node": "*" | |
+ } | |
+ }, | |
"node_modules/stack-utils": { | |
"version": "2.0.2", | |
"resolved": "https://registry.npmjs.org/stack-utils/-/stack-utils-2.0.2.tgz", | |
@@ -19115,6 +19296,11 @@ | |
"url": "https://github.com/sponsors/isaacs" | |
} | |
}, | |
+ "node_modules/text-hex": { | |
+ "version": "1.0.0", | |
+ "resolved": "https://registry.npmjs.org/text-hex/-/text-hex-1.0.0.tgz", | |
+ "integrity": "sha512-uuVGNWzgJ4yhRaNSiubPY7OjISw4sw4E5Uv0wbjp+OzcbmVU/rsT8ujgcXJhn9ypzsgr5vlzpPqP+MBBKcGvbg==" | |
+ }, | |
"node_modules/text-table": { | |
"version": "0.2.0", | |
"resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", | |
@@ -19234,6 +19420,14 @@ | |
"node": "*" | |
} | |
}, | |
+ "node_modules/triple-beam": { | |
+ "version": "1.4.1", | |
+ "resolved": "https://registry.npmjs.org/triple-beam/-/triple-beam-1.4.1.tgz", | |
+ "integrity": "sha512-aZbgViZrg1QNcG+LULa7nhZpJTZSLm/mXnHXnbAbjmN5aSa0y7V+wvv6+4WaBtpISJzThKy+PIPxc1Nq1EJ9mg==", | |
+ "engines": { | |
+ "node": ">= 14.0.0" | |
+ } | |
+ }, | |
"node_modules/tslib": { | |
"version": "1.10.0", | |
"resolved": "https://registry.npmjs.org/tslib/-/tslib-1.10.0.tgz", | |
@@ -19731,6 +19925,66 @@ | |
"node": ">= 0.10.0" | |
} | |
}, | |
+ "node_modules/winston": { | |
+ "version": "3.11.0", | |
+ "resolved": "https://registry.npmjs.org/winston/-/winston-3.11.0.tgz", | |
+ "integrity": "sha512-L3yR6/MzZAOl0DsysUXHVjOwv8mKZ71TrA/41EIduGpOOV5LQVodqN+QdQ6BS6PJ/RdIshZhq84P/fStEZkk7g==", | |
+ "dependencies": { | |
+ "@colors/colors": "^1.6.0", | |
+ "@dabh/diagnostics": "^2.0.2", | |
+ "async": "^3.2.3", | |
+ "is-stream": "^2.0.0", | |
+ "logform": "^2.4.0", | |
+ "one-time": "^1.0.0", | |
+ "readable-stream": "^3.4.0", | |
+ "safe-stable-stringify": "^2.3.1", | |
+ "stack-trace": "0.0.x", | |
+ "triple-beam": "^1.3.0", | |
+ "winston-transport": "^4.5.0" | |
+ }, | |
+ "engines": { | |
+ "node": ">= 12.0.0" | |
+ } | |
+ }, | |
+ "node_modules/winston-transport": { | |
+ "version": "4.6.0", | |
+ "resolved": "https://registry.npmjs.org/winston-transport/-/winston-transport-4.6.0.tgz", | |
+ "integrity": "sha512-wbBA9PbPAHxKiygo7ub7BYRiKxms0tpfU2ljtWzb3SjRjv5yl6Ozuy/TkXf00HTAt+Uylo3gSkNwzc4ME0wiIg==", | |
+ "dependencies": { | |
+ "logform": "^2.3.2", | |
+ "readable-stream": "^3.6.0", | |
+ "triple-beam": "^1.3.0" | |
+ }, | |
+ "engines": { | |
+ "node": ">= 12.0.0" | |
+ } | |
+ }, | |
+ "node_modules/winston-transport/node_modules/readable-stream": { | |
+ "version": "3.6.2", | |
+ "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", | |
+ "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", | |
+ "dependencies": { | |
+ "inherits": "^2.0.3", | |
+ "string_decoder": "^1.1.1", | |
+ "util-deprecate": "^1.0.1" | |
+ }, | |
+ "engines": { | |
+ "node": ">= 6" | |
+ } | |
+ }, | |
+ "node_modules/winston/node_modules/readable-stream": { | |
+ "version": "3.6.2", | |
+ "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", | |
+ "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", | |
+ "dependencies": { | |
+ "inherits": "^2.0.3", | |
+ "string_decoder": "^1.1.1", | |
+ "util-deprecate": "^1.0.1" | |
+ }, | |
+ "engines": { | |
+ "node": ">= 6" | |
+ } | |
+ }, | |
"node_modules/word-wrap": { | |
"version": "1.2.3", | |
"resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz", | |
@@ -23780,6 +24034,21 @@ | |
"integrity": "sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==", | |
"dev": true | |
}, | |
+ "@colors/colors": { | |
+ "version": "1.6.0", | |
+ "resolved": "https://registry.npmjs.org/@colors/colors/-/colors-1.6.0.tgz", | |
+ "integrity": "sha512-Ir+AOibqzrIsL6ajt3Rz3LskB7OiMVHqltZmspbW/TJuTVuyOMirVqAkjfY6JISiLHgyNqicAC8AyHHGzNd/dA==" | |
+ }, | |
+ "@dabh/diagnostics": { | |
+ "version": "2.0.3", | |
+ "resolved": "https://registry.npmjs.org/@dabh/diagnostics/-/diagnostics-2.0.3.tgz", | |
+ "integrity": "sha512-hrlQOIi7hAfzsMqlGSFyVucrx38O+j6wiGOf//H2ecvIEqYN4ADBSS2iLMh5UFyDunCNniUIPk/q3riFv45xRA==", | |
+ "requires": { | |
+ "colorspace": "1.1.x", | |
+ "enabled": "2.0.x", | |
+ "kuler": "^2.0.0" | |
+ } | |
+ }, | |
"@eslint/eslintrc": { | |
"version": "1.3.2", | |
"resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-1.3.2.tgz", | |
@@ -26886,6 +27155,11 @@ | |
"integrity": "sha512-Hl219/BT5fLAaz6NDkSuhzasy49dwQS/DSdu4MdggFB8zcXv7vflBI3xp7FEmkmdDkBUI2bPUNeMttp2knYdxw==", | |
"dev": true | |
}, | |
+ "@types/triple-beam": { | |
+ "version": "1.3.4", | |
+ "resolved": "https://registry.npmjs.org/@types/triple-beam/-/triple-beam-1.3.4.tgz", | |
+ "integrity": "sha512-HlJjF3wxV4R2VQkFpKe0YqJLilYNgtRtsqqZtby7RkVsSs+i+vbyzjtUwpFEdUCKcrGzCiEJE7F/0mKjh0sunA==" | |
+ }, | |
"@types/ws": { | |
"version": "8.5.3", | |
"resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.5.3.tgz", | |
@@ -27273,6 +27547,11 @@ | |
} | |
} | |
}, | |
+ "async": { | |
+ "version": "3.2.5", | |
+ "resolved": "https://registry.npmjs.org/async/-/async-3.2.5.tgz", | |
+ "integrity": "sha512-baNZyqaaLhyLVKm/DlvdW051MSgO6b8eVfIezl9E5PqWxFgzLm/wQntEW4zOytVburDEr0JlALEpdOFwvErLsg==" | |
+ }, | |
"async-limiter": { | |
"version": "1.0.1", | |
"resolved": "https://registry.npmjs.org/async-limiter/-/async-limiter-1.0.1.tgz", | |
@@ -28389,6 +28668,15 @@ | |
"integrity": "sha512-iBPtljfCNcTKNAto0KEtDfZ3qzjJvqE3aTGZsbhjSBlorqpXJlaWWtPO35D+ZImoC3KWejX64o+yPGxhWSTzfg==", | |
"dev": true | |
}, | |
+ "color": { | |
+ "version": "3.2.1", | |
+ "resolved": "https://registry.npmjs.org/color/-/color-3.2.1.tgz", | |
+ "integrity": "sha512-aBl7dZI9ENN6fUGC7mWpMTPNHmWUSNan9tuWN6ahh5ZLNk9baLJOnSMlrQkHcrfFgz2/RigjUVAjdx36VcemKA==", | |
+ "requires": { | |
+ "color-convert": "^1.9.3", | |
+ "color-string": "^1.6.0" | |
+ } | |
+ }, | |
"color-convert": { | |
"version": "1.9.3", | |
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", | |
@@ -28402,11 +28690,29 @@ | |
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", | |
"integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=" | |
}, | |
+ "color-string": { | |
+ "version": "1.9.1", | |
+ "resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.1.tgz", | |
+ "integrity": "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==", | |
+ "requires": { | |
+ "color-name": "^1.0.0", | |
+ "simple-swizzle": "^0.2.2" | |
+ } | |
+ }, | |
"color-support": { | |
"version": "1.1.3", | |
"resolved": "https://registry.npmjs.org/color-support/-/color-support-1.1.3.tgz", | |
"integrity": "sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg==" | |
}, | |
+ "colorspace": { | |
+ "version": "1.1.4", | |
+ "resolved": "https://registry.npmjs.org/colorspace/-/colorspace-1.1.4.tgz", | |
+ "integrity": "sha512-BgvKJiuVu1igBUF2kEjRCZXol6wiiGbY5ipL/oVPwm0BL9sIpMIzM8IK7vwuxIIzOXMV3Ey5w+vxhm0rR/TN8w==", | |
+ "requires": { | |
+ "color": "^3.1.3", | |
+ "text-hex": "1.0.x" | |
+ } | |
+ }, | |
"combined-stream": { | |
"version": "1.0.8", | |
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", | |
@@ -28582,6 +28888,21 @@ | |
} | |
} | |
}, | |
+ "csv-parser": { | |
+ "version": "3.0.0", | |
+ "resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.0.0.tgz", | |
+ "integrity": "sha512-s6OYSXAK3IdKqYO33y09jhypG/bSDHPuyCme/IdEHfWpLf/jKcpitVFyOC6UemgGk8v7Q5u2XE0vvwmanxhGlQ==", | |
+ "requires": { | |
+ "minimist": "^1.2.0" | |
+ }, | |
+ "dependencies": { | |
+ "minimist": { | |
+ "version": "1.2.8", | |
+ "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", | |
+ "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==" | |
+ } | |
+ } | |
+ }, | |
"dashdash": { | |
"version": "1.14.1", | |
"resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz", | |
@@ -28835,6 +29156,11 @@ | |
"integrity": "sha512-uDfvUjVrfGJJhymx/kz6prltenw1u7WrCg1oa94zYY8xxVpLLUu045LAT0dhDZdXG58/EpPL/5kA180fQ/qudg==", | |
"dev": true | |
}, | |
+ "enabled": { | |
+ "version": "2.0.0", | |
+ "resolved": "https://registry.npmjs.org/enabled/-/enabled-2.0.0.tgz", | |
+ "integrity": "sha512-AKrN98kuwOzMIdAizXGI86UFBoo26CL21UM763y1h/GMSJ4/OHU9k2YlsmBpyScFo/wbLzWQJBMCW4+IO3/+OQ==" | |
+ }, | |
"encodeurl": { | |
"version": "1.0.2", | |
"resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz", | |
@@ -29622,6 +29948,11 @@ | |
"bser": "2.1.1" | |
} | |
}, | |
+ "fecha": { | |
+ "version": "4.2.3", | |
+ "resolved": "https://registry.npmjs.org/fecha/-/fecha-4.2.3.tgz", | |
+ "integrity": "sha512-OP2IUU6HeYKJi3i0z4A19kHMQoLVs4Hc+DPqqxI2h/DPZHTm/vjsfC6P0b4jCMy14XizLBqvndQ+UilD7707Jw==" | |
+ }, | |
"file-entry-cache": { | |
"version": "6.0.1", | |
"resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz", | |
@@ -29737,6 +30068,11 @@ | |
"integrity": "sha512-5nqDSxl8nn5BSNxyR3n4I6eDmbolI6WT+QqR547RwxQapgjQBmtktdP+HTBb/a/zLsbzERTONyUB5pefh5TtjQ==", | |
"dev": true | |
}, | |
+ "fn.name": { | |
+ "version": "1.1.0", | |
+ "resolved": "https://registry.npmjs.org/fn.name/-/fn.name-1.1.0.tgz", | |
+ "integrity": "sha512-GRnmB5gPyJpAhTQdSZTSp9uaPSvl09KoYcMQtsB9rQoOmzs9dH6ffeccH+Z+cv6P68Hu5bC6JjRh4Ah/mHSNRw==" | |
+ }, | |
"follow-redirects": { | |
"version": "1.5.10", | |
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.5.10.tgz", | |
@@ -30498,8 +30834,7 @@ | |
"is-stream": { | |
"version": "2.0.1", | |
"resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", | |
- "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", | |
- "dev": true | |
+ "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==" | |
}, | |
"is-typedarray": { | |
"version": "1.0.0", | |
@@ -32599,6 +32934,11 @@ | |
"integrity": "sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==", | |
"dev": true | |
}, | |
+ "kuler": { | |
+ "version": "2.0.0", | |
+ "resolved": "https://registry.npmjs.org/kuler/-/kuler-2.0.0.tgz", | |
+ "integrity": "sha512-Xq9nH7KlWZmXAtodXDDRE7vs6DU1gTU8zYDHDiWLSip45Egwq3plLHzPn27NgvzL2r1LMPC1vdqh98sQxtqj4A==" | |
+ }, | |
"lazy-ass": { | |
"version": "1.6.0", | |
"resolved": "https://registry.npmjs.org/lazy-ass/-/lazy-ass-1.6.0.tgz", | |
@@ -32704,6 +33044,26 @@ | |
"resolved": "https://registry.npmjs.org/lodash.partition/-/lodash.partition-4.6.0.tgz", | |
"integrity": "sha1-o45GtzRp4EILDaEhLmbUFL42S6Q=" | |
}, | |
+ "logform": { | |
+ "version": "2.6.0", | |
+ "resolved": "https://registry.npmjs.org/logform/-/logform-2.6.0.tgz", | |
+ "integrity": "sha512-1ulHeNPp6k/LD8H91o7VYFBng5i1BDE7HoKxVbZiGFidS1Rj65qcywLxX+pVfAPoQJEjRdvKcusKwOupHCVOVQ==", | |
+ "requires": { | |
+ "@colors/colors": "1.6.0", | |
+ "@types/triple-beam": "^1.3.2", | |
+ "fecha": "^4.2.0", | |
+ "ms": "^2.1.1", | |
+ "safe-stable-stringify": "^2.3.1", | |
+ "triple-beam": "^1.3.0" | |
+ }, | |
+ "dependencies": { | |
+ "ms": { | |
+ "version": "2.1.3", | |
+ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", | |
+ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" | |
+ } | |
+ } | |
+ }, | |
"lowercase-keys": { | |
"version": "1.0.1", | |
"resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-1.0.1.tgz", | |
@@ -32715,10 +33075,9 @@ | |
"integrity": "sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==" | |
}, | |
"luxon": { | |
- "version": "3.0.4", | |
- "resolved": "https://registry.npmjs.org/luxon/-/luxon-3.0.4.tgz", | |
- "integrity": "sha512-aV48rGUwP/Vydn8HT+5cdr26YYQiUZ42NM6ToMoaGKwYfWbfLeRkEu1wXWMHBZT6+KyLfcbbtVcoQFCbbPjKlw==", | |
- "dev": true | |
+ "version": "3.4.3", | |
+ "resolved": "https://registry.npmjs.org/luxon/-/luxon-3.4.3.tgz", | |
+ "integrity": "sha512-tFWBiv3h7z+T/tDaoxA8rqTxy1CHV6gHS//QdaH4pulbq/JuBSGgQspQQqcgnwdAx6pNI7cmvz5Sv/addzHmUg==" | |
}, | |
"make-dir": { | |
"version": "3.1.0", | |
@@ -33232,6 +33591,22 @@ | |
"integrity": "sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=", | |
"dev": true | |
}, | |
+ "neat-csv": { | |
+ "version": "7.0.0", | |
+ "resolved": "https://registry.npmjs.org/neat-csv/-/neat-csv-7.0.0.tgz", | |
+ "integrity": "sha512-ZmiKZNkdqb6hrBU3lDHm52vWXs6CuFPfw6ZoJZNnY7IIpfA1fxM0UPPi+iQpqQo82qcLbsZPwLkQ1cdrMDtwwA==", | |
+ "requires": { | |
+ "csv-parser": "^3.0.0", | |
+ "get-stream": "^6.0.1" | |
+ }, | |
+ "dependencies": { | |
+ "get-stream": { | |
+ "version": "6.0.1", | |
+ "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz", | |
+ "integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==" | |
+ } | |
+ } | |
+ }, | |
"negotiator": { | |
"version": "0.6.2", | |
"resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz", | |
@@ -33646,6 +34021,14 @@ | |
"wrappy": "1" | |
} | |
}, | |
+ "one-time": { | |
+ "version": "1.0.0", | |
+ "resolved": "https://registry.npmjs.org/one-time/-/one-time-1.0.0.tgz", | |
+ "integrity": "sha512-5DXOiRKwuSEcQ/l0kGCF6Q3jcADFv5tSmRaJck/OqkVFcOzutB134KRSfF0xDrL39MNnqxbHBbUUcjZIhTgb2g==", | |
+ "requires": { | |
+ "fn.name": "1.x.x" | |
+ } | |
+ }, | |
"onetime": { | |
"version": "5.1.2", | |
"resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz", | |
@@ -34491,6 +34874,11 @@ | |
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", | |
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" | |
}, | |
+ "safe-stable-stringify": { | |
+ "version": "2.4.3", | |
+ "resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-2.4.3.tgz", | |
+ "integrity": "sha512-e2bDA2WJT0wxseVd4lsDP4+3ONX6HpMXQa1ZhFQ7SU+GjvORCmShbCMltrtIDfkYhVHrOcPtj+KhmDBdPdZD1g==" | |
+ }, | |
"safer-buffer": { | |
"version": "2.1.2", | |
"resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", | |
@@ -34657,6 +35045,21 @@ | |
"resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", | |
"integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==" | |
}, | |
+ "simple-swizzle": { | |
+ "version": "0.2.2", | |
+ "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz", | |
+ "integrity": "sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==", | |
+ "requires": { | |
+ "is-arrayish": "^0.3.1" | |
+ }, | |
+ "dependencies": { | |
+ "is-arrayish": { | |
+ "version": "0.3.2", | |
+ "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz", | |
+ "integrity": "sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ==" | |
+ } | |
+ } | |
+ }, | |
"simple-update-notifier": { | |
"version": "1.1.0", | |
"resolved": "https://registry.npmjs.org/simple-update-notifier/-/simple-update-notifier-1.1.0.tgz", | |
@@ -34845,6 +35248,11 @@ | |
"minipass": "^3.1.1" | |
} | |
}, | |
+ "stack-trace": { | |
+ "version": "0.0.10", | |
+ "resolved": "https://registry.npmjs.org/stack-trace/-/stack-trace-0.0.10.tgz", | |
+ "integrity": "sha512-KGzahc7puUKkzyMt+IqAep+TVNbKP+k2Lmwhub39m1AsTSkaDutx56aDCo+HLDzf/D26BIHTJWNiTG1KAJiQCg==" | |
+ }, | |
"stack-utils": { | |
"version": "2.0.2", | |
"resolved": "https://registry.npmjs.org/stack-utils/-/stack-utils-2.0.2.tgz", | |
@@ -35196,6 +35604,11 @@ | |
} | |
} | |
}, | |
+ "text-hex": { | |
+ "version": "1.0.0", | |
+ "resolved": "https://registry.npmjs.org/text-hex/-/text-hex-1.0.0.tgz", | |
+ "integrity": "sha512-uuVGNWzgJ4yhRaNSiubPY7OjISw4sw4E5Uv0wbjp+OzcbmVU/rsT8ujgcXJhn9ypzsgr5vlzpPqP+MBBKcGvbg==" | |
+ }, | |
"text-table": { | |
"version": "0.2.0", | |
"resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", | |
@@ -35290,6 +35703,11 @@ | |
"optional": true, | |
"peer": true | |
}, | |
+ "triple-beam": { | |
+ "version": "1.4.1", | |
+ "resolved": "https://registry.npmjs.org/triple-beam/-/triple-beam-1.4.1.tgz", | |
+ "integrity": "sha512-aZbgViZrg1QNcG+LULa7nhZpJTZSLm/mXnHXnbAbjmN5aSa0y7V+wvv6+4WaBtpISJzThKy+PIPxc1Nq1EJ9mg==" | |
+ }, | |
"tslib": { | |
"version": "1.10.0", | |
"resolved": "https://registry.npmjs.org/tslib/-/tslib-1.10.0.tgz", | |
@@ -35684,6 +36102,58 @@ | |
"optional": true, | |
"peer": true | |
}, | |
+ "winston": { | |
+ "version": "3.11.0", | |
+ "resolved": "https://registry.npmjs.org/winston/-/winston-3.11.0.tgz", | |
+ "integrity": "sha512-L3yR6/MzZAOl0DsysUXHVjOwv8mKZ71TrA/41EIduGpOOV5LQVodqN+QdQ6BS6PJ/RdIshZhq84P/fStEZkk7g==", | |
+ "requires": { | |
+ "@colors/colors": "^1.6.0", | |
+ "@dabh/diagnostics": "^2.0.2", | |
+ "async": "^3.2.3", | |
+ "is-stream": "^2.0.0", | |
+ "logform": "^2.4.0", | |
+ "one-time": "^1.0.0", | |
+ "readable-stream": "^3.4.0", | |
+ "safe-stable-stringify": "^2.3.1", | |
+ "stack-trace": "0.0.x", | |
+ "triple-beam": "^1.3.0", | |
+ "winston-transport": "^4.5.0" | |
+ }, | |
+ "dependencies": { | |
+ "readable-stream": { | |
+ "version": "3.6.2", | |
+ "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", | |
+ "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", | |
+ "requires": { | |
+ "inherits": "^2.0.3", | |
+ "string_decoder": "^1.1.1", | |
+ "util-deprecate": "^1.0.1" | |
+ } | |
+ } | |
+ } | |
+ }, | |
+ "winston-transport": { | |
+ "version": "4.6.0", | |
+ "resolved": "https://registry.npmjs.org/winston-transport/-/winston-transport-4.6.0.tgz", | |
+ "integrity": "sha512-wbBA9PbPAHxKiygo7ub7BYRiKxms0tpfU2ljtWzb3SjRjv5yl6Ozuy/TkXf00HTAt+Uylo3gSkNwzc4ME0wiIg==", | |
+ "requires": { | |
+ "logform": "^2.3.2", | |
+ "readable-stream": "^3.6.0", | |
+ "triple-beam": "^1.3.0" | |
+ }, | |
+ "dependencies": { | |
+ "readable-stream": { | |
+ "version": "3.6.2", | |
+ "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", | |
+ "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", | |
+ "requires": { | |
+ "inherits": "^2.0.3", | |
+ "string_decoder": "^1.1.1", | |
+ "util-deprecate": "^1.0.1" | |
+ } | |
+ } | |
+ } | |
+ }, | |
"word-wrap": { | |
"version": "1.2.3", | |
"resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz", | |
diff --git a/package.json b/package.json | |
index 366ec6e2..7e767fc9 100644 | |
--- a/package.json | |
+++ b/package.json | |
@@ -49,9 +49,11 @@ | |
"jszip": "^3.10.1", | |
"keyv": "^4.5.4", | |
"lodash.partition": "^4.6.0", | |
+ "luxon": "^3.4.3", | |
"make-fetch-happen": "^10.0.0", | |
"marked": "^0.7.0", | |
"mime": "^2.5.2", | |
+ "neat-csv": "^7.0.0", | |
"negotiator": "^0.6.2", | |
"node-fetch": "^2.6.0", | |
"passport": "^0.4.0", | |
@@ -60,6 +62,7 @@ | |
"proxy-agent": "^6.3.1", | |
"raw-body": "^2.4.2", | |
"session-file-store": "^1.3.1", | |
+ "winston": "^3.11.0", | |
"yaml-front-matter": "^4.0.0" | |
}, | |
"devDependencies": { | |
@@ -75,7 +78,6 @@ | |
"http-proxy-middleware": "^1.3.1", | |
"jest": "^27.5.1", | |
"jest-extended": "^1.1.0", | |
- "luxon": "^3.0.4", | |
"nodemon": "^2.0.22", | |
"request": "^2.88.2", | |
"start-server-and-test": "^1.11.4" | |
diff --git a/resourceIndexer/constants.js b/resourceIndexer/constants.js | |
new file mode 100644 | |
index 00000000..9ec0404e | |
--- /dev/null | |
+++ b/resourceIndexer/constants.js | |
@@ -0,0 +1,35 @@ | |
/** Matches a `_YYYY-MM-DD` datestamp within a filename. */
export const DATESTAMP_REGEX = /_\d{4}-\d{2}-\d{2}/;

/**
 * Filename endings of JSON files which must never be classified as an
 * auspice dataset / sidecar file.
 */
export const INVALID_AUSPICE_PATTERNS = [
  /_seq\.json$/,
  /_sequences\.json$/,
  /_entropy\.json$/,
  /_titers\.json$/,
];

/**
 * Ordered classification rules for files which are potentially valid auspice
 * files. Check them in order; the first matching pattern wins (so the
 * catch-all 'main' rule has to stay last).
 *
 * Each entry is a tuple of [subresource type, regex match pattern].
 *
 * The subresource type (string) is the same as that used internally in the
 * server code (used when instantiating (sub-)classes of `Subresource`)
 */
export const VALID_AUSPICE_PATTERNS = [
  ["root-sequence", /_root-sequence\.json$/],
  ["tip-frequencies", /_tip-frequencies\.json$/],
  ["measurements", /_measurements\.json$/],
  ["meta", /_meta\.json$/],
  ["tree", /_tree\.json$/],
  ["main", /\.json$/],
];

/**
 * The subresource types which accompany a dataset rather than define it:
 * every type listed above except 'main', 'meta' and 'tree'.
 */
export const SIDECAR_TYPES = new Set(
  VALID_AUSPICE_PATTERNS.flatMap(([subresourceType]) => (
    ['main', 'meta', 'tree'].includes(subresourceType) ? [] : [subresourceType]
  ))
);

/**
 * Source names. The values are taken to match the server's `sourceNameToClass`.
 */
export const SOURCE = {
  CORE: "core",
  STAGING: "staging",
};
diff --git a/resourceIndexer/coreStagingS3.js b/resourceIndexer/coreStagingS3.js | |
new file mode 100644 | |
index 00000000..1f0f524f | |
--- /dev/null | |
+++ b/resourceIndexer/coreStagingS3.js | |
@@ -0,0 +1,294 @@ | |
+import { SOURCE, VALID_AUSPICE_PATTERNS, INVALID_AUSPICE_PATTERNS, | |
+ DATESTAMP_REGEX, SIDECAR_TYPES } from './constants.js'; | |
+import { collectInventory } from './inventory.js'; | |
+ | |
/**
 * The inventory of buckets (especially the core bucket) is in some ways a
 * historical record of work over the years, but this isn't really what we want
 * to display to users. As some examples:
 *  - Files which don't match a resource to list should be excluded
 *  - Datestamped files (i.e. _YYYY-MM-DD in the filename) are excluded
 *    (we use S3 versioning instead)
 *
 * If the s3 object is to be excluded we return false here.
 *
 * In the case where the object represents (a part of) a resource we want to
 * expose, then we categorise it here by adding the following properties:
 *  - source (STAGING or CORE)
 *  - baseUrl (the un-versioned S3 URL of the object)
 *  - resourceType ('dataset' or 'intermediate')
 *  - resourcePath (the path by which objects will be grouped together.
 *    For datasets this is the nextstrain.org URL path, without any temporal
 *    signifier; for intermediates it is the directory below 'files/')
 *  - subresourceType (only for resourceType=dataset)
 *
 * Note that `source` and `baseUrl` are assigned before any exclusion check, so
 * they are also set on items for which false is returned.
 *
 * @param {object} item S3 object; the `key` and `bucket` properties are read. Mutated in place.
 * @param {boolean} staging true for the staging bucket, false for the core bucket
 * @returns {object|false} the (annotated) item, or false if it should be excluded
 */
function categoriseCoreObjects(item, staging) {
  const key = item.key;
  item.source = staging ? SOURCE.STAGING : SOURCE.CORE;
  item.baseUrl = `https://${item.bucket}.s3.amazonaws.com/${key}`;

  if (key.startsWith('search_')
    || key.startsWith('manifest_')
    || key.startsWith('datasets_')
  ) return false;

  // On the core bucket, directory-like hierarchies are used for intermediate
  // files. These intermediate files may include files which auspice can
  // display, but nextstrain.org cannot map URLs to directory-like hierarchies.
  // There are other resourceTypes here we may consider in the future -- e.g.
  // model output JSONs
  if (key.includes("/")) {
    if (staging === true) return false;
    if (!key.startsWith('files/')) return false;

    const excluded = key.includes('/archive/')
      || key.includes('/test/')
      || key.includes('/workflows/')
      || key.includes('/branch/')
      || key.includes('/trial/')
      || key.includes('/test-data/')
      || key.includes('jen_test/')
      || /\/nextclade-full-run-[\d-]+--UTC\//.test(key)
      || /\/\d{4}-\d{2}-\d{2}_results\.json/.test(key) // forecasts-ncov (dot escaped so only a literal ".json" matches)
      || key.endsWith('.png'); // forecasts-ncov
    if (excluded) return false;

    item.resourceType = 'intermediate';
    /* The resourcePath is used for grouping. For a nextstrain.org dataset this
    would be combined with the source to form a nextstrain URL, however that's
    not applicable here. Instead we use the filepath information without the
    leading 'files/' and without the (trailing) filename so that different
    files in the same directory structure get grouped together. For instance,
    files/ncov/open/x.json -> ncov/open */
    const fields = key.split('/');
    item.resourcePath = fields.slice(1, fields.length - 1).join('/');
    return item;
  }

  // Some filenames have a double underscore (presumably by mistake)
  if (key.includes('__')) return false;

  // We don't have narratives on the core/staging buckets, so all that's left is
  // to check if the key looks like a valid auspice file
  const auspiceFileInfo = auspiceFile(key);
  if (!auspiceFileInfo) return false;
  item.resourceType = 'dataset';
  item.subresourceType = auspiceFileInfo.subresourceType;

  /**
   * Currently the resourcePath is based completely off the key name,
   * paralleling how the nextstrain.org URLs of datasets are mapped to resource
   * paths and then to S3 keys. In the future we may change this in order to
   * group together files with different s3 key names but which we want to
   * associate with the same nextstrain.org URL. For example, we may wish to
   * combine the auspice datasets behind `ncov/gisaid/africa` and
   * `ncov/gisaid/africa/all-time`.
   */
  item.resourcePath = auspiceFileInfo.urlPath;

  return item;
}
+ | |
/**
 * Classify a filename as an auspice dataset / sidecar file.
 *
 * Returns false if the filename is datestamped, matches one of the
 * INVALID_AUSPICE_PATTERNS, or matches none of the VALID_AUSPICE_PATTERNS.
 * Otherwise returns an object with properties:
 *  - subresourceType: the type of the first matching valid pattern
 *  - urlPath: the filename with the matched suffix removed and underscores
 *    replaced by slashes
 */
function auspiceFile(filename) {
  const isExcluded = Boolean(filename.match(DATESTAMP_REGEX))
    || INVALID_AUSPICE_PATTERNS.some((invalid) => filename.match(invalid));
  if (isExcluded) return false;

  // first match wins, so the order of VALID_AUSPICE_PATTERNS matters
  const firstMatch = VALID_AUSPICE_PATTERNS.find(([, valid]) => filename.match(valid));
  if (!firstMatch) return false;

  const [subresourceType, matchedPattern] = firstMatch;
  return {
    subresourceType,
    urlPath: filename.replace(matchedPattern, '').replace(/_/g, '/'),
  };
}
+ | |
+ | |
/**
 * Group a list of items (i.e. files) which appear to be valid components of a
 * resource into per-day versions. As an example, we may have
 *  - date: A, files: X_tree.json, X_meta.json
 *  - date: B, files: X_meta.json (invalid)
 *  - date: C, files: X.json
 *  - date: D, files: X.json, X_root-sequence.json
 *  - date: E, files: X_root-sequence.json (invalid)
 *  - date: F, files: X.json, X.json, X_root-sequence.json (valid, pick the newest X.json)
 * and we want to produce a structure like:
 *   {versions: [
 *     {date: F, fileUrls: {main: url, root-sequence: url}},
 *     {date: D, fileUrls: {main: url, root-sequence: url}},
 *     {date: C, fileUrls: {main: url}},
 *     {date: A, fileUrls: {meta: url, tree: url}}
 *   ]}
 *
 * The maximum temporal resolution is per-day, in other words if a resource was
 * uploaded multiple times in a single day then only the last one is used. This
 * matches our (implicit) expectation when we started using datestamped datasets
 * during the ncov pandemic. It also covers the (somewhat common, I think) case
 * where datasets were re-uploaded after an error / omission was noticed.
 *
 * The returned object may contain `versions:[]` (empty array) if no valid
 * versions are found. Throws an Error for a resourceType other than 'dataset'
 * or 'intermediate'.
 */
function createVersionedResources(resourceType, id, items) {
  // Bucket the items by their day
  const byDay = {};
  for (const item of items) {
    const day = item.date;
    if (Object.prototype.hasOwnProperty.call(byDay, day)) {
      byDay[day].push(item);
    } else {
      byDay[day] = [item];
    }
  }

  // Most recent day first
  const days = Object.keys(byDay);
  days.sort((dayA, dayB) => {
    if (dayA < dayB) return 1;
    if (dayA > dayB) return -1;
    return 0;
  });

  // Within each day, most recent object first
  for (const day of days) {
    byDay[day].sort((a, b) => b.timestamp - a.timestamp);
  }

  // Turn each day's objects into a version; some days have files but no valid
  // resource, in which case the helper returns false and the day is dropped
  const versions = [];
  for (const day of days) {
    let version;
    switch (resourceType) {
      case 'dataset':
        version = validDataset(id, day, byDay[day]);
        break;
      case 'intermediate':
        version = validIntermediate(id, day, byDay[day]);
        break;
      default:
        throw new Error(`Unknown resourceType '${resourceType}' to create versioned resource from`);
    }
    if (version) versions.push(version);
  }

  return {versions};
}
+ | |
+ | |
/**
 * Given a set of files from the same _day_ (S3 keys) return the subset such
 * that, taken together, they represent a dataset. Often a dataset will be
 * uploaded multiple times in a single day (often to fix minor mistakes) and we
 * only want to surface the last-updated dataset on the day. Note that each
 * individual object provided here is a valid dataset-related file in its own right,
 * but taken together the objects may not represent a valid dataset, or only a
 * subset may represent a valid dataset.
 *
 * We take the first (i.e. most recent) occurrence of valid files. In theory we
 * could have a situation where we take a sidecar file that wasn't intended to
 * be grouped with the auspice json, but I think that's worth the
 * simplifications it allows here.
 *
 * @param {string} id resource identifier (currently unused)
 * @param {string} date the day all `objects` belong to
 * @param {object[]} objects S3 objects, most recent first. The properties
 *   `subresourceType`, `baseUrl` and (optionally) `versionId` are read.
 * @returns {{date: string, fileUrls: Object<string, string>}|false} false if
 *   the objects don't include a v2 ('main') or a v1 ('meta' + 'tree') dataset.
 *   `fileUrls` only contains the subresources which exist.
 */
function validDataset(id, date, objects) {
  // The `subresources` object represents the maximal possible collection of
  // subresources for this dataset. The keys are the subresource types, and the
  // values are false (subresource doesn't exist) or the relevant s3 object.
  const subresources = Object.fromEntries(
    VALID_AUSPICE_PATTERNS.map(([subresourceType]) => [subresourceType, false])
  );

  // `objects` is sorted most-recent first, so the first hit is the newest upload
  const firstOfType = (type) => objects.find((o) => o.subresourceType === type);

  /**
   * Take a v2 dataset over a v1 dataset _even if_ the v1 dataset was uploaded
   * more recently. (This is not hypothetical - it is the case for /zika as of
   * 2023-11-01.) This is almost certainly an unintentional situation, and the
   * behaviour of the nextstrain.org server is to look for a v2 dataset and use
   * that, regardless of whether a v1 dataset exists.
   */
  const typesPresent = new Set(objects.map((o) => o.subresourceType));
  if (typesPresent.has('main')) {
    subresources.main = firstOfType('main');
  } else if (typesPresent.has('meta') && typesPresent.has('tree')) {
    subresources.meta = firstOfType('meta');
    subresources.tree = firstOfType('tree');
  } else {
    /* It isn't unexpected to encounter days with auspice-like files but no
    valid dataset. Looking at the core bucket in late 2023 identified ~2700 such
    days. It seems this is (mostly?) due to delete markers being added for
    certain files and so looking at dates prior to the delete marker(s) we only
    see a subset of the files which were actually uploaded on that day. */
    return false;
  }

  for (const subresourceType of typesPresent) {
    if (SIDECAR_TYPES.has(subresourceType)) {
      subresources[subresourceType] = firstOfType(subresourceType);
    }
  }

  const fileUrls = Object.fromEntries(
    Object.entries(subresources)
      // Skip subresources which don't exist; otherwise they'd be emitted as
      // keys whose URL is `undefined`
      .filter(([, s3object]) => Boolean(s3object))
      .map(([subresourceType, s3object]) => [
        subresourceType,
        s3object.versionId
          ? `${s3object.baseUrl}?versionId=${encodeURIComponent(s3object.versionId)}`
          : s3object.baseUrl, // (bucket unversioned)
      ])
  );

  return {date, fileUrls};
}
+ | |
/**
 * For a set of intermediate files (on a given day), return the subset to be
 * represented by the resource. We don't perform any filename-based pruning at
 * the moment, so the files for the resource are everything on the bucket which
 * was assigned the same ID - this includes the same "file" under different
 * compression schemes (etc), as that results in a different filename (key).
 * If multiple files exist on the same day the first (most recent) is taken.
 *
 * @param {string} id resource identifier (currently unused)
 * @param {string} date the day all `objects` belong to
 * @param {object[]} objects S3 objects, most recent first. The properties
 *   `key`, `baseUrl` and (optionally) `versionId` are read.
 * @returns {{date: string, fileUrls: Object<string, string>}} `fileUrls` maps
 *   the filename (last path component of the key) to its URL. The URL carries
 *   a `versionId` query only when the object has a version ID.
 */
function validIntermediate(id, date, objects) {
  const seenKeys = new Set();
  const fileUrls = {};
  for (const s3object of objects) {
    // objects are most-recent first, so later duplicates of a key are older uploads
    if (seenKeys.has(s3object.key)) continue;
    seenKeys.add(s3object.key);

    const filename = s3object.key.split('/').pop();
    fileUrls[filename] = s3object.versionId
      ? `${s3object.baseUrl}?versionId=${encodeURIComponent(s3object.versionId)}`
      : s3object.baseUrl; // (bucket unversioned)
  }
  return {date, fileUrls};
}
+ | |
+ | |
/**
 * Collection definition for the core bucket: how to collect its inventory, how
 * to categorise each inventory item and how to turn grouped items into a
 * versioned resource.
 */
export const coreS3Data = {
  name: 'core',
  async collect({local}) {
    // where the S3 inventory of the core bucket is written to
    const inventoryLocation = {
      inventoryBucket: "nextstrain-inventories",
      inventoryPrefix: "nextstrain-data/config-v1/",
    };
    return await collectInventory({name: this.name, local, ...inventoryLocation});
  },
  categorise(item) {
    return categoriseCoreObjects(item, false);
  },
  createResource: createVersionedResources,
};
+ | |
/**
 * Collection definition for the staging bucket: how to collect its inventory,
 * how to categorise each inventory item and how to turn grouped items into a
 * versioned resource.
 */
export const stagingS3Data = {
  name: 'staging',
  async collect({local}) {
    // where the S3 inventory of the staging bucket is written to
    const inventoryLocation = {
      inventoryBucket: "nextstrain-inventories",
      inventoryPrefix: "nextstrain-staging/config-v1/",
    };
    return await collectInventory({name: this.name, local, ...inventoryLocation});
  },
  categorise(item) {
    return categoriseCoreObjects(item, true);
  },
  createResource: createVersionedResources,
};
diff --git a/resourceIndexer/errors.js b/resourceIndexer/errors.js | |
new file mode 100644 | |
index 00000000..7d5bff2c | |
--- /dev/null | |
+++ b/resourceIndexer/errors.js | |
@@ -0,0 +1 @@ | |
/**
 * Error type for problems detected by the resource indexer itself (as opposed
 * to errors bubbling up from libraries). The `name` is set so that the class
 * is identifiable in logs and stack traces rather than showing as plain "Error".
 */
export class ResourceIndexerError extends Error {
  constructor(...args) {
    super(...args);
    this.name = 'ResourceIndexerError';
  }
}
diff --git a/resourceIndexer/inventory.js b/resourceIndexer/inventory.js | |
new file mode 100644 | |
index 00000000..c1c0ac74 | |
--- /dev/null | |
+++ b/resourceIndexer/inventory.js | |
@@ -0,0 +1,255 @@ | |
+import * as fs from 'node:fs/promises'; | |
+import neatCsv from 'neat-csv'; | |
+import zlib from 'zlib'; | |
+import { promisify } from 'util'; | |
+import AWS from 'aws-sdk'; | |
+import {logger} from './logger.js'; | |
+import { DateTime } from 'luxon'; | |
+import escapeStringRegexp from 'escape-string-regexp'; | |
+import { ResourceIndexerError } from './errors.js'; | |
+const gunzip = promisify(zlib.gunzip) | |
+ | |
/**
 * Fetches and reads the latest inventory from the provided bucket/prefix:
 *  - finds the most recent manifest.json via comparison of timestamps in keys
 *  - uses this manifest.json to get the schema + key of the actual inventory
 *  - gets the actual inventory & returns the data as an object[] with keys from the schema
 *
 * Note that we only read a maximum of 999 keys from the provided bucket+prefix. A typical inventory
 * update adds ~4 keys, so this should allow for ~8 months of inventories. The bucket where inventories
 * are stored should use lifecycles to expire objects. If the listing is reported as truncated a
 * warning is logged, as the most recent manifest may then be missing from the listing.
 *
 * Returns an object with properties:
 *  - inventory: object[]     list of entries in the inventory, using the schema to define keys
 *  - versionsExist: boolean  are key versions present within the bucket?
 *
 * Throws (i.e. the returned promise rejects) if no manifest.json key is found
 * or if any of the S3 requests fail.
 */
const fetchInventoryRemote = async ({bucket, prefix, name}) => {
  const S3 = new AWS.S3();
  // the regex assumes exactly one trailing slash on the prefix
  const _prefix = escapeStringRegexp(prefix.replace(/\/*$/, "/"));
  const manifestKeyPattern = new RegExp(`^${_prefix}\\d{4}-\\d{2}-\\d{2}T\\d{2}-\\d{2}Z/manifest\\.json$`);

  const listing = await S3.listObjectsV2({Bucket: bucket, Prefix: prefix, MaxKeys: 999}).promise();
  if (listing.IsTruncated) {
    logger.warn(`inventory for ${name} - the listing of ${bucket}/${prefix} was truncated, so the latest manifest may have been missed`);
  }
  const orderedKeys = (listing.Contents || [])
    .map((object) => object.Key)
    .filter((key) => manifestKeyPattern.test(key))
    .sort() // keys are identical except for a YYYY-MM-DDTHH-MMZ timestamp within the key itself
    .reverse(); // now sorted most recent object first
  if (orderedKeys.length === 0) {
    throw new Error("No valid inventory manifest.json found");
  }
  const manifestKey = orderedKeys[0];
  logger.info(`inventory for ${name} - manifest key: ${manifestKey}`)

  const {schema, inventoryKey, versionsExist} = await S3.getObject({Bucket: bucket, Key: manifestKey})
    .promise()
    .then((response) => _parseManifest(JSON.parse(response.Body.toString('utf-8'))));

  logger.info(`inventory for ${name} - parsed manifest JSON`)

  const inventory = await S3.getObject({Bucket: bucket, Key: inventoryKey})
    .promise()
    .then((response) => gunzip(response.Body))
    .then((data) => neatCsv(data, schema));

  logger.info(`inventory for ${name} - fetched ${inventory.length} rows`)
  return {inventory, versionsExist};
}
+ | |
/**
 * Read an inventory from disk rather than from S3. The following files must
 * be present (paths are relative to the current working directory):
 *  - `./devData/${name}.manifest.json`
 *  - `./devData/${name}.inventory.csv.gz`
 *
 * Returns an object with properties:
 *  - inventory: object[]     list of entries in the inventory, using the schema to define keys
 *  - versionsExist: boolean  are key versions present within the bucket?
 */
const fetchInventoryLocal = async ({name}) => {
  const manifestPath = `./devData/${name}.manifest.json`;
  const inventoryPath = `./devData/${name}.inventory.csv.gz`;
  logger.info(`inventory for ${name} -- reading S3 inventories from ${manifestPath} and ${inventoryPath}`);

  // the manifest provides the column names (schema) of the CSV inventory
  const manifestContents = await fs.readFile(manifestPath);
  const {schema, versionsExist} = _parseManifest(JSON.parse(manifestContents));

  const compressedInventory = await fs.readFile(inventoryPath);
  const csvData = await gunzip(compressedInventory);
  const inventory = await neatCsv(csvData, schema);

  logger.info(`inventory for ${name} - read ${inventory.length} rows from the local file`)
  return {inventory, versionsExist};
}
+ | |
+ | |
/**
 * Returns a list of objects in the requested S3 inventory, which itself represents a list of
 * objects + versions within a specific bucket+prefix. Keys before a delete marker are excluded.
 *
 * @param {object} args
 * @param {object[]} args.objects rows of the S3 inventory. Each row gains a
 *   `timestamp` property (parsed from `LastModifiedDate`) as a side effect.
 * @param {boolean} args.versionsExist does the inventory include version information?
 * @returns {Promise<object[]>} objects sorted most recent first, each with the
 *   properties timestamp, date (YYYY-MM-DD), key, bucket, versionId and latest
 */
const parseInventory = async ({objects, versionsExist}) => {
  // Ensure all objects are chronological (most recent first). The checks below
  // rely on this order, so the sorted array must be the one which is passed on.
  const sorted = objects
    .map((item) => {
      item.timestamp = DateTime.fromISO(item.LastModifiedDate);
      return item;
    })
    .sort((a, b) => b.timestamp - a.timestamp);

  const checked = versionsExist ? _checkVersionedObjects(sorted) : _checkNonVersionedObjects(sorted);
  const remaining = _removeDeletedObjects(checked);

  /* rename / prune / add properties as I find the default S3 properties /
  values awkward to work with */
  return remaining.map((item) => {
    return {
      timestamp: item.timestamp,
      date: item.LastModifiedDate.split("T")[0],
      key: item.Key,
      bucket: item.Bucket,
      versionId: item.VersionId, // will be undefined if bucket is not versioned
      latest: versionsExist ? item.IsLatest==='true' : true,
    }
  });
}
+ | |
/**
 * Obtain the S3 inventory and parse it into a list of S3 objects. The
 * inventory is either the latest one found in inventoryBucket /
 * inventoryPrefix, _or_ (when `local` is truthy) an on-disk copy, which is
 * useful for dev purposes to avoid constant downloads from S3.
 *
 * Any error raised while fetching the inventory is logged and re-thrown.
 * @returns S3Object[]
 */
export const collectInventory = async ({name, local, inventoryBucket, inventoryPrefix}) => {
  let fetched;
  try {
    const source = {bucket: inventoryBucket, prefix: inventoryPrefix, name};
    const {inventory, versionsExist} = local
      ? await fetchInventoryLocal(source)
      : await fetchInventoryRemote(source);
    fetched = {objects: inventory, versionsExist};
  } catch (e) {
    logger.error(`There was an error while fetching the S3 inventory for ${name}. This is fatal.`)
    throw e;
  }
  return await parseInventory(fetched);
}
+ | |
/**
 * Sanity-check the objects of a versioned bucket.
 *
 * Objects without a valid-looking version ID, or without the IsLatest
 * property, are logged and dropped. For instance,
 * s3://nextstrain-data/WNV_NA_tree.json from 2018-05-09 has an empty-string
 * version ID. These may represent objects from before versioning was enabled.
 *
 * For the remaining objects, the first occurrence of each key must be the one
 * S3 flags as latest, and no later occurrence may carry that flag; otherwise a
 * ResourceIndexerError is thrown.
 * @param {S3Item[]} Objects chronologically sorted, latest first
 */
function _checkVersionedObjects(objects) {
  const usable = objects.filter((item) => {
    if (!item.VersionId) {
      logger.verbose(`Object ${item.Bucket}/${item.Key} is missing a versionId but the bucket is versioned. The item will be ignored.`);
      return false;
    }
    if (!item.hasOwnProperty('IsLatest')) { // eslint-disable-line no-prototype-builtins
      logger.verbose(`Object ${item.Bucket}/${item.Key} is (unexpectedly) missing the IsLatest property. The item will be ignored.`);
      return false;
    }
    return true;
  });

  // keys for which the object flagged as latest has already been encountered
  const latestEncountered = new Set();
  for (const item of usable) {
    const flaggedLatest = item.IsLatest === 'true';
    const keyAlreadySeen = latestEncountered.has(item.Key);

    if (flaggedLatest && keyAlreadySeen) {
      throw new ResourceIndexerError(`
        These appears to be something amiss for S3 objects ${item.Bucket}/${item.Key}.
        Specifically, the version ${item.VersionId} is considered by S3 to be the latest,
        however it is not the most recent after sorting on LastModified.
        This may result in an invalid index and so this is a fatal error.
      `.replace(/\s+/g, ' '))
    }
    if (!flaggedLatest && !keyAlreadySeen) {
      throw new ResourceIndexerError(`
        These appears to be something amiss for S3 objects ${item.Bucket}/${item.Key}.
        Specifically, the most recent object (via sorting on LastModified, version ID:
        ${item.VersionId}) is not classified by S3 as the latest.
        This may result in an invalid index and so this is a fatal error.
      `.replace(/\s+/g, ' '))
    }
    if (flaggedLatest) latestEncountered.add(item.Key);
  }
  return usable;
}
+ | |
/**
 * For a non-versioned bucket, check that the VersionId is _not_ present and
 * that keys are never duplicated.
 *
 * Objects which (unexpectedly) carry a VersionId property are logged and
 * removed from the returned list. A key which appears more than once among
 * the remaining objects is a fatal error.
 *
 * @param {S3Item[]} objects
 * @returns {S3Item[]} the objects without a VersionId property, in their original order
 * @throws {ResourceIndexerError} if a key appears multiple times
 */
function _checkNonVersionedObjects(objects) {
  const keys = new Set();
  // `filter` (not `forEach`) so that the objects we log as ignored really are dropped
  return objects.filter((item) => {
    if (item.hasOwnProperty('VersionId')) { // eslint-disable-line no-prototype-builtins
      logger.verbose(`Object ${item.Bucket}/${item.Key} has a versionId ('${item.VersionId}') but the bucket is not versioned! The item will be ignored.`);
      return false;
    }
    if (keys.has(item.Key)) {
      throw new ResourceIndexerError(`
        The S3 Object for ${item.Bucket}/${item.Key} (unexpectedly) appears multiple times in an un-versioned bucket.
        This may result in a corrupted index and so is a fatal error.
      `.replace(/\s+/g, ' '))
    }
    keys.add(item.Key);
    return true;
  });
}
+ | |
/**
 * Remove 'deleted' objects in a (versioned) bucket. Versions more recent than
 * the most recent delete marker will _not_ be removed. Delete markers
 * themselves will be removed.
 *
 * Non-versioned buckets don't have delete markers, and it's safe to run this
 * function for them.
 *
 * @param {object[]} objects items with `Key`, `timestamp` and (optionally)
 *   `IsDeleteMarker` (the string "true" for delete markers) properties
 * @returns {object[]} the surviving objects, in their original order
 */
function _removeDeletedObjects(objects) {

  // Store the most recent delete markers: S3 key -> timestamp of the delete
  // marker. A Map is used as S3 keys are arbitrary strings, and names such as
  // "constructor" would collide with inherited properties of a plain object.
  const deleteMarkers = new Map();
  for (const item of objects) {
    if (item.IsDeleteMarker !== "true") continue;
    const known = deleteMarkers.get(item.Key);
    if (known === undefined || known < item.timestamp) {
      deleteMarkers.set(item.Key, item.timestamp);
    }
  }

  return objects.filter((item) => {
    if (item.IsDeleteMarker === "true") return false;
    const deletedAt = deleteMarkers.get(item.Key);
    // objects at or before the most recent delete marker have been deleted
    return deletedAt === undefined || !(item.timestamp <= deletedAt);
  });
}
+ | |
/**
 * Parses a S3 inventory manifest JSON file
 * @param {object} manifest the parsed manifest; `files`, `fileSchema` and
 *                          `sourceBucket` are read
 * @returns {object} object.schema = string[]          column names of the inventory
 *                   object.inventoryKey = string      key of the (single) inventory file
 *                   object.versionsExist = boolean    does the schema include 'VersionId'?
 * @throws {ResourceIndexerError} if the manifest lists no inventory file, or more than one
*/
function _parseManifest(manifest) {
  const files = manifest.files;
  if (!Array.isArray(files) || files.length === 0) {
    throw new ResourceIndexerError(`
      The manifest file for the S3 inventory for bucket ${manifest.sourceBucket}
      does not list any inventory file.
    `.replace(/\s+/g, ' '))
  }
  if (files.length>1) {
    throw new ResourceIndexerError(`
      The manifest file for the S3 inventory for bucket ${manifest.sourceBucket}
      includes more than one inventory file. This situation was not encountered
      during development, but this is presumably caused by the inventory size
      exceeding some threshold and being chunked into multiple files. Please check
      this is indeed the case and, if so, amend the code to parse and join each file.
    `.replace(/\s+/g, ' '))
  }
  const schema = manifest.fileSchema.split(",").map((f) => f.trim());
  return {
    schema,
    inventoryKey: files[0].key,
    // Buckets without versioning cannot produce inventories with VersionId
    versionsExist: schema.includes('VersionId'),
  }
}
+ | |
diff --git a/resourceIndexer/logger.js b/resourceIndexer/logger.js | |
new file mode 100644 | |
index 00000000..1f307713 | |
--- /dev/null | |
+++ b/resourceIndexer/logger.js | |
@@ -0,0 +1,10 @@ | |
+import { createLogger, transports } from 'winston'; | |
+ | |
/**
 * Logger shared by the resource indexer modules. It writes to the console
 * and is created with the 'info' level.
 */
export const logger = createLogger({
  transports: [new transports.Console()],
  level: 'info',
});
\ No newline at end of file | |
diff --git a/resourceIndexer/main.js b/resourceIndexer/main.js | |
new file mode 100644 | |
index 00000000..5f234ef8 | |
--- /dev/null | |
+++ b/resourceIndexer/main.js | |
@@ -0,0 +1,113 @@ | |
+ | |
+import { ArgumentParser } from 'argparse'; | |
+import fs from 'fs'; | |
+import { coreS3Data, stagingS3Data } from "./coreStagingS3.js"; | |
+import {logger} from './logger.js'; | |
+import zlib from 'zlib'; | |
+import { promisify } from 'util'; | |
+import { ResourceIndexerError } from './errors.js'; | |
+ | |
+const gzip = promisify(zlib.gzip) | |
+ | |
+/** | |
+ * We define a number of collections which each represent some listing of | |
+ * nextstrain resources. The actual details are deferred to the provided | |
+ * collection objects - e.g. they may represent a GitHub repo listing, an S3 | |
+ * inventory. Each of these collections provides functions which allow items | |
+ * (files) across collections to be collected into a master list of resources | |
+ * using three identifiers: source, resourceType and resourcePath. The intention | |
+ * is for source to parallel the information in the corresponding Source | |
+ * (sub-)class and resourcePath to parallel the information in the Resource | |
+ * (sub-)class. | |
+ * | |
+ * Currently only core & staging sources + datasets & intermediates are part of | |
+ * the index. For instance, the core WNV/NA (nextstrain.org/WNV/NA) dataset is | |
+ * indexed like so: | |
+ * | |
+ * core → dataset → WNV/NA → versions -> [ | |
+ * {date: "2021-04-08", fileUrls: {main: ...}, | |
+ * {date: "2019-08-30", fileUrls: {meta: ..., tree: ...} | |
+ * ] | |
+ * | |
+ */ | |
+const COLLECTIONS = [ | |
+ coreS3Data, | |
+ stagingS3Data, | |
+]; | |
+ | |
+function parseArgs() { | |
+ const argparser = new ArgumentParser({ | |
+ description: ` | |
+ Fetch file lists from a number of provided collections (e.g. S3 inventories) and collect them into | |
+ resources. Resources are organised in a hierarchical fashion via source → resourceType → resourcePath. | |
+ Each resource contains a list of available versions, where applicable. | |
+ The output JSON is intended for consumption by the nextstrain.org server. | |
+ `, | |
+ }); | |
+ argparser.addArgument("--local", {action: 'storeTrue', | |
+ help: 'Access a local copy of S3 inventories within ./devData/. See docstring of fetchInventoryLocal() for expected filenames.'}) | |
+ argparser.addArgument("--collections", {metavar: "<name>", type: "string", nargs: '+', | |
+ help: "Only fetch data from a subset of collections. Source names are those defined in COLLECTIONS"}); | |
+ argparser.addArgument("--output", {metavar: "<json>", required: true}) | |
+ argparser.addArgument("--indent", {action: 'storeTrue', help: 'Indent the output JSON'}) | |
+ argparser.addArgument("--gzip", {action: 'storeTrue', help: 'GZip the output JSON'}) | |
+ argparser.addArgument("--verbose", {action: 'storeTrue', help: 'Verbose logging'}) | |
+ | |
+ return argparser.parseArgs(); | |
+} | |
+ | |
+ | |
+main(parseArgs()) | |
+ .catch((err) => { | |
+ logger.error(err.message); | |
+ if (!(err instanceof ResourceIndexerError)) { | |
+ console.trace(err); | |
+ } | |
+ }) | |
+ | |
+ | |
+async function main(args) { | |
+ | |
+ if (args.verbose) { | |
+ logger.transports.forEach((t) => t.level = 'verbose'); | |
+ } | |
+ | |
+ const resources = {}; | |
+ | |
+ for (const collection of COLLECTIONS) { | |
+ if (args.collections && !args.collections.includes(collection.name)) { | |
+ continue | |
+ } | |
+ | |
+ const groupedObjects = (await collection.collect({local: args.local})) | |
+ .map(collection.categorise) | |
+ .filter((item) => !!item) | |
+ // Collect together all items ("files") based on their assigned resourceType & resourcePath | |
+ .reduce((store, item) => { | |
+ const {resourceType, resourcePath, source} = item; | |
+ if (!store[source]) store[source]={} | |
+ if (!store[source][resourceType]) store[source][resourceType]={} | |
+ if (!store[source][resourceType][resourcePath]) store[source][resourceType][resourcePath]=[] | |
+ store[source][resourceType][resourcePath].push(item); | |
+ return store; | |
+ }, {}); | |
+ | |
+ for (const source of Object.keys(groupedObjects)) { | |
+ for (const resourceType of Object.keys(groupedObjects[source])) { | |
+ for (const [resourcePath, items] of Object.entries(groupedObjects[source][resourceType])) { | |
+ const resource = collection.createResource(resourceType, resourcePath, items); | |
+ if (resource.versions.length===0) continue; | |
+ if (!resources[source]) resources[source]={} | |
+ if (!resources[source][resourceType]) resources[source][resourceType]={} | |
+ resources[source][resourceType][resourcePath] = resource; | |
+ } | |
+ } | |
+ } | |
+ } | |
+ | |
+ let output = JSON.stringify(resources, null, args.indent ? 2 : null); | |
+ if (args.gzip) { | |
+ output = await gzip(output) | |
+ } | |
+ fs.writeFileSync(args.output, output); | |
+} | |
\ No newline at end of file | |
-- | |
2.42.0 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment