Diagram:
flowchart TB
subgraph Harvest-by-ORCID
direction RL
Dimensions-by-ORCID
OpenAlex-by-ORCID
PubMed-by-ORCID
hostname | ip | provider | |
---|---|---|---|
research.noaa.gov | 3.171.38.80 | Amazon Technologies Inc. | |
research.noaa.gov | 3.171.38.79 | Amazon Technologies Inc. | |
research.noaa.gov | 3.171.38.59 | Amazon Technologies Inc. | |
research.noaa.gov | 3.171.38.3 | Amazon Technologies Inc. | |
epic.noaa.gov | 108.138.64.54 | Amazon.com, Inc. | |
epic.noaa.gov | 108.138.64.32 | Amazon.com, Inc. | |
epic.noaa.gov | 108.138.64.6 | Amazon.com, Inc. | |
epic.noaa.gov | 108.138.64.49 | Amazon.com, Inc. | |
adp.noaa.gov | 18.165.98.15 | Amazon Technologies Inc. |
research.noaa.gov | |
epic.noaa.gov | |
adp.noaa.gov | |
ci.noaa.gov | |
oeab.noaa.gov | |
orta.research.noaa.gov | |
testbeds.noaa.gov | |
qosap.research.noaa.gov | |
oss.research.noaa.gov | |
eeo.oar.noaa.gov |
#!/usr/bin/env python3 | |
import fileinput | |
import re | |
import subprocess | |
from functools import cache | |
@cache | |
def ips(hostname): |
Traceback (most recent call last): | |
File "/Users/edsu/.pyenv/versions/3.13.0/bin/sciop", line 8, in <module> | |
sys.exit(_main()) | |
~~~~~^^ | |
File "/Users/edsu/Projects/sciop/src/sciop/cli/main.py", line 16, in _main | |
main(max_content_width=100) | |
~~~~^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/Users/edsu/.pyenv/versions/3.13.0/lib/python3.13/site-packages/click/core.py", line 1161, in __call__ | |
return self.main(*args, **kwargs) | |
~~~~~~~~~^^^^^^^^^^^^^^^^^ |
#!/usr/bin/env python3 | |
# The program will read WARC or WACZ data looking for Browsertrix text records | |
# and print them out as files using the archived URL as the path. | |
# | |
# You can run it right here from Gist using pipx: | |
# | |
# pipx run https://gist.githubusercontent.com/edsu/89bd2844b9d3d4536e68956b3a16eaef/raw/warc_text.py file1.warc.gz file2.warc.gz | |
# | |
# If you give it a WACZ file it will read any WARC files contained in the WACZ: |
Diagram:
flowchart TB
subgraph Harvest-by-ORCID
direction RL
Dimensions-by-ORCID
OpenAlex-by-ORCID
PubMed-by-ORCID
#!/usr/bin/env python3 | |
# This program will fetch the first page of recently updated Library of Congress | |
# Subject Headings from id.loc.gov and print out the MARC records for them. | |
# | |
# /// script | |
# dependencies = ["requests", "pymarc"] | |
# /// | |
# | |
# see PEP 723 |
#!/usr/bin/env python3 | |
import getpass | |
print(f"Hello {getpass.getuser()}!") |
#!/usr/bin/env -S pipx run | |
# This program walks through the URLs in the sitemap and checks to see if they | |
# are in the Internet Archive Wayback Machine. | |
# | |
# You can run it like: | |
# | |
# pipx run data-usaid-gov-check.py > results.csv | |
# | |
# |
#!/usr/bin/env python3 | |
""" | |
Look up a URL in swap.stanford.edu and print out the collections and crawl | |
SDR object identifiers that contain a snapshot of the URL. | |
""" | |
import sys | |
import json | |
import collections |