Created
February 15, 2018 21:03
-
-
Save anonymous/cd8a0c460a419c2de351ccc48ecc1a3c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download KGML pathway files from the KEGG REST API for a fixed set of
# organisms. Produces, in the current directory:
#   organisms-all.txt                - raw output of the /list/organism endpoint
#   organisms-of-interest.txt        - rows whose 2nd field is a selected code
#   organisms-of-interest-codes.txt  - 1st column of those rows
#   scrape-kegg-2-curl-loops.sh      - generated download script (one line per organism)
#   kgml/                            - destination for the downloaded .xml files

# Get a list of all organisms
curl -s "http://rest.kegg.jp/list/organism" > organisms-all.txt

# Get just a few of interest (match on the 2nd whitespace-separated field)
awk '$2~/^(hsa|mmu|rno|cfa|bta|gga|xla|xtr|dre|dme|cel|ath|ehi|tgo|eco|sau|mtu|mav|cje|ccol)$/' \
  organisms-all.txt > organisms-of-interest.txt

# Get the accession codes for each (first tab-separated column)
cut -f1 organisms-of-interest.txt > organisms-of-interest-codes.txt

# Make a directory to put all the kgml files downloaded
mkdir -p kgml

# Write a script that will download all the kgml files for all the organisms
# of interest. Each generated line lists the pathways for one organism and
# fetches every pathway's KGML into kgml/<pathway-id>.xml.
# ${code} is expanded now; \${path} is escaped so it is expanded only when the
# generated script runs.
while IFS= read -r code; do
  printf '%s\n' "curl 'http://rest.kegg.jp/list/pathway/${code}' | cut -f1 | while IFS= read -r path; do curl -o \"kgml/\${path}.xml\" \"http://rest.kegg.jp/get/\${path}/kgml\"; done"
done < organisms-of-interest-codes.txt > scrape-kegg-2-curl-loops.sh

# Get in tmux and run it (the downloads can take a long time)
bash scrape-kegg-2-curl-loops.sh
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment