Last active
June 15, 2016 19:18
-
-
Save lindenb/6090786 to your computer and use it in GitHub Desktop.
comparing the md5 sum of g1kv37 vs hg19
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ curl -s "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/human_g1k_v37.fasta.gz" | gunzip -c | java FastaMD5 > a.txt
$ curl -s "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips/chromFa.tar.gz" | gunzip -c | tar Oxvf - 2> /dev/null | java FastaMD5 > b.txt
## join on the md5 column (FastaMD5 output is tab-delimited: name, md5, length)
$ join -t $'\t' -1 2 -2 2 <(sort -t $'\t' -k2,2 a.txt ) <(sort -t $'\t' -k2,2 b.txt ) | cut -d $'\t' -f 1,2,4 | sort -t $'\t' -k3,3
## unjoinable (records whose md5 appears in only one of the two files)
$ join -t $'\t' -1 2 -2 2 -v 1 -v 2 <(sort -t $'\t' -k2,2 a.txt ) <(sort -t $'\t' -k2,2 b.txt ) | sort -t $'\t' -k2,2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.*; | |
import java.security.MessageDigest; | |
/**
 * Reads FASTA records from standard input and, for each record, prints one
 * line to standard output of the form
 * {@code <header text> TAB <md5 hex> TAB <sequence length>}.
 *
 * <p>The header text is everything after {@code '>'} up to the end of the
 * line. The digest is computed over the sequence bytes after upper-casing
 * them and dropping spaces, line feeds and carriage returns, so line
 * wrapping and soft-masking (lower case) do not affect the result.
 */
public class FastaMD5
{
    /** Lower-case hexadecimal digits used to render the digest. */
    private static final char[] HEX = "0123456789abcdef".toCharArray();

    /**
     * Streams standard input byte by byte and emits one summary line per
     * FASTA record.
     *
     * @param args ignored
     * @throws IOException if sequence data is found before the first
     *         {@code '>'} header line, or if reading standard input fails
     * @throws Exception if the MD5 algorithm is not available
     */
    public static void main(String args[]) throws Exception
    {
        final InputStream in = System.in;
        final byte[] buffer = new byte[1];
        // Digest of the record being read; null until the first header is seen.
        MessageDigest complete = null;
        // long rather than int so very long records cannot overflow the counter.
        long len = 0;
        for (;;)
        {
            int c = in.read();
            switch (c)
            {
                case -1: case '>':
                {
                    // End of input or start of the next record: finish the
                    // line of the record that was in progress, if any.
                    if (complete != null)
                    {
                        System.out.print(toHex(complete.digest()));
                        System.out.println("\t" + len);
                        complete = null;
                        len = 0;
                    }
                    if (c == -1)
                    {
                        System.out.flush();
                        return;
                    }
                    // Copy the header line through as raw bytes so that
                    // non-ASCII bytes are not re-encoded, and drop '\r' so
                    // CRLF input does not leak a carriage return into the name.
                    while ((c = in.read()) != -1 && c != '\n')
                    {
                        if (c != '\r')
                        {
                            System.out.write(c);
                        }
                    }
                    System.out.print('\t');
                    complete = MessageDigest.getInstance("MD5");
                    len = 0;
                    break;
                }
                case '\n': case ' ': case '\r':
                    // Whitespace inside the sequence is not part of the digest.
                    break;
                default:
                {
                    if (complete == null)
                    {
                        // Previously this case failed with a bare
                        // NullPointerException.
                        throw new IOException(
                            "Malformed FASTA: sequence data found before the first '>' header");
                    }
                    buffer[0] = (byte) Character.toUpperCase(c);
                    complete.update(buffer, 0, 1);
                    ++len;
                    break;
                }
            }
        }
    }

    /** Renders {@code bytes} as a lower-case hexadecimal string, two digits per byte. */
    private static String toHex(byte[] bytes)
    {
        final StringBuilder sb = new StringBuilder(bytes.length * 2);
        for (byte b : bytes)
        {
            sb.append(HEX[(b >> 4) & 0x0f]).append(HEX[b & 0x0f]);
        }
        return sb.toString();
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1b22b98cdeb4a9304cb5d48026a85128 | 1 dna:chromosome chromosome:GRCh37:1:1:249250621:1 | chr1 | |
---|---|---|---|
988c28e000e84c26d552359af1ea2e1d | 10 dna:chromosome chromosome:GRCh37:10:1:135534747:1 | chr10 | |
98c59049a2df285c76ffb1c6db8f8b96 | 11 dna:chromosome chromosome:GRCh37:11:1:135006516:1 | chr11 | |
06cbf126247d89664a4faebad130fe9c | GL000202.1 dna:supercontig supercontig::GL000202.1:1:40103:1 | chr11_gl000202_random | |
51851ac0e1a115847ad36449b0015864 | 12 dna:chromosome chromosome:GRCh37:12:1:133851895:1 | chr12 | |
283f8d7892baa81b510a015719ca7b0b | 13 dna:chromosome chromosome:GRCh37:13:1:115169878:1 | chr13 | |
98f3cae32b2a2e9524bc19813927542e | 14 dna:chromosome chromosome:GRCh37:14:1:107349540:1 | chr14 | |
e5645a794a8238215b2cd77acb95a078 | 15 dna:chromosome chromosome:GRCh37:15:1:102531392:1 | chr15 | |
fc9b1a7b42b97a864f56b348b06095e6 | 16 dna:chromosome chromosome:GRCh37:16:1:90354753:1 | chr16 | |
351f64d4f4f9ddd45b35336ad97aa6de | 17 dna:chromosome chromosome:GRCh37:17:1:81195210:1 | chr17 | |
96358c325fe0e70bee73436e8bb14dbd | GL000203.1 dna:supercontig supercontig::GL000203.1:1:37498:1 | chr17_gl000203_random | |
efc49c871536fa8d79cb0a06fa739722 | GL000204.1 dna:supercontig supercontig::GL000204.1:1:81310:1 | chr17_gl000204_random | |
d22441398d99caf673e9afb9a1908ec5 | GL000205.1 dna:supercontig supercontig::GL000205.1:1:174588:1 | chr17_gl000205_random | |
43f69e423533e948bfae5ce1d45bd3f1 | GL000206.1 dna:supercontig supercontig::GL000206.1:1:41001:1 | chr17_gl000206_random | |
b15d4b2d29dde9d3e4f93d1d0f2cbc9c | 18 dna:chromosome chromosome:GRCh37:18:1:78077248:1 | chr18 | |
f3814841f1939d3ca19072d9e89f3fd7 | GL000207.1 dna:supercontig supercontig::GL000207.1:1:4262:1 | chr18_gl000207_random | |
1aacd71f30db8e561810913e0b72636d | 19 dna:chromosome chromosome:GRCh37:19:1:59128983:1 | chr19 | |
aa81be49bf3fe63a79bdc6a6f279abf6 | GL000208.1 dna:supercontig supercontig::GL000208.1:1:92689:1 | chr19_gl000208_random | |
f40598e2a5a6b26e84a3775e0d1e2c81 | GL000209.1 dna:supercontig supercontig::GL000209.1:1:159169:1 | chr19_gl000209_random | |
d75b436f50a8214ee9c2a51d30b2c2cc | GL000191.1 dna:supercontig supercontig::GL000191.1:1:106433:1 | chr1_gl000191_random | |
325ba9e808f669dfeee210fdd7b470ac | GL000192.1 dna:supercontig supercontig::GL000192.1:1:547496:1 | chr1_gl000192_random | |
a0d9851da00400dec1098a9255ac712e | 2 dna:chromosome chromosome:GRCh37:2:1:243199373:1 | chr2 | |
0dec9660ec1efaaf33281c0d5ea2560f | 20 dna:chromosome chromosome:GRCh37:20:1:63025520:1 | chr20 | |
2979a6085bfe28e3ad6f552f361ed74d | 21 dna:chromosome chromosome:GRCh37:21:1:48129895:1 | chr21 | |
851106a74238044126131ce2a8e5847c | GL000210.1 dna:supercontig supercontig::GL000210.1:1:27682:1 | chr21_gl000210_random | |
a718acaa6135fdca8357d5bfe94211dd | 22 dna:chromosome chromosome:GRCh37:22:1:51304566:1 | chr22 | |
23dccd106897542ad87d2765d28a19a1 | 4 dna:chromosome chromosome:GRCh37:4:1:191154276:1 | chr4 | |
dbb6e8ece0b5de29da56601613007c2a | GL000193.1 dna:supercontig supercontig::GL000193.1:1:189789:1 | chr4_gl000193_random | |
6ac8f815bf8e845bb3031b73f812c012 | GL000194.1 dna:supercontig supercontig::GL000194.1:1:191469:1 | chr4_gl000194_random | |
0740173db9ffd264d728f32784845cd7 | 5 dna:chromosome chromosome:GRCh37:5:1:180915260:1 | chr5 | |
1d3a93a248d92a729ee764823acbbc6b | 6 dna:chromosome chromosome:GRCh37:6:1:171115067:1 | chr6 | |
618366e953d6aaad97dbe4777c29375e | 7 dna:chromosome chromosome:GRCh37:7:1:159138663:1 | chr7 | |
5d9ec007868d517e73543b005ba48535 | GL000195.1 dna:supercontig supercontig::GL000195.1:1:182896:1 | chr7_gl000195_random | |
96f514a9929e410c6651697bded59aec | 8 dna:chromosome chromosome:GRCh37:8:1:146364022:1 | chr8 | |
d92206d1bb4c3b4019c43c0875c06dc0 | GL000196.1 dna:supercontig supercontig::GL000196.1:1:38914:1 | chr8_gl000196_random | |
6f5efdd36643a9b8c8ccad6f2f1edc7b | GL000197.1 dna:supercontig supercontig::GL000197.1:1:37175:1 | chr8_gl000197_random | |
3e273117f15e0a400f01055d9f393768 | 9 dna:chromosome chromosome:GRCh37:9:1:141213431:1 | chr9 | |
868e7784040da90d900d2d1b667a1383 | GL000198.1 dna:supercontig supercontig::GL000198.1:1:90085:1 | chr9_gl000198_random | |
569af3b73522fab4b40995ae4944e78e | GL000199.1 dna:supercontig supercontig::GL000199.1:1:169874:1 | chr9_gl000199_random | |
75e4c8d17cd4addf3917d1703cacaf25 | GL000200.1 dna:supercontig supercontig::GL000200.1:1:187035:1 | chr9_gl000200_random | |
dfb7e7ec60ffdcb85cb359ea28454ee9 | GL000201.1 dna:supercontig supercontig::GL000201.1:1:36148:1 | chr9_gl000201_random | |
7daaa45c66b288847b9b32b964e623d3 | GL000211.1 dna:supercontig supercontig::GL000211.1:1:166566:1 | chrUn_gl000211 | |
563531689f3dbd691331fd6c5730a88b | GL000212.1 dna:supercontig supercontig::GL000212.1:1:186858:1 | chrUn_gl000212 | |
9d424fdcc98866650b58f004080a992a | GL000213.1 dna:supercontig supercontig::GL000213.1:1:164239:1 | chrUn_gl000213 | |
46c2032c37f2ed899eb41c0473319a69 | GL000214.1 dna:supercontig supercontig::GL000214.1:1:137718:1 | chrUn_gl000214 | |
5eb3b418480ae67a997957c909375a73 | GL000215.1 dna:supercontig supercontig::GL000215.1:1:172545:1 | chrUn_gl000215 | |
642a232d91c486ac339263820aef7fe0 | GL000216.1 dna:supercontig supercontig::GL000216.1:1:172294:1 | chrUn_gl000216 | |
6d243e18dea1945fb7f2517615b8f52e | GL000217.1 dna:supercontig supercontig::GL000217.1:1:172149:1 | chrUn_gl000217 | |
1d708b54644c26c7e01c2dad5426d38c | GL000218.1 dna:supercontig supercontig::GL000218.1:1:161147:1 | chrUn_gl000218 | |
f977edd13bac459cb2ed4a5457dba1b3 | GL000219.1 dna:supercontig supercontig::GL000219.1:1:179198:1 | chrUn_gl000219 | |
fc35de963c57bf7648429e6454f1c9db | GL000220.1 dna:supercontig supercontig::GL000220.1:1:161802:1 | chrUn_gl000220 | |
3238fb74ea87ae857f9c7508d315babb | GL000221.1 dna:supercontig supercontig::GL000221.1:1:155397:1 | chrUn_gl000221 | |
6fe9abac455169f50470f5a6b01d0f59 | GL000222.1 dna:supercontig supercontig::GL000222.1:1:186861:1 | chrUn_gl000222 | |
399dfa03bf32022ab52a846f7ca35b30 | GL000223.1 dna:supercontig supercontig::GL000223.1:1:180455:1 | chrUn_gl000223 | |
d5b2fc04f6b41b212a4198a07f450e20 | GL000224.1 dna:supercontig supercontig::GL000224.1:1:179693:1 | chrUn_gl000224 | |
63945c3e6962f28ffd469719a747e73c | GL000225.1 dna:supercontig supercontig::GL000225.1:1:211173:1 | chrUn_gl000225 | |
1c1b2cd1fccbc0a99b6a447fa24d1504 | GL000226.1 dna:supercontig supercontig::GL000226.1:1:15008:1 | chrUn_gl000226 | |
a4aead23f8053f2655e468bcc6ecdceb | GL000227.1 dna:supercontig supercontig::GL000227.1:1:128374:1 | chrUn_gl000227 | |
c5a17c97e2c1a0b6a9cc5a6b064b714f | GL000228.1 dna:supercontig supercontig::GL000228.1:1:129120:1 | chrUn_gl000228 | |
d0f40ec87de311d8e715b52e4c7062e1 | GL000229.1 dna:supercontig supercontig::GL000229.1:1:19913:1 | chrUn_gl000229 | |
b4eb71ee878d3706246b7c1dbef69299 | GL000230.1 dna:supercontig supercontig::GL000230.1:1:43691:1 | chrUn_gl000230 | |
ba8882ce3a1efa2080e5d29b956568a4 | GL000231.1 dna:supercontig supercontig::GL000231.1:1:27386:1 | chrUn_gl000231 | |
3e06b6741061ad93a8587531307057d8 | GL000232.1 dna:supercontig supercontig::GL000232.1:1:40652:1 | chrUn_gl000232 | |
7fed60298a8d62ff808b74b6ce820001 | GL000233.1 dna:supercontig supercontig::GL000233.1:1:45941:1 | chrUn_gl000233 | |
93f998536b61a56fd0ff47322a911d4b | GL000234.1 dna:supercontig supercontig::GL000234.1:1:40531:1 | chrUn_gl000234 | |
118a25ca210cfbcdfb6c2ebb249f9680 | GL000235.1 dna:supercontig supercontig::GL000235.1:1:34474:1 | chrUn_gl000235 | |
fdcd739913efa1fdc64b6c0cd7016779 | GL000236.1 dna:supercontig supercontig::GL000236.1:1:41934:1 | chrUn_gl000236 | |
e0c82e7751df73f4f6d0ed30cdc853c0 | GL000237.1 dna:supercontig supercontig::GL000237.1:1:45867:1 | chrUn_gl000237 | |
131b1efc3270cc838686b54e7c34b17b | GL000238.1 dna:supercontig supercontig::GL000238.1:1:39939:1 | chrUn_gl000238 | |
99795f15702caec4fa1c4e15f8a29c07 | GL000239.1 dna:supercontig supercontig::GL000239.1:1:33824:1 | chrUn_gl000239 | |
445a86173da9f237d7bcf41c6cb8cc62 | GL000240.1 dna:supercontig supercontig::GL000240.1:1:41933:1 | chrUn_gl000240 | |
ef4258cdc5a45c206cea8fc3e1d858cf | GL000241.1 dna:supercontig supercontig::GL000241.1:1:42152:1 | chrUn_gl000241 | |
2f8694fc47576bc81b5fe9e7de0ba49e | GL000242.1 dna:supercontig supercontig::GL000242.1:1:43523:1 | chrUn_gl000242 | |
cc34279a7e353136741c9fce79bc4396 | GL000243.1 dna:supercontig supercontig::GL000243.1:1:43341:1 | chrUn_gl000243 | |
0996b4475f353ca98bacb756ac479140 | GL000244.1 dna:supercontig supercontig::GL000244.1:1:39929:1 | chrUn_gl000244 | |
89bc61960f37d94abf0df2d481ada0ec | GL000245.1 dna:supercontig supercontig::GL000245.1:1:36651:1 | chrUn_gl000245 | |
e4afcd31912af9d9c2546acf1cb23af2 | GL000246.1 dna:supercontig supercontig::GL000246.1:1:38154:1 | chrUn_gl000246 | |
7de00226bb7df1c57276ca6baabafd15 | GL000247.1 dna:supercontig supercontig::GL000247.1:1:36422:1 | chrUn_gl000247 | |
5a8e43bec9be36c7b49c84d585107776 | GL000248.1 dna:supercontig supercontig::GL000248.1:1:39786:1 | chrUn_gl000248 | |
1d78abec37c15fe29a275eb08d5af236 | GL000249.1 dna:supercontig supercontig::GL000249.1:1:38502:1 | chrUn_gl000249 | |
7e0e2e580297b7764e31dbc80c2540dd | X dna:chromosome chromosome:GRCh37:X:1:155270560:1 | chrX |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
d89517b400226d3b56e753972a7cad67 chr17_ctg5_hap1 1680828 | |
641e4338fa8d52a5b781bd2a2c08d3c3 chr3 198022430 | |
fa24f81b680df26bcfb6d69b784fbe36 chr4_ctg9_hap1 590426 | |
fe71bc63420d666884f37a3ad79f3317 chr6_apd_hap1 4622290 | |
18c17e1641ef04873b15f40f6c8659a4 chr6_cox_hap2 4795371 | |
2a3c677c426a10e137883ae1ffb8da3f chr6_dbb_hap3 4610396 | |
9d51d4152174461cd6715c7ddc588dc8 chr6_mann_hap4 4683263 | |
efed415dd8742349cb7aaca054675b9a chr6_mcf_hap5 4833398 | |
094d037050cad692b57ea12c4fef790f chr6_qbl_hap6 4611984 | |
3b6d666200e72bcc036bf88a4d7e0749 chr6_ssto_hap7 4928567 | |
d2ed829b8a1628d16cbeee88e88e39eb chrM 16571 | |
1e86411d73e6f00a10590f976be01623 chrY 59373566 | |
fdfd811849cc2fadebc929bb925902e5 3 dna:chromosome chromosome:GRCh37:3:1:198022430:1 198022430 | |
c68f52674c9fb33aef52dcf399755519 MT gi|251831106|ref|NC_012920.1| Homo sapiens mitochondrion, complete genome 16569 | |
1fa3474750af0948bdf97d5a0ee52e51 Y dna:chromosome chromosome:GRCh37:Y:2649521:59034049:1 59373566 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment