Created October 21, 2017 16:41
-
-
Save mikkun/e15cbbbe0f5e32adf349940f88b7440e to your computer and use it in GitHub Desktop.
「シェル芸勉強会 meets バイオインフォマティクス vol.1」解答例
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
「シェル芸勉強会 meets バイオインフォマティクス vol.1」解答例
資料URL: https://bio-shell.connpass.com/event/66089/presentation/
問題URL: https://www.slideshare.net/EmiHattori1/ss-80854726
A1: | |
$ echo 'Gorilla gorilla' | perl -nlE 's/ori/a/g;s/a\b/us/g;say' | |
Gallus gallus | |
A2: | |
$ mkdir -p koi/chr{{1..50},MT,Un} | |
$ ls koi/ | |
chr1 chr12 chr15 chr18 chr20 chr23 chr26 chr29 chr31 chr34 chr37 chr4 chr42 chr45 chr48 chr50 chr8 chrUn | |
chr10 chr13 chr16 chr19 chr21 chr24 chr27 chr3 chr32 chr35 chr38 chr40 chr43 chr46 chr49 chr6 chr9 | |
chr11 chr14 chr17 chr2 chr22 chr25 chr28 chr30 chr33 chr36 chr39 chr41 chr44 chr47 chr5 chr7 chrMT | |
A3: | |
$ awk '!/^#/{if(length($4)<length($5)&&$NF~/1\/1/)print}' merge-test-c.vcf | |
1 3199812 . G GT . . . GT 1/1 | |
A4: | |
$ ls | perl -nlE '$s=$_;s/^(.*)\.(fa|mfa|fas|fasta)$/$1.fa/;s/^(.*)\.(fq|fastq)$/$1.fq/;say "mv -f $s $_"' | sh 2>/dev/null | |
$ ls | |
fa.fa.fas.fas.fas.fa fas.fas.fastq.fastq.mfa.fa fasta.fasta.fq.fq.fas.fa fastq.fq.fq.fas.fasta.fa fq.fq.fasta.fasta.fa.fa | |
fa.fa.fas.fas.fastq.fa fas.fasta.fa.fa.fas.fa fasta.fastq.fq.fq.fas.fa fastq.fq.fq.fasta.fasta.fa fq.fq.fasta.fasta.fa.fq | |
fa.fa.fas.fas.fastq.fq fas.fasta.fa.fa.fq.fa fasta.fastq.fq.fq.fasta.fa fastq.fq.mfa.fasta.fasta.fa fq.fq.mfa.fasta.fasta.fa | |
fa.fa.mfa.fas.fastq.fq fas.fasta.fa.fa.mfa.fa fasta.fastq.fq.fq.mfa.fa fastq.fq.mfa.fasta.fasta.fq fq.mfa.fasta.fasta.fa.fq | |
fa.fa.mfa.mfa.fastq.fq fas.fastq.fa.mfa.mfa.fa fasta.fq.fq.fas.fa.fa fastq.mfa.fasta.fastq.fq.fq fq.mfa.fasta.fasta.fq.fq | |
fa.fas.fas.fastq.fa.fa fas.fastq.fastq.fa.mfa.fa fasta.fq.fq.fas.fas.fa fastq.mfa.mfa.fasta.fasta.fq mfa.fas.fastq.fastq.fq.fa | |
fa.fas.fas.fastq.fastq.fa fas.fastq.fastq.fq.mfa.fa fasta.fq.fq.fas.fasta.fa fastq.mfa.mfa.fasta.fastq.fq mfa.fas.fastq.fastq.mfa.fa | |
fa.fq.fas.fa.fa.fa fas.fastq.fastq.mfa.mfa.fa fastq.fa.fa.mfa.fas.fq fq.fas.fas.fa.fa.fa mfa.fasta.fasta.fa.fq.fa | |
fa.fq.fas.fas.fa.fa fasta.fa.fa.fa.fas.fa fastq.fa.mfa.mfa.fas.fq fq.fas.fas.fasta.fa.fa mfa.fasta.fasta.fq.fq.fa | |
fa.fq.fas.fas.fastq.fa fasta.fa.fa.fas.fas.fq fastq.fa.mfa.mfa.fastq.fq fq.fas.fas.fastq.fa.fa mfa.fasta.fasta.fq.fq.fq | |
fa.mfa.mfa.fasta.fastq.fq fasta.fa.fa.mfa.fas.fq fastq.fastq.fastq.mfa.mfa.fa fq.fas.fasta.fa.fa.fa mfa.fasta.fastq.fq.fq.fa | |
fa.mfa.mfa.fastq.fastq.fq fasta.fa.fq.fas.fas.fa fastq.fastq.fq.fq.fasta.fa fq.fas.fasta.fasta.fa.fq mfa.fastq.fastq.fq.fq.fa | |
fas.fa.fa.fa.mfa.fa fasta.fa.fq.fas.fas.fq fastq.fastq.fq.mfa.fasta.fa fq.fasta.fasta.fa.fq.fa mfa.fastq.fastq.fq.mfa.fa | |
fas.fa.fa.mfa.fas.fq fasta.fasta.fa.fa.fas.fa fastq.fastq.mfa.mfa.fasta.fa fq.fq.fas.fas.fa.fa mfa.mfa.fasta.fastq.fq.fq | |
fas.fas.fa.fa.mfa.fa fasta.fasta.fa.fq.fas.fa fastq.fastq.mfa.mfa.fasta.fq fq.fq.fas.fasta.fa.fa mfa.mfa.fastq.fastq.fq.fa | |
fas.fas.fastq.fa.mfa.fa fasta.fasta.fastq.fq.fq.fa fastq.fastq.mfa.mfa.mfa.fa fq.fq.fas.fasta.fasta.fa mfa.mfa.fastq.fq.mfa.fa | |
A5: | |
$ cat linda.txt | awk '!/^#/' | rev | awk -F, '{print $2}' | rev | |
Linda annamensis | |
Linda annulicornis | |
Linda assamensis | |
Linda atricornis | |
Linda bimaculicollis | |
Linda femorata | |
Linda fraterna | |
Linda gracilicornis | |
Linda javaensis | |
Linda macilenta | |
Linda major | |
Linda nigroscutata | |
Linda pyritosa | |
Linda rubescens | |
Linda semiatra | |
Linda semivittata | |
Linda signaticornis | |
Linda stolata | |
Linda subannulicornis | |
Linda zayuensis | |
A6-1: | |
$ join -t $'\t' -a 1 -a 2 -o 0 1.2 2.2 -e '-' exp_survivor.txt exp_walker.txt | |
GeneA 1.1 0.1 | |
GeneB 29.1 0.1 | |
GeneC 0.3 - | |
GeneD - 500.6 | |
GeneE 4.2 4.2 | |
GeneF 3.9 - | |
GeneG - 34.7 | |
A6-2: | |
$ awk '{k[$1]=k[$1]" "$2"@"FILENAME}END{for(v in k){print v,k[v]}}' exp_survivor.txt exp_walker.txt | awk '{if(NF==3){print}else{if($2~/exp_survivor\.txt/){print $1,$2,"-"}else{print $1,"-",$2}}}' | sed 's/@exp_\(survivor\|walker\)\.txt//g' | sed 's/ \+/\t/g' | sort -k1,1 | |
GeneA 1.1 0.1 | |
GeneB 29.1 0.1 | |
GeneC 0.3 - | |
GeneD - 500.6 | |
GeneE 4.2 4.2 | |
GeneF 3.9 - | |
GeneG - 34.7 | |
A7: | |
$ awk 'NR!=1{if($9>=0.9)print $1"\t"$2-1"\t"$2}' tommo_EGFR.txt | |
chr7 55127783 55127784 | |
chr7 55128206 55128207 | |
chr7 55135472 55135473 | |
chr7 55136396 55136397 | |
chr7 55156418 55156419 | |
chr7 55261341 55261342 | |
chr7 55266416 55266417 | |
chr7 55272825 55272826 | |
chr7 55276009 55276010 | |
chr7 55279279 55279280 | |
chr7 55294240 55294241 | |
chr7 55294677 55294678 | |
chr7 55297655 55297656 | |
chr7 55299233 55299234 | |
chr7 55305315 55305316 | |
chr7 55305750 55305751 | |
chr7 55305823 55305824 | |
chr7 55306471 55306472 | |
chr7 55306624 55306625 | |
chr7 55308929 55308930 | |
chr7 55309731 55309732 | |
chr7 55312339 55312340 | |
A8: | |
$ jq -c '.results[] | [.accession, .symbol, .definition]' fridayThe13th.json | sed 's/^\["//; s/","/\t/g; s/"\]$//' | |
NM_001303620 DNASE1L1 Homo sapiens deoxyribonuclease 1 like 1 (DNASE1L1), transcript variant 5, mRNA. | |
NM_001009934 DNASE1L1 Homo sapiens deoxyribonuclease 1 like 1 (DNASE1L1), transcript variant 4, mRNA. | |
NM_001009933 DNASE1L1 Homo sapiens deoxyribonuclease 1 like 1 (DNASE1L1), transcript variant 3, mRNA. | |
NM_006730 DNASE1L1 Homo sapiens deoxyribonuclease 1 like 1 (DNASE1L1), transcript variant 1, mRNA. | |
NM_001009932 DNASE1L1 Homo sapiens deoxyribonuclease 1 like 1 (DNASE1L1), transcript variant 2, mRNA. | |
NM_019078 UGT1A5 Homo sapiens UDP glucuronosyltransferase family 1 member A5 (UGT1A5), mRNA. | |
NM_001144952 SDK2 Homo sapiens sidekick cell adhesion molecule 2 (SDK2), mRNA. | |
NM_205862 UGT1A6 Homo sapiens UDP glucuronosyltransferase family 1 member A6 (UGT1A6), transcript variant 2, mRNA. | |
NM_019077 UGT1A7 Homo sapiens UDP glucuronosyltransferase family 1 member A7 (UGT1A7), mRNA. | |
NM_001072 UGT1A6 Homo sapiens UDP glucuronosyltransferase family 1 member A6 (UGT1A6), transcript variant 1, mRNA. | |
NM_001079653 SDK1 Homo sapiens sidekick cell adhesion molecule 1 (SDK1), transcript variant 2, mRNA. | |
NM_019093 UGT1A3 Homo sapiens UDP glucuronosyltransferase family 1 member A3 (UGT1A3), mRNA. | |
NM_019076 UGT1A8 Homo sapiens UDP glucuronosyltransferase family 1 member A8 (UGT1A8), mRNA. | |
NM_152744 SDK1 Homo sapiens sidekick cell adhesion molecule 1 (SDK1), transcript variant 1, mRNA. | |
A9: | |
$ awk '!/^@/&&$2!=0{times[$1]+=1;lines[$1]=lines[$1]"%NEXT%"$0}END{for(i in times)print times[i],lines[i]}' test_input_1_a.sam | awk '/^2/' | sed 's/^2 %NEXT%//;s/%NEXT%/\n/' | |
r000 99 insert 50 30 10M = 80 30 ATTTAGCTAC AAAAAAAAAA RG:Z:cow PG:Z:bull | |
r000 211 insert 80 30 10M = 50 -30 CCCAATCATT AAAAAAAAAA RG:Z:cow PG:Z:bull | |
r001 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 RG:Z:fish PG:Z:colt | |
r001 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * RG:Z:fish PG:Z:colt | |
A10: | |
$ clear; echo 'This message will self-destruct in five seconds.'; sleep 5; clear |
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.