Created
October 26, 2016 21:02
-
-
Save arq5x/8d9c2767d2495ba4b9bf6f555d29c088 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# make sure all rows have 99 fields | |
$ awk 'BEGIN{FS="\t"} {print NF}' /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt | uniq | |
99 | |
# how many HET (1) and HOM_ALT (3) genotypes were there? | |
$ awk 'BEGIN{FS="\t"} {print $99}' /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt | sort | uniq -c | |
# get rid of headers except for the first one | |
(head -n 1 /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt; grep -v gt_types /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt) | |
# make sure there are no LOW entries (note grep -w for specificity) | |
(head -n 1 /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt; grep -v gt_types /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt) | grep -w LOW | |
# what col number is the gene column? | |
head -1 /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt | tr "\t" "\n" | cat -n | grep gene | |
20 gene | |
# print, in descending order, the number of variants observed per gene | |
awk '{print $20}' /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt | sort | uniq -c | awk '{print $2"\t"$1}' | sort -k2,2nr | head -n 50 | |
MUC16 25126 | |
MUC6 20998 | |
AHNAK2 8647 | |
TTN 8244 | |
MUC5B 6907 | |
MUC19 6872 | |
FLG 6681 | |
NBPF10 6375 | |
ANKRD36 5687 | |
HRNR 5576 | |
NBPF1 5472 | |
FAM230A 4745 | |
PABPC3 4733 | |
FAM182B 4497 | |
FRG1B 4409 | |
PDE4DIP 4334 | |
TAS2R31 4208 | |
IGFN1 4176 | |
FCGBP 3925 | |
OBSCN 3869 | |
MUC2 3636 | |
AHNAK 3621 | |
TAS2R43 3586 | |
SERPINA1 3432 | |
CTD-3088G3.8 3371 | |
TAS2R19 3115 | |
ANKRD36C 3096 | |
MKI67 3073 | |
ANKRD30B 2986 | |
TAS2R46 2905 | |
NBPF14 2796 | |
GOLGA6L2 2717 | |
NEB 2652 | |
PRAMEF1 2621 | |
TCHH 2564 | |
FLG2 2548 | |
KRT18 2455 | |
ZNF257 2412 | |
NBPF12 2374 | |
PLIN4 2371 | |
NBPF9 2349 | |
FAM186A 2348 | |
PRAMEF2 2345 | |
PRAMEF4 2251 | |
OR4A16 2247 | |
GXYLT1 2228 | |
CNN2 2200 | |
FSIP2 2194 | |
RP11-683L23.1 2173 | |
LILRA6 2137 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment