Skip to content

Instantly share code, notes, and snippets.

@ChrisBeaumont
Created April 6, 2015 23:30
Show Gist options
  • Save ChrisBeaumont/bc5769e5e9578a37f64d to your computer and use it in GitHub Desktop.
Save ChrisBeaumont/bc5769e5e9578a37f64d to your computer and use it in GitHub Desktop.
arup duplicates
In [40]: from collections import defaultdict
In [41]: data = defaultdict(list)
In [42]: for row in json.load(open('arup.json')):
    data[getkey(row)].append(row)
....:
In [43]: for vals in data.values():
    if not all(v == vals[0] for v in vals):
        print json.dumps(vals, indent=2, sort_keys=True)
        print '\n-----------------------------\n\n'
....:
[
{
"alt": "A",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/pubmed/20039896",
"pmid": [
"20039896",
"17344846"
],
"protein_change": "p.V292M",
"sequence_change": "c.874G>A"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43601830,
"ref": "G"
},
{
"alt": "A",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/pubmed/21655256",
"pmid": [
"21655256"
],
"protein_change": "p.V292M",
"sequence_change": "c.874G>A"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43601830,
"ref": "G"
}
]
-----------------------------
[
{
"alt": "C",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=pubmed&Cmd=ShowDetailView&TermToSearch=15531714&ordinalpos=2&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum",
"pmid": [
"16705552",
"15531714"
],
"protein_change": "p.M918T",
"sequence_change": "c.2753T>C"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43617416,
"ref": "T"
},
{
"alt": "C",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=pubmed&Cmd=ShowDetailView&TermToSearch=7906866&ordinalpos=2&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum",
"pmid": [
"19240193",
"8880581",
"10445857",
"7906417",
"7906866",
"7824936",
"15281979",
"9242375",
"10679286",
"17848262",
"22992277"
],
"protein_change": "p.M918T",
"sequence_change": "c.2753T>C"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43617416,
"ref": "T"
}
]
-----------------------------
[
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=pubmed&Cmd=ShowDetailView&TermToSearch=11230481&ordinalpos=7&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum",
"pmid": [
"11230481",
"12016484",
"12116277"
],
"protein_change": "p.E768D",
"sequence_change": "c.2304G>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43613840,
"ref": "G"
},
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.arup.utah.edu/dev/database/MEN2/unpublished.html",
"pmid": null,
"protein_change": "p.E768D",
"sequence_change": "c.2304G>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43613840,
"ref": "G"
}
]
-----------------------------
[
{
"alt": "G",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=PubMed&list_uids=12072055&dopt=AbstractPlus",
"pmid": [
"12072055"
],
"protein_change": "p.Q781R",
"sequence_change": "c.2342A>G"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43613878,
"ref": "A"
},
{
"alt": "G",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/pubmed/23468374",
"pmid": [
"23468374"
],
"protein_change": "p.Q781R",
"sequence_change": "c.2342A>G"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43613878,
"ref": "A"
}
]
-----------------------------
[
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=pubmed&Cmd=ShowDetailView&TermToSearch=16705552&ordinalpos=3&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum",
"pmid": [
"19826964",
"16705552"
],
"protein_change": "p.C620F",
"sequence_change": "c.1859G>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43609103,
"ref": "G"
},
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=pubmed&Cmd=ShowDetailView&TermToSearch=7916559&ordinalpos=2&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum",
"pmid": [
"8909322",
"19443294",
"20979234",
"7916559",
"18063059",
"7874109",
"18976013"
],
"protein_change": "p.C620F",
"sequence_change": "c.1859G>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43609103,
"ref": "G"
}
]
-----------------------------
[
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Uncertain",
"external_links": "http://www.ncbi.nlm.nih.gov/pubmed/18322301",
"pmid": [
"18322301"
],
"protein_change": "p.S649L",
"sequence_change": "c.1946C>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43609994,
"ref": "C"
},
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Uncertain",
"external_links": "http://www.ncbi.nlm.nih.gov/pubmed/18322301",
"pmid": [
"18322301"
],
"protein_change": "p.S649L",
"sequence_change": "c.1946C>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43609994,
"ref": "C"
},
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Uncertain",
"external_links": "http://www.ncbi.nlm.nih.gov/pubmed/19826964",
"pmid": [
"19826964"
],
"protein_change": "p.S649L",
"sequence_change": "c.1946C>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43609994,
"ref": "C"
},
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Uncertain",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=pubmed&Cmd=ShowDetailView&TermToSearch=11230481&ordinalpos=5&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum",
"pmid": [
"18322301",
"19906784",
"11230481",
"15320968",
"21551259"
],
"protein_change": "p.S649L",
"sequence_change": "c.1946C>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43609994,
"ref": "C"
},
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Uncertain",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=pubmed&Cmd=ShowDetailView&TermToSearch=18058472&ordinalpos=1&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum",
"pmid": [
"18058472"
],
"protein_change": "p.S649L",
"sequence_change": "c.1946C>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43609994,
"ref": "C"
}
]
-----------------------------
[
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Uncertain",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=pubmed&Cmd=ShowDetailView&TermToSearch=14718397&ordinalpos=1&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum",
"pmid": [
"14718397"
],
"protein_change": "p.D631D",
"sequence_change": "c.1893C>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43609941,
"ref": "C"
},
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Uncertain",
"external_links": "http://www.ncbi.nlm.nih.gov/pubmed/17344846",
"pmid": [
"17344846"
],
"protein_change": "p.D631D",
"sequence_change": "c.1893C>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43609941,
"ref": "C"
}
]
-----------------------------
[
{
"alt": "A",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Uncertain",
"external_links": "http://www.ncbi.nlm.nih.gov/pubmed/21810974",
"pmid": [
"21810974"
],
"protein_change": "p.V648I",
"sequence_change": "c.1942G>A"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43609990,
"ref": "G"
},
{
"alt": "A",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Uncertain",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=PubMed&list_uids=12466368&dopt=AbstractPlus",
"pmid": [
"12466368",
"18209889"
],
"protein_change": "p.V648I",
"sequence_change": "c.1942G>A"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43609990,
"ref": "G"
}
]
-----------------------------
[
{
"alt": "A",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/pubmed/21655256",
"pmid": [
"21655256"
],
"protein_change": "p.C634Y",
"sequence_change": "c.1901G>A"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43609949,
"ref": "G"
},
{
"alt": "A",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/pubmed/20080836",
"pmid": [
"23330657",
"20080836",
"17610518"
],
"protein_change": "p.C634Y",
"sequence_change": "c.1901G>A"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43609949,
"ref": "G"
},
{
"alt": "A",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=pubmed&Cmd=ShowDetailView&TermToSearch=8099202&ordinalpos=2&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum",
"pmid": [
"12604374",
"9230192",
"19240193",
"11524247",
"8099202",
"9820617",
"7824936",
"7491519",
"12864791",
"18063059",
"7914213",
"12711285"
],
"protein_change": "p.C634Y",
"sequence_change": "c.1901G>A"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43609949,
"ref": "G"
}
]
-----------------------------
[
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=pubmed&Cmd=ShowDetailView&TermToSearch=7608256&ordinalpos=3&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum",
"pmid": [
"14718397",
"16839264",
"18062802",
"22274720",
"10049754",
"7608256",
"24134185"
],
"protein_change": "p.D631Y",
"sequence_change": "c.1891G>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43609939,
"ref": "G"
},
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=pubmed&Cmd=ShowDetailView&TermToSearch=11149622&ordinalpos=13&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum",
"pmid": [
"11149622"
],
"protein_change": "p.D631Y",
"sequence_change": "c.1891G>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43609939,
"ref": "G"
}
]
-----------------------------
[
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=PubMed&list_uids=9506724&dopt=Abstract",
"pmid": [
"18058472",
"16388093",
"17483988",
"12566528",
"17021738",
"18062802",
"9506724",
"9090527",
"12205548",
"19826964",
"19906784",
"17102091",
"15753368"
],
"protein_change": "p.Y791F",
"sequence_change": "c.2372A>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43613908,
"ref": "A"
},
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/pubmed/19826964",
"pmid": [
"19826964"
],
"protein_change": "p.Y791F",
"sequence_change": "c.2372A>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43613908,
"ref": "A"
},
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=pubmed&Cmd=ShowDetailView&TermToSearch=16705552&ordinalpos=2&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum",
"pmid": [
"19826964",
"16705552"
],
"protein_change": "p.Y791F",
"sequence_change": "c.2372A>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43613908,
"ref": "A"
},
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.arup.utah.edu/database/MEN2/unpublished.html",
"pmid": null,
"protein_change": "p.Y791F",
"sequence_change": "c.2372A>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43613908,
"ref": "A"
},
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=pubmed&Cmd=ShowDetailView&TermToSearch=15531714&ordinalpos=2&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum",
"pmid": [
"19826964",
"16705552",
"15531714"
],
"protein_change": "p.Y791F",
"sequence_change": "c.2372A>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43613908,
"ref": "A"
},
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/pubmed/20080836",
"pmid": [
"23330657",
"20080836",
"17610518"
],
"protein_change": "p.Y791F",
"sequence_change": "c.2372A>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43613908,
"ref": "A"
}
]
-----------------------------
[
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Uncertain",
"external_links": "http://www.ncbi.nlm.nih.gov/pubmed/17344846",
"pmid": [
"17344846"
],
"protein_change": "p.R844L",
"sequence_change": "c.2531G>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43615117,
"ref": "G"
},
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Uncertain",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=PubMed&list_uids=10826520&dopt=Abstract",
"pmid": [
"10826520"
],
"protein_change": "p.R844L",
"sequence_change": "c.2531G>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43615117,
"ref": "G"
}
]
-----------------------------
[
{
"alt": "A",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Uncertain",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=PubMed&list_uids=12016484&dopt=AbstractPlus",
"pmid": [
"12016484"
],
"protein_change": "p.V778I",
"sequence_change": "c.2332G>A"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43613868,
"ref": "G"
},
{
"alt": "A",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Uncertain",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=pubmed&Cmd=ShowDetailView&TermToSearch=11732489&ordinalpos=5&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum",
"pmid": [
"11732489"
],
"protein_change": "p.V778I",
"sequence_change": "c.2332G>A"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43613868,
"ref": "G"
}
]
-----------------------------
[
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Uncertain",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=pubmed&Cmd=ShowDetailView&TermToSearch=15858153&ordinalpos=2&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum",
"pmid": [
"15858153"
],
"protein_change": "p.D631V",
"sequence_change": "c.1892A>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43609940,
"ref": "A"
},
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Uncertain",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=pubmed&Cmd=ShowDetailView&TermToSearch=14718397&ordinalpos=1&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum",
"pmid": [
"14718397"
],
"protein_change": "p.D631V",
"sequence_change": "c.1892A>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43609940,
"ref": "A"
}
]
-----------------------------
[
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=pubmed&Cmd=ShowDetailView&TermToSearch=7784092&ordinalpos=2&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum",
"pmid": [
"7784092",
"11114642",
"9384613",
"20497437",
"16813623",
"10235148",
"16343097",
"11932300",
"15741265"
],
"protein_change": "p.V804L",
"sequence_change": "c.2410G>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43614996,
"ref": "G"
},
{
"alt": "T",
"annotations": [
{
"annotator": "arup",
"data": {
"classification": "Pathogenic",
"external_links": "http://www.arup.utah.edu/database/MEN2/unpublished.html",
"pmid": null,
"protein_change": "p.V804L",
"sequence_change": "c.2410G>T"
},
"transcript": "NM_020630.4"
}
],
"chrom": "chr10",
"genome": "hg19",
"offset": 43614996,
"ref": "G"
}
]
-----------------------------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment