Skip to content

Instantly share code, notes, and snippets.

@maelle
Last active July 20, 2023 14:55
Show Gist options
  • Save maelle/9fb6a9c6676ccec796fff9e00ab82979 to your computer and use it in GitHub Desktop.
Save maelle/9fb6a9c6676ccec796fff9e00ab82979 to your computer and use it in GitHub Desktop.
glitter + UniProt
# pak::pak("lvaudor/glitter")
library("glitter")
# Select all taxa from the UniProt taxonomy
# This query doesn't work with glitter yet, see https://github.com/lvaudor/glitter/issues/144
# Bacterial taxa from the UniProt taxonomy, together with their scientific name.
# Only `up` and `taxon` are declared: rdfs is a common prefix so built-in.
bacterial_taxa_query <- spq_init() %>%
  spq_prefix(
    prefixes = c(
      up = "http://purl.uniprot.org/core/",
      taxon = "http://purl.uniprot.org/taxonomy/"
    )
  ) %>%
  spq_add("?taxon a up:Taxon") %>%
  spq_mutate(name = up::scientificName(taxon)) %>%
  # taxon:2 is the class the taxa must descend from to count as bacterial here.
  spq_add("?taxon rdfs:subClassOf taxon:2") %>%
  spq_head(10)

# Run the query against the UniProt SPARQL endpoint; the result prints at top
# level.
spq_perform(
  bacterial_taxa_query,
  endpoint = "https://sparql.uniprot.org/sparql"
)
# UniProt entries that were integrated on the 30th of November 2010.
# NOTE(review): xsd is not declared below, presumably it is built in like
# rdfs -- confirm.
entries_created_query <- spq_init() %>%
  spq_prefix(
    prefixes = c(up = "http://purl.uniprot.org/core/")
  ) %>%
  spq_add("?protein a up:Protein") %>%
  # The date is written as a typed literal inside the triple pattern.
  spq_add("?protein up:created '2010-11-30'^^xsd:date") %>%
  spq_head(10)

# Run the query against the UniProt SPARQL endpoint; the result prints at top
# level.
spq_perform(
  entries_created_query,
  endpoint = "https://sparql.uniprot.org/sparql"
)
# Reviewed UniProt entries (Swiss-Prot) and their recommended protein name,
# restricted to entries whose preferred gene name contains the text 'DNA'.
# NOTE(review): skos is not declared below, presumably it is built in like
# rdfs -- confirm.
dna_gene_name_query <- spq_init() %>%
  spq_prefix(
    prefixes = c(up = "http://purl.uniprot.org/core/")
  ) %>%
  spq_add("?protein a up:Protein") %>%
  spq_add("?protein up:reviewed true") %>%
  # Recommended name: protein -> recommended name node -> full name.
  spq_add("?protein up:recommendedName ?recommended") %>%
  spq_add("?recommended up:fullName ?name") %>%
  # Preferred gene label: protein -> gene -> prefLabel, then filter on it.
  spq_add("?protein up:encodedBy ?gene") %>%
  spq_add("?gene skos:prefLabel ?text") %>%
  spq_filter(str_detect(text, 'DNA')) %>%
  spq_head(10)

# Run the query against the UniProt SPARQL endpoint; the result prints at top
# level.
spq_perform(
  dna_gene_name_query,
  endpoint = "https://sparql.uniprot.org/sparql"
)
@maelle
Copy link
Author

maelle commented Jul 20, 2023

Results!

# pak::pak("lvaudor/glitter")
library("glitter") 

# Select all taxa from the UniProt taxonomy
# This query doesn't work with glitter yet, see https://github.com/lvaudor/glitter/issues/144

# Bacterial taxa from the UniProt taxonomy, together with their scientific name.
# Only `up` and `taxon` are declared: rdfs is a common prefix so built-in.
bacterial_taxa_query <- spq_init() %>%
  spq_prefix(
    prefixes = c(
      up = "http://purl.uniprot.org/core/",
      taxon = "http://purl.uniprot.org/taxonomy/"
    )
  ) %>%
  spq_add("?taxon a up:Taxon") %>%
  spq_mutate(name = up::scientificName(taxon)) %>%
  # taxon:2 is the class the taxa must descend from to count as bacterial here.
  spq_add("?taxon rdfs:subClassOf taxon:2") %>%
  spq_head(10)

# Run the query against the UniProt SPARQL endpoint; the result prints at top
# level.
spq_perform(
  bacterial_taxa_query,
  endpoint = "https://sparql.uniprot.org/sparql"
)
#> # A tibble: 10 × 2
#>    name                                               taxon                     
#>    <chr>                                              <chr>                     
#>  1 Pseudomonas sp. A2                                 http://purl.uniprot.org/t…
#>  2 Thioalkalivibrio paradoxus                         http://purl.uniprot.org/t…
#>  3 Bombilactobacillus mellifer                        http://purl.uniprot.org/t…
#>  4 Leuconostoc mesenteroides                          http://purl.uniprot.org/t…
#>  5 Burkholderia pseudomallei MSHR840                  http://purl.uniprot.org/t…
#>  6 Campylobacter fetus subsp. venerealis cfvi03/293   http://purl.uniprot.org/t…
#>  7 'Brassica oleracea' chlorantie phytoplasma         http://purl.uniprot.org/t…
#>  8 Mycobacteroides abscessus subsp. bolletii CRM-0020 http://purl.uniprot.org/t…
#>  9 Mycobacterium sp. N1001                            http://purl.uniprot.org/t…
#> 10 Sphaerisporangium sp. SANK 60911                   http://purl.uniprot.org/t…


# UniProt entries that were integrated on the 30th of November 2010.
# NOTE(review): xsd is not declared below, presumably it is built in like
# rdfs -- confirm.
entries_created_query <- spq_init() %>%
  spq_prefix(
    prefixes = c(up = "http://purl.uniprot.org/core/")
  ) %>%
  spq_add("?protein a up:Protein") %>%
  # The date is written as a typed literal inside the triple pattern.
  spq_add("?protein up:created '2010-11-30'^^xsd:date") %>%
  spq_head(10)

# Run the query against the UniProt SPARQL endpoint; the result prints at top
# level.
spq_perform(
  entries_created_query,
  endpoint = "https://sparql.uniprot.org/sparql"
)
#> # A tibble: 10 × 1
#>    protein                               
#>    <chr>                                 
#>  1 http://purl.uniprot.org/uniprot/E1CXN8
#>  2 http://purl.uniprot.org/uniprot/E1CZM6
#>  3 http://purl.uniprot.org/uniprot/E1D0E5
#>  4 http://purl.uniprot.org/uniprot/E1D4Y1
#>  5 http://purl.uniprot.org/uniprot/E1DDK4
#>  6 http://purl.uniprot.org/uniprot/E1DGL0
#>  7 http://purl.uniprot.org/uniprot/E1DLZ5
#>  8 http://purl.uniprot.org/uniprot/E1DMG1
#>  9 http://purl.uniprot.org/uniprot/E1E9V7
#> 10 http://purl.uniprot.org/uniprot/E1EC84

# Reviewed UniProt entries (Swiss-Prot) and their recommended protein name,
# restricted to entries whose preferred gene name contains the text 'DNA'.
# NOTE(review): skos is not declared below, presumably it is built in like
# rdfs -- confirm.
dna_gene_name_query <- spq_init() %>%
  spq_prefix(
    prefixes = c(up = "http://purl.uniprot.org/core/")
  ) %>%
  spq_add("?protein a up:Protein") %>%
  spq_add("?protein up:reviewed true") %>%
  # Recommended name: protein -> recommended name node -> full name.
  spq_add("?protein up:recommendedName ?recommended") %>%
  spq_add("?recommended up:fullName ?name") %>%
  # Preferred gene label: protein -> gene -> prefLabel, then filter on it.
  spq_add("?protein up:encodedBy ?gene") %>%
  spq_add("?gene skos:prefLabel ?text") %>%
  spq_filter(str_detect(text, 'DNA')) %>%
  spq_head(10)

# Run the query against the UniProt SPARQL endpoint; the result prints at top
# level.
spq_perform(
  dna_gene_name_query,
  endpoint = "https://sparql.uniprot.org/sparql"
)
#> # A tibble: 10 × 5
#>    gene                                          protein name  text  recommended
#>    <chr>                                         <chr>   <chr> <chr> <chr>      
#>  1 http://purl.uniprot.org/uniprot/O18998#gene-… http:/… Deox… DNAS… http://pur…
#>  2 http://purl.uniprot.org/uniprot/O18998#gene-… http:/… Deox… DNAS… http://pur…
#>  3 http://purl.uniprot.org/uniprot/O18998#gene-… http:/… Deox… DNAS… http://pur…
#>  4 http://purl.uniprot.org/uniprot/O18998#gene-… http:/… Deox… DNAS… http://pur…
#>  5 http://purl.uniprot.org/uniprot/A5D7F5#gene-… http:/… DnaJ… DNAJ… http://pur…
#>  6 http://purl.uniprot.org/uniprot/A5D7F5#gene-… http:/… DnaJ… DNAJ… http://pur…
#>  7 http://purl.uniprot.org/uniprot/A5D7F5#gene-… http:/… DnaJ… DNAJ… http://pur…
#>  8 http://purl.uniprot.org/uniprot/A5D7F5#gene-… http:/… DnaJ… DNAJ… http://pur…
#>  9 http://purl.uniprot.org/uniprot/Q9NYC9#gene-… http:/… Dyne… DNAH9 http://pur…
#> 10 http://purl.uniprot.org/uniprot/Q9NYC9#gene-… http:/… Dyne… DNAH9 http://pur…

Created on 2023-07-20 with reprex v2.0.2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment