Skip to content

Instantly share code, notes, and snippets.

@mercutio22
Created June 1, 2012 08:03
Show Gist options
  • Save mercutio22/2850122 to your computer and use it in GitHub Desktop.
Save mercutio22/2850122 to your computer and use it in GitHub Desktop.
main search class
from django.views.generic import ListView
from django.db.models import Q
from models import GeneInformation, MirnaInformation
import re
from django.core.exceptions import FieldError
from base.views import Intragenic, Intergenic, HostGenes
class Search(ListView):
    """Paginated search view over genes and miRNAs.

    The search string is read from the ``terms`` GET parameter. Three
    fixed phrases ('Intragenic miRNAs', 'Intergenic miRNAs' and
    'Genes hosting miRNAs', compared case-insensitively) delegate to the
    querysets of the ``Intragenic``, ``Intergenic`` and ``HostGenes``
    views. Any other input is split on whitespace and every keyword is
    looked up either as a miRNA or as a gene.
    """

    paginate_by = 10
    queryset = GeneInformation.objects.none()

    # MIRBASE miRNA symbol pattern; a keyword whose beginning matches it is
    # searched in MirnaInformation, anything else in GeneInformation.
    _MIRNA_PATTERN = re.compile(r'mir-?(\d+)|hsa|mirlet|let-?|MI\d+',
                                re.IGNORECASE)

    # Characters that have a special meaning in a regular expression.
    _REGEX_METACHARS = re.compile(r'([\\.^$*+?()\[\]{}|])')

    @classmethod
    def _escape_regex(cls, text):
        """Return ``text`` with regex metacharacters backslash-escaped.

        Keywords come straight from the request, so they are escaped
        before being embedded in a database regex lookup; otherwise input
        such as '(' or 'c++' would form an invalid pattern.
        """
        return cls._REGEX_METACHARS.sub(r'\\\1', text)

    def get_queryset(self):
        """Build the result queryset for the current request.

        Stores the result in ``self.queryset`` (``get_context_data`` reads
        its model) and returns it ordered by ``symbol``. Without a
        ``terms`` parameter, or with an empty one, the result is empty.
        """
        if 'terms' in self.request.GET:
            query = self.request.GET.get('terms', False)
            if not query:
                # just an empty queryset
                self.queryset = GeneInformation.objects.none()
            elif query.lower() == 'Intragenic miRNAs'.lower():
                self.queryset = Intragenic().get_queryset()
            elif query.lower() == 'Intergenic miRNAs'.lower():
                self.queryset = Intergenic().get_queryset()
            elif query.lower() == 'Genes hosting miRNAs'.lower():
                # NOTE(review): unlike the other branches this returns
                # directly, skipping both the assignment to self.queryset
                # and the ordering by symbol -- confirm this is intended.
                return HostGenes().get_queryset()
            else:
                # split() without arguments drops the empty strings that
                # repeated spaces would produce; an empty keyword would
                # otherwise match every gene through symbol__icontains=''.
                keywords = query.split()
                # starts with an empty question
                question = Q()
                for keyword in keywords:
                    if self._MIRNA_PATTERN.match(keyword):
                        data = MirnaInformation.objects
                        attempt = self.MirnaQuery(keyword)
                    else:
                        data = GeneInformation.objects
                        attempt = self.GeneQuery(keyword)
                    question = question | attempt
                    try:
                        self.queryset = data.filter(question).distinct()
                    except FieldError:
                        # The accumulated question refers to fields of the
                        # other model; keep the previous queryset.
                        continue
        return self.queryset.order_by('symbol')

    def MirnaQuery(self, keyword):
        """Return the Q object to use for a miRNA ``keyword``.

        Lookups are tried from strictest to loosest and the first one
        that has at least one match in MirnaInformation is returned:
        accession id, exact symbol, symbol followed by letters, symbol
        substring. If none matches, a family/suffix regex is returned
        when the keyword can be parsed, else the substring lookup.
        """
        keyword = keyword.lower()
        data = MirnaInformation.objects

        # first, check if keyword is an accession number match!
        attempt = Q(accession_id__iexact=keyword)
        if data.filter(attempt).exists():
            return attempt

        if '-' not in keyword and 'mir' in keyword:
            # 'hsamir21' -> 'hsa-mir-21'. Empty pieces are dropped so that
            # 'mir21' becomes 'mir-21' instead of '-mir-21'.
            keyword = '-'.join(
                piece for piece in keyword.partition('mir') if piece)
        attempt = Q(symbol__iexact=keyword)
        if data.filter(attempt).exists():
            return attempt

        attempt = Q(symbol__regex=self._escape_regex(keyword) + r'[A-Za-z]+')
        if data.filter(attempt).exists():
            return attempt

        attempt = Q(symbol__icontains=keyword)
        if data.filter(attempt).exists():
            return attempt

        # Last resort: rebuild the symbol from its family and suffix,
        # allowing an optional hyphen between the two.
        pattern = r'(?P<species>hsa)?-?(?P<family>let|mir)-?(?P<suffix>.*)'
        m = re.search(pattern, keyword)
        if m:
            attempt = Q(symbol__regex=r'{0}-?{1}'.format(
                m.group('family'),
                self._escape_regex(m.group('suffix')),
            ))
        return attempt

    def GeneQuery(self, keyword):
        """Return the Q object to use for a gene ``keyword``.

        An all-digit keyword is looked up as an Entrez id, any other one
        as an exact symbol or Ensembl id. When a lookup has no match in
        GeneInformation the next, looser one is tried: symbol substring
        or exact synonym, then a whole-word match in the description.
        """
        # NOTE(review): the indentation of the original was lost; the
        # fallbacks below are assumed to apply to numeric keywords too.
        data = GeneInformation.objects
        if keyword.isdigit():
            attempt = Q(entrez_id__exact=keyword)
        else:
            attempt = (
                Q(symbol__iexact=keyword) |
                Q(ensembl_id__iexact=keyword)
            )
        if not data.filter(attempt).exists():
            attempt = (
                Q(symbol__icontains=keyword) |
                Q(genesynonyms__symbol__iexact=keyword)
            )
        if not data.filter(attempt).exists():
            # Search the description for an *exact* word match. Django's
            # iexact lookup cannot be used because the description field
            # is just one big string, so MySQL regex pattern matching is
            # used instead: the keyword wrapped by word boundaries.
            # Regex syntax depends on the database backend; research and
            # change if necessary (sqlite3 / Python syntax would be
            # r'\b{0}\b').
            pattern = r'[[:<:]]{0}[[:>:]]'.format(
                self._escape_regex(keyword))
            attempt = Q(description__iregex=pattern)
        return attempt

    def compare(self, gene1, gene2):
        """Compare two genes by the length of their official symbols.

        Returns a positive number if gene1's symbol is longer than
        gene2's, 0 if both have the same length, a negative number
        otherwise. Sorting with it breaks regular alphabetical ordering.
        Not called by this class.
        """
        return len(gene1.symbol) - len(gene2.symbol)

    def get_context_data(self, **kwargs):
        """Add search metadata to the context and select the template.

        ``GENE`` / ``MIRNA`` flag which model ``self.queryset`` holds and
        the matching results template is selected accordingly. ``query``
        echoes the submitted ``terms`` and ``title`` is the page title.
        """
        context = super(Search, self).get_context_data(**kwargs)
        model = self.queryset.model
        context['GENE'] = model == GeneInformation
        context['MIRNA'] = model == MirnaInformation
        if context['GENE']:
            self.template_name = 'gene_results.html'
        elif context['MIRNA']:
            self.template_name = 'mirna_results.html'
        context['query'] = self.request.GET.get('terms', '')
        context['title'] = 'Miriad Search'
        return context
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment