Skip to content

Instantly share code, notes, and snippets.

@Muzietto
Last active January 8, 2017 10:24
Show Gist options
  • Save Muzietto/7345f9aa910a409fe7eac8bdf93a008d to your computer and use it in GitHub Desktop.
Save Muzietto/7345f9aa910a409fe7eac8bdf93a008d to your computer and use it in GitHub Desktop.
Kinda SumPrefix solution using differences to express distances from relevant targets
function genomicRangeQuery(nucleotides, mins, maxs) {
var result = new Array(mins.length).fill(-1);
var distances = distancesFromLastSeen(nucleotides);
for (var i = 0; i < mins.length; i++) {
var segmentWidth = maxs[i] - mins[i] + 1;
if (distances['A'][maxs[i]] < segmentWidth) {
result[i] = 1;
continue;
}
if (distances['C'][maxs[i]] < segmentWidth) {
result[i] = 2;
continue;
}
if (distances['G'][maxs[i]] < segmentWidth) {
result[i] = 3;
continue;
}
if (distances['T'][maxs[i]] < segmentWidth) {
result[i] = 4;
continue;
}
}
return result;
}
function distancesFromLastSeen(nucleotides) {
var nucs = nucleotides.split('');
var result = {
'A': new Array(nucs.length).fill(-1),
'C': new Array(nucs.length).fill(-1),
'G': new Array(nucs.length).fill(-1),
'T': new Array(nucs.length).fill(-1)
}
var lastSeen_A = -1;
var lastSeen_C = -1;
var lastSeen_G = -1;
var lastSeen_T = -1;
for (var i = 0; i < nucs.length; i++) {
if (nucs[i] === 'A') lastSeen_A = i;
if (nucs[i] === 'C') lastSeen_C = i;
if (nucs[i] === 'G') lastSeen_G = i;
if (nucs[i] === 'T') lastSeen_T = i;
result['A'][i] = i - lastSeen_A;
result['C'][i] = i - lastSeen_C;
result['G'][i] = i - lastSeen_G;
result['T'][i] = i - lastSeen_T;
}
return result;
}
@Muzietto
Copy link
Author

Muzietto commented Jan 8, 2017

Problem is presented here.
Given a DNA sequence e.g. 'AGTACCTGA' we want to know whether a given subsequence contains a given nucleotide without having to parse the whole subsequence at each query.
What are we to do?

@Muzietto
Copy link
Author

Muzietto commented Jan 8, 2017

We preprocess the DNA string storing at each position the distance from the last time we saw each nucleotide. Obviously the preprocess creates a rectangular matrix.

@Muzietto
Copy link
Author

Muzietto commented Jan 8, 2017

For each query we check the last positions in each queried subsequence. If the distance from e.g. 'A' is less than the subsequence width, our subsequence contains at least one adenine.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment