Last active
March 27, 2021 08:03
-
-
Save slimlime/58a3b9ffbb09337f58a255453a8f19f1 to your computer and use it in GitHub Desktop.
Generate non-overlapping (gapped) sequences: partitioned sequences of auto-incrementing numbers. generate-non-overlapping-sequences.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Builds one strided ("stepped") sequence out of a run of consecutive numbers
 * that is being split across several interleaved sequences.
 *
 * The sequence with one-based identifier k starts at
 * `baseOffsetStartingNumber + (k - 1)` and then advances by
 * `stepNumberOfSequences`, so sequences built with the same step, length and
 * offset but different identifiers never share a value. Only values that fall
 * inside the run of `originalBaseLength` numbers are produced.
 *
 * Similar in spirit to giving distributed database nodes separately
 * incrementing, non-colliding ID ranges.
 *
 * @param {number} stepNumberOfSequences - Stride between consecutive values
 *   (the number of sequences the run is split into).
 * @param {number} originalBaseLength - How many consecutive numbers the whole
 *   run contains.
 * @param {number} baseOffsetStartingNumber - First number of the run.
 * @param {number} sequenceIdentifier - One-based identifier of the sequence to
 *   build; e.g. 1, 2 or 3 when the run is split into three sequences.
 * @returns {number[]} The values of the requested sequence, in generation order.
 *
 * TODO: Rename to the "get____" function name convention (ref transp).
 */
function generateSteppedSequence(
  stepNumberOfSequences,
  originalBaseLength,
  baseOffsetStartingNumber,
  sequenceIdentifier
) {
  // Identifiers are one-based; positions inside the run are zero-based.
  const sequenceOffsetForNoOverlaps = sequenceIdentifier - 1;

  // Count only the slots this sequence owns. Dividing the full length by the
  // step would hand every sequence the same (rounded-up) size and push the
  // later sequences past the end of the run when the length is not an exact
  // multiple of the step.
  const remainingLength = originalBaseLength - sequenceOffsetForNoOverlaps;
  const individualPartitionedSteppedLength = Math.max(
    0,
    Math.ceil(remainingLength / stepNumberOfSequences)
  );

  return Array.from(
    { length: individualPartitionedSteppedLength },
    (_unused, index) =>
      index * stepNumberOfSequences +
      sequenceOffsetForNoOverlaps +
      baseOffsetStartingNumber
  );
}
/**
 * Splits a run of consecutive numbers into `numSequences` interleaved
 * sequences by calling `generateSteppedSequence` once per sequence.
 *
 * @param {number} numSequences - Number of splits (sequences) to produce; also
 *   used as the stride inside each sequence.
 * @param {number} totalBaseLength - How many consecutive numbers the run
 *   contains in total.
 * @param {number} baseOffset - First number of the run.
 * @returns {Array<number[]>} One array per sequence, ordered by sequence
 *   identifier.
 */
function generateNonOverlappingSequences(
  numSequences,
  totalBaseLength,
  baseOffset
) {
  return Array.from({ length: numSequences }, (_unused, index) =>
    generateSteppedSequence(
      numSequences,
      totalBaseLength,
      baseOffset,
      // generateSteppedSequence expects a one-based identifier, while
      // Array.from supplies a zero-based index. Passing the raw index shifted
      // every sequence down by one (the first started at baseOffset - 1).
      index + 1
    )
  );
}
/**
 * Turns each numeric sequence into a single newline-separated string
 * (used for building a word list).
 *
 * @param {Array<number[]>} listOfSequences - The sequences that were generated.
 * @returns {string[]} One string per input sequence, with that sequence's
 *   values joined by "\n".
 */
function getListOfTextNewLinedStringifiedSequencesFromListOfSequences(listOfSequences) {
  return listOfSequences.map((sequence) => sequence.join("\n"));
}
/**
 * Prefixes every entry of a sequence with a URL prefix and joins the results
 * into one newline-separated string.
 *
 * @param {Array<number|string>} sequence - Values to append to the prefix, one
 *   per output line.
 * @param {string} [strPrefix="https://.../?&blah="] - Text placed in front of
 *   every value.
 * @returns {string} All prefixed values, separated by "\n".
 */
function directMapListOfUrlsFromTokenIDs(sequence, strPrefix = "https://.../?&blah=") {
  const urls = sequence.map((num) => `${strPrefix}${num}`);
  // Accumulate into a single string, one URL per line.
  return urls.join("\n");
}
alternative techniques
oop maybe off by one
did not expect notepad++ to freeze on simple regex replace for less than a million lines :D
oof 16 million characters for one of the splits? whoops!
nothing to see here
not generating wordlists 👀
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
numSequences = number of splits