This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from heapq import heappush, heappop | |
""" | |
Our internal analytics system relies heavily on producing sorted streams of
(key: string, payload: bytearray) pairs.
In the following, I will ignore the payloads, as they are not important here.
We will only consider a stream of sorted keys.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![feature(alloc_system)] | |
extern crate alloc_system; | |
extern crate fst; | |
use fst::set::* ; | |
struct A<'a> { | |
stream: StreamBuilder<'a>, | |
} | |
impl<'a> A<'a> { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// A single token, described by two offsets, a position and its text.
///
/// The derived `Default` yields zero for every numeric field and an empty `term`.
#[derive(Default)]
pub struct Token {
    /// Start offset of the token.
    /// NOTE(review): presumably a byte offset into the source text — confirm.
    pub offset_from: usize,
    /// End offset of the token.
    /// NOTE(review): presumably exclusive and in the same unit as `offset_from` — confirm.
    pub offset_to: usize,
    /// Position of the token.
    /// NOTE(review): presumably counted in tokens rather than bytes — confirm.
    pub position: usize,
    /// Text of the token.
    pub term: String,
}
pub trait TokenStream { | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.type _ZN8rust_out13binary_search17hd76cf76916892235E,@function_ZN8rust_out13binary_search17hd76cf76916892235E: | |
.cfi_startproc | |
movl 256(%rdi), %ecx | |
movl $64, %edx | |
xorl %eax, %eax | |
cmpl %ecx, %esi | |
cmovgeq %rdx, %rax | |
movl 128(%rdi,%rax,4), %ecx | |
leaq 32(%rax), %rdx | |
cmpl %ecx, %esi |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
let start = 0; | |
let end = NUM_DOCS_PER_BLOCK; | |
let mut half = NUM_DOCS_PER_BLOCK / 2; | |
for _ in 0..7 { | |
let mid = start + half; | |
let doc = block_docs[mid]; | |
let cmp = (doc > target); | |
(start, end) = | |
if (doc <= target) { | |
(mid, end) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use tantivy::*; | |
use tantivy::postings::SkipResult; | |
fn extract_termfreq(searcher: &Searcher, term: &Term, doc_address: &DocAddress) -> Option<u32> { | |
searcher | |
.segment_reader(doc_address.segment_ord() as usize) | |
.read_postings(term, SegmentPostingsOption::Freq) | |
.and_then(|mut postings| { | |
if postings.skip_next(doc_address.doc()) == SkipResult::Reached { | |
Some(postings.term_freq()) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from collections import Counter, defaultdict | |
import hashlib | |
import time | |
from multiprocessing import Pool | |
# Matches runs of non-word characters (`\W+`); despite the name, this is broader than
# whitespace alone. Written as a raw string so that `\W` reaches the regex engine
# untouched: in a non-raw literal "\W" is an invalid string escape, which recent
# Python versions warn about.
WHITESPACE = re.compile(r"\W+")
def iter_shingles(desc): | |
md5 = hashlib.md5() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
from collections import defaultdict | |
from copy import deepcopy | |
import itertools | |
from math import log | |
# NOTE(review): presumably the vertices-per-key ratio above which a random 3-uniform
# hypergraph can be peeled with high probability (1 / 0.818 ~= 1.2218) — confirm
# against how `c` is used further down the file.
c = 1.22179
def make_str_to_edge(salts, n_vertices): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
def rotate(circle):
    """Return a copy of `circle` rotated left by one position.

    The first element moves to the end. The input is not modified: the result is
    built from two slices joined with `+`, so any sliceable sequence supporting
    concatenation (list, tuple, str) works. An empty sequence yields an empty
    sequence.
    """
    return circle[1:] + circle[:1]
def _swap(circle_list, i, j): | |
val_i = circle_list[i] | |
val_j = circle_list[j] | |
circle_list[j] = val_i | |
circle_list[i] = val_j |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from collections import Counter, defaultdict | |
import hashlib | |
import time | |
# Matches runs of non-word characters (`\W+`); despite the name, this is broader than
# whitespace alone. Written as a raw string so that `\W` reaches the regex engine
# untouched: in a non-raw literal "\W" is an invalid string escape, which recent
# Python versions warn about.
WHITESPACE = re.compile(r"\W+")
def iter_shingles(desc): | |
md5 = hashlib.md5() |