This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import * | |
import multiprocessing as mp | |
import multiprocessing.connection | |
def map_per_process(fn, input_sequence: Iterable) -> Iterable: | |
pipeno_to_pipe: Dict[int, multiprocessing.connection.Connection] = {} | |
pipeno_to_process: Dict[int, mp.Process] = {} | |
def process_one_item(send_pipe: multiprocessing.connection.Connection, item): | |
try: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import * | |
import random | |
def shuffle_iterable(i: Iterable, pool_size: int = 1024) -> Iterable: | |
import random | |
i = iter(i) | |
pool = [] | |
# fill up the pool | |
for item in i: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import dill | |
import lmdb | |
import functools | |
from typing import * | |
import io | |
import mmh3 | |
def memoize(exceptions: Optional[List] = None, version = None): | |
if exceptions is None: | |
exceptions = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ExtractReferences referenceExtractor; | |
try( | |
final InputStream gazetteerIs = new FileInputStream(Parser.getDefaultGazetteer().toFile()); | |
final DataInputStream bibModelIs = new DataInputStream(new FileInputStream(Parser.getDefaultBibModel().toFile())) | |
) { | |
referenceExtractor = new ExtractReferences(gazetteerIs, bibModelIs); | |
} | |
List<String> referenceLines = Arrays.asList("Reference 1", "Reference 2", "etc."); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** A closeable handle on a [[ReferenceCounted]] resource.
  *
  * Constructing a handle calls `counted.inc()`; closing it calls `counted.dec()`.
  *
  * NOTE(review): every call to `close()` invokes `counted.dec()` again, so closing the same
  * handle more than once decrements more than once.
  *
  * @param counted the reference-counted wrapper this handle refers to
  * @tparam T the type of the wrapped resource
  */
case class CountedReference[T <: AutoCloseable](counted: ReferenceCounted[T]) extends AutoCloseable {
  // Runs as part of construction: register this handle with the counter.
  counted.inc()

  /** Returns the resource held by `counted`. */
  def get: T = counted.inner

  /** Releases this handle by calling `counted.dec()`. */
  override def close(): Unit = counted.dec()
}
case class ReferenceCounted[T <: AutoCloseable](inner: T) { | |
private val count = new AtomicInteger(0) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
object SuffixTree extends App { | |
case class TreeNode(children: Map[Char, TreeNode] = Map.empty, end: Boolean = false) { | |
def addString(string: CharSequence): TreeNode = { | |
if(string.length() == 0) { | |
this.copy(end = true) | |
} else { | |
val c = string.charAt(0) | |
val existingNode = children.getOrElse(c, TreeNode()) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
logger.info(s"Finding out how many files to process in $inputDirectory") | |
// work around Java's dumb file IO methods | |
def forEachFileToProcess(f: Path => Unit): Unit = { | |
val filenamePattern = "^([a-f0-9]{40})\\.pdf$"r | |
val visitor = new FileVisitor[Path] { | |
override def visitFileFailed(file: Path, exc: IOException): FileVisitResult = | |
FileVisitResult.SKIP_SUBTREE | |
override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.allenai.common.Resource | |
import com.amazonaws.services.s3.AmazonS3Client | |
import java.net.URI | |
import java.nio.file.{Path, Files} | |
object IO { | |
private val s3 = new AmazonS3Client() | |
private val acceptableUrlSchemes = Set("s3", "s3n", "s3a") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Returns the first success out of an iterator of Try, or the first failure if there is no | |
* success, taking care not to exhaust the iterator when possible. | |
* | |
* @param tries The iterator of Try. Must not be empty. | |
* @param defaultFailure The return value to use if all tries left in the iterator are | |
* failures. If this is None, use the first failure from the iterator. | |
* @return the first success out of the iterator, or the first failure if there is no success | |
*/ | |
@tailrec | |
def firstSuccessOrFirstFailure[A]( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Insert a newline between every "}{" pair read from stdin / the named files.
# The input record separator ($/) is set to "}{", so with -p perl processes the
# stream one "}{"-terminated chunk at a time instead of line by line.
# The braces in the pattern are escaped so they are always read as literals.
perl -pe 'BEGIN { $/ = "}{" } s/\}\{/}\n{/g'