First, download the data files using a BitTorrent client:
aria2c https://academictorrents.com/download/4dcfdf804775f2d92b7a030305fa0350ebef6f3e.torrent
Next, convert the data files to a single newline-delimited JSON file:
deno run process.ts
// Browser-console snippet: resets position/height/overflow on every div, main
// and body element, then removes every button from the page.
// querySelectorAll returns a static NodeList, so removing nodes while
// iterating over it is safe.
for (const node of document.querySelectorAll('div,main,body')) {
  // Only HTML elements are guaranteed to expose an inline `style` object.
  if (node instanceof HTMLElement) {
    node.style.position = 'relative'
    node.style.height = 'auto'
    node.style.overflowY = 'visible'
  }
}
for (const button of document.querySelectorAll('button')) {
  button.remove()
}
# Download programme m001d2h4 together with its subtitles into ./m001d2h4,
# then cut a 5-second clip starting at 17:49 with the subtitles rendered
# onto the video by ffmpeg's `subtitles` filter.
pid="m001d2h4"
# Path (without extension) of the files get_iplayer wrote for this episode.
base="$pid/Only_Connect_Series_18_-_07._Scrummagers_v_Crustaceans_${pid}_original"
get_iplayer --pid "$pid" --subtitles --output "$pid"
ffmpeg -i "$base.mp4" -vf "subtitles=$base.srt" -ss 17:49 -t 5 -copyts output.mov
lineReader = () => { | |
let buffer = ""; | |
return new TransformStream({ | |
transform(chunk, controller) { | |
buffer += chunk; | |
const parts = buffer.split("\n"); | |
parts.slice(0, -1).forEach((part) => controller.enqueue(part)); | |
buffer = parts[parts.length - 1]; | |
}, |
import { parse } from 'https://deno.land/x/[email protected]/mod.ts' | |
import { readableStreamFromIterable } from 'https://deno.land/[email protected]/io/streams.ts' | |
import { Database } from 'https://deno.land/x/[email protected]/mod.ts' | |
import ProgressBar from 'https://deno.land/x/[email protected]/mod.ts' | |
let counter = 0 | |
const progress = new ProgressBar({ | |
title: 'processing:', | |
interval: 100, |
First, download the data files using a BitTorrent client:
aria2c https://academictorrents.com/download/4dcfdf804775f2d92b7a030305fa0350ebef6f3e.torrent
Next, convert the data files to a single newline-delimited JSON file:
deno run process.ts
export const cloudStorageJsonLinesWriter = (url: string) => { | |
// gcloud components install alpha | |
const process = Deno.run({ | |
cmd: [ | |
'gcloud', | |
'alpha', | |
'storage', | |
'cp', | |
'-', | |
url, |
import { TextLineStream } from 'https://deno.land/[email protected]/streams/mod.ts' | |
// const input = await jsonLinesReader('input.jsonl.gz')
// const output = await jsonLinesWriter('output.jsonl.gz')
// for await (const item of input) {
//   // do something
//   await output.write(item)
// }
const createInputReader = async (path: string) => { | |
const file = await Deno.open(path, { | |
read: true, | |
}) | |
return file.readable | |
.pipeThrough(new DecompressionStream('gzip')) | |
.pipeThrough(new TextDecoderStream()) | |
.pipeThrough(new TextLineStream()) | |
.pipeThrough( |
// adapted from https://github.com/distribution/distribution/issues/1252#issuecomment-274944254

/** Repository name of the image; used to build the `repository:<IMAGE>:pull` token scope. */
const IMAGE = 'example/foo'
/** Image tag — NOTE(review): its use is outside the visible code; presumably selects the manifest to fetch. */
const TAG = 'latest'
const authorise = async () => { | |
const url = new URL('https://auth.docker.io/token') | |
url.searchParams.set('scope', `repository:${IMAGE}:pull`) | |
url.searchParams.set('service', 'registry.docker.io') | |
const response = await fetch(url) |
# Download an SD-quality clip of programme $PID between $START and $STOP into
# the current directory. PID, START and STOP must be set beforehand; the
# expansions are quoted so empty or space-containing values are not word-split.
get_iplayer --file-prefix="<pid>" --output="$PWD" --pid "$PID" --tv-quality sd --start "$START" --stop "$STOP"