First, let's make ourselves a simple Python web server with Flask:

from flask import Flask
import os

app = Flask(__name__)
PORT = int(os.getenv('FLASK_PORT', 5000))

if __name__ == '__main__':
    app.run(port=PORT)
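Since the port comes from the environment, running it somewhere else is just a matter of setting FLASK_PORT. Assuming the snippet above is saved as server.py (the filename is arbitrary):

$ FLASK_PORT=8080 python server.py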
<!-- Shamelessly borrowed from bulma docs -->
<div class="container">
  <div class="card">
    <header class="card-header">
      <p class="card-header-title is-translated" translate="cardTitle">
        Component
      </p>
    </header>
    <div class="card-content">
      <div class="content">
        <!-- Card body goes here -->
      </div>
    </div>
  </div>
</div>
import logging
from typing import Optional

from google.api_core.exceptions import GoogleAPICallError
from google.cloud import pubsub_v1


def ensure_subscription(subscription_name: str, subscriber: Optional[pubsub_v1.SubscriberClient] = None):
    subscriber = subscriber or pubsub_v1.SubscriberClient()
    subscription_path = subscriber.subscription_path('MY_GCP_PROJECT', subscription_name)
    try:
        return subscriber.get_subscription(subscription_path)
    except GoogleAPICallError:
        logging.warning(f'No subscription {subscription_path} found, creating...')
        # ensure topic exists, then create the subscription against it
        topic_path = get_pubsub_topic_path(get_publisher('my-topic-name'))
        return subscriber.create_subscription(subscription_path, topic_path)
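A rough sketch of how this might get used to stream messages; the callback and subscription name here are placeholders rather than anything from the original snippet:

subscriber = pubsub_v1.SubscriberClient()
subscription = ensure_subscription('my-subscription-name', subscriber)

def handle(message):
    print(f'Received: {message.data}')
    message.ack()

future = subscriber.subscribe(subscription.name, callback=handle)
future.result()  # block, processing messages as they arrive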
| echo ".mode csv && .header on && .once report.csv && select * from report;" | sed -E -e 's/\s*\&+\s*/\n/g' | sqlite3 database.sqlite |
import csv
from typing import List, Optional

import numpy
import pandas

class WordVectorizer:
    def __init__(self, embeddings_path: str, embedding_dim: int, limit: Optional[int] = None):
        self.vectors = {}
        with open(embeddings_path) as infile:
            # Skip header if this was produced by fasttext, which has metadata on first line
            if len(infile.readline().split()) == embedding_dim + 1:
                infile.seek(0)  # first line was a real vector after all, rewind
            for i, line in enumerate(infile):
                if limit is not None and i >= limit:
                    break
                parts = line.rstrip().split(' ')
                token = ' '.join(parts[:-embedding_dim])  # some embeddings have tokens containing spaces
                self.vectors[token] = numpy.array(parts[-embedding_dim:], dtype=numpy.float32)
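Hypothetical usage, with the GloVe file from the next snippet's docstring (the limit just keeps load times sane while poking around):

vectorizer = WordVectorizer('glove.840B.300d.txt', embedding_dim=300, limit=50000)
print(vectorizer.vectors['the'][:5])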
#!/usr/bin/env python3.6
"""
Example usage:

    $ python3.6 wvsqlite.py glove.840B.300d.txt

Produces an sqlite database with byte strings of floats for each word vector, indexed by
token for fast lookup for vocabs much smaller than the embedding vocab (aka most real vocabs).
Float size can be set via FLOAT_BYTES env var, and can be 4 or 8, and LIMIT can be set to take
only the first LIMIT vectors from the file.
"""
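Reading a vector back out might look something like this; the table and column names (vectors, token, vector) and the database filename are assumptions for illustration, not the script's actual schema:

import sqlite3
import struct

conn = sqlite3.connect('glove.840B.300d.sqlite')
row = conn.execute('SELECT vector FROM vectors WHERE token = ?', ('hello',)).fetchone()
if row is not None:
    vector = struct.unpack('300f', row[0])  # 300 floats at FLOAT_BYTES=4; use '300d' for 8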
# Build stage: compile a release binary on nightly
FROM rustlang/rust:nightly AS builder
WORKDIR /app
COPY src/ src/
COPY Cargo.* ./
RUN cargo build --release

# Runtime stage: ship only the compiled binary on a slim base
FROM debian:stretch-slim
COPY --from=builder /app/target/release/api .
CMD ["./api"]
#!/usr/bin/env bash
# Need wf - install with `cargo install wf`
mkdir -p splits wfs
echo 'Splitting file into parts...'
split -a 5 -l 200000 "$1" splits/split
ls splits/ | parallel 'echo "Counting {}..."; cat splits/{} | wf > wfs/{}_wf.txt'
echo 'Combining split counts...'
python - <<'EOF'
from collections import Counter
from glob import glob
from tqdm import tqdm
wf = Counter()
for fpath in tqdm(glob('wfs/*')):
    wf.update({token: int(count) for token, count in (line.split() for line in open(fpath))})
with open('wfs.txt', 'w') as of:
    for token, count in wf.most_common():
        of.write(f'{token} {count}\n')
EOF
rm -rf wfs splits
echo 'Word frequencies written to wfs.txt.'
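Invoking it on a corpus, assuming the script is saved as wordfreq.sh (the name is arbitrary):

$ chmod +x wordfreq.sh
$ ./wordfreq.sh big_corpus.txt
$ head wfs.txt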
import re

import keras
import numpy
import tensorflow

# Capturing group is important so it can be left padded with space (token splitter)
token_pattern = r"([\w']+|[,\.\?;\-\(\)])"
substitution = r" \1"

def tokenize(text: str):
    # Pad every token with a leading space, then split on whitespace
    return re.sub(token_pattern, substitution, text).split()
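A quick check of what the splitter does with punctuation-heavy input:

print(tokenize("Hello, world. Isn't this nice?"))
# ['Hello', ',', 'world', '.', "Isn't", 'this', 'nice', '?']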
use yew::events::ChangeData;
use yew::prelude::*;
use yew::web_sys::File;

pub struct MyFileInput {
    link: ComponentLink<Self>,
    file: Option<File>,
}

pub enum Msg {
    // The original snippet cuts off here; a file-selection message is the natural candidate
    FileSelected(Option<File>),
}