Skip to content

Instantly share code, notes, and snippets.

@d33tah
Last active July 13, 2018 00:36
Show Gist options
  • Save d33tah/25db3f7e00970b9bd70cf0529fe77831 to your computer and use it in GitHub Desktop.
Save d33tah/25db3f7e00970b9bd70cf0529fe77831 to your computer and use it in GitHub Desktop.
# Cargo manifest for the Rust side of the public-suffix benchmark.
[package]
name = "publicsuffixtest"
version = "0.1.0"
authors = ["Jacek Wielemborek <[email protected]>"]
# The source file is main.rs next to this manifest (not src/main.rs),
# so the binary target's path is given explicitly.
[[bin]]
name = "publicsuffixtest"
path = "main.rs"
[dependencies]
# NOTE(review): "*" accepts any published version of psl, so the build is not
# reproducible and may break on an incompatible release; consider pinning the
# version main.rs was written against.
psl = "*"
# Benchmark image: builds the Rust binary from main.rs, then runs it and the
# PyPy script main.py over the same list of domains, timing each run.
FROM ubuntu:18.04
# Rust toolchain plus the command-line tools used by the steps below.
RUN apt-get update && apt-get install -y cargo curl jq libssl-dev pkg-config time pigz && apt-get clean
# Download the Public Suffix List data file.
RUN curl https://publicsuffix.org/list/public_suffix_list.dat -o public_suffix_list.dat
# Scrape the Rapid7 sonar.rdns_v2 index page for dataset links; splitting the
# matching line on '"' and taking field 2 yields a quoted value from that line.
# NOTE(review): every matching line is written to url.txt, while the next step
# appears to assume it contains exactly one path - confirm against the page.
RUN curl -s https://opendata.rapid7.com/sonar.rdns_v2/ | \
grep 'href="/sonar.rdns_v2/' | cut -d'"' -f2 > url.txt
# Download the dataset, decompress it, keep the first 1M (1048576) lines and
# write the "value" field of each JSON record to domains.txt.
RUN curl --location https://opendata.rapid7.com/`cat url.txt` \
| pigz -dc | head -n 1M | jq -r .value > domains.txt
# PyPy, pip for PyPy, and the publicsuffix package imported by main.py.
RUN apt-get update && apt-get -y install pypy && apt-get clean
RUN curl -O https://bootstrap.pypa.io/get-pip.py && pypy get-pip.py
RUN pypy -m pip install publicsuffix
# https://stackoverflow.com/a/38261124/1091116
# Build argument with a placeholder default; presumably overridden with a
# changing value (--build-arg CACHE_DATE=...) to invalidate the layer cache
# from this point on - see the answer linked above.
ARG CACHE_DATE=not_a_date
ADD ./Cargo.toml .
ADD ./main.rs .
# Build the Rust benchmark binary in release mode, timing the build.
RUN time cargo build --release --quiet
# main.py is added after the cargo build, so editing it does not invalidate
# the cached build layer above.
ADD ./main.py .
# warm up cache:
RUN cat domains.txt > /dev/null
# Timed runs of both implementations over the same input; wc -l counts the
# lines each one prints.
# NOTE(review): main.rs constructs its list with List::new() and does not read
# the public_suffix_list.dat argument passed here.
RUN time ./target/release/publicsuffixtest public_suffix_list.dat < domains.txt | wc -l
RUN time pypy main.py public_suffix_list.dat < domains.txt | wc -l
import codecs
import sys
from publicsuffix import PublicSuffixList
# Load the Public Suffix List from the path given as the first command-line
# argument, decoding it as UTF-8. The `with` block closes the file handle once
# the list object has been constructed.
with codecs.open(sys.argv[1], encoding='utf8') as psl_file:
    psl = PublicSuffixList(psl_file)

# For every line on stdin, strip trailing whitespace (including the newline)
# and print the result of get_public_suffix() on its own line.
for line in sys.stdin:
    print(psl.get_public_suffix(line.rstrip()))
extern crate psl;
use std::io::{self, BufRead, Write};
use psl::{Psl, List};
/// Reads domain names from stdin, one per line, and writes the suffix that
/// `List::suffix` returns for each of them to stdout, one per line.
///
/// Lines for which `List::suffix` returns `None` produce no output.
///
/// # Panics
///
/// Panics if a line cannot be read from stdin or if writing to stdout fails.
fn main() {
    let stdout = io::stdout();
    // The stdout handle is line-buffered, so writing through the bare lock
    // flushes once per output line. Wrapping the lock in a `BufWriter`
    // batches the output into large writes instead.
    let mut out = io::BufWriter::new(stdout.lock());
    let list = List::new();
    let stdin = io::stdin();
    for line in stdin.lock().lines() {
        let domain_str = line.unwrap();
        if let Some(domain) = list.suffix(&domain_str) {
            // `write_all` keeps writing until the whole buffer is consumed;
            // plain `write` may perform a short write and the returned byte
            // count was previously ignored.
            out.write_all(domain.as_str().as_bytes()).unwrap();
            out.write_all(b"\n").unwrap();
        }
    }
    // Flush explicitly: errors raised while `BufWriter` flushes on drop are
    // silently discarded.
    out.flush().unwrap();
}
Sending build context to Docker daemon 91.14kB
Step 1/16 : FROM ubuntu:18.04
---> 02f9d6707661
Step 2/16 : RUN apt-get update && apt-get install -y cargo curl jq libssl-dev pkg-config time pigz && apt-get clean
---> Using cache
---> 454b504c9e39
Step 3/16 : RUN curl https://publicsuffix.org/list/public_suffix_list.dat -o public_suffix_list.dat
---> Using cache
---> b900e888753f
Step 4/16 : RUN curl -s https://opendata.rapid7.com/sonar.rdns_v2/ | grep 'href="/sonar.rdns_v2/' | cut -d'"' -f2 > url.txt
---> Using cache
---> 59069fc17a3c
Step 5/16 : RUN curl --location https://opendata.rapid7.com/`cat url.txt` | pigz -dc | head -n 1M | jq -r .value > domains.txt
---> Using cache
---> c4e7a9994234
Step 6/16 : RUN apt-get update && apt-get -y install pypy && apt-get clean
---> Using cache
---> 93ba244ae88f
Step 7/16 : RUN curl -O https://bootstrap.pypa.io/get-pip.py && pypy get-pip.py
---> Using cache
---> c6a1f4450451
Step 8/16 : RUN pypy -m pip install publicsuffix
---> Using cache
---> 8380c939048b
Step 9/16 : ARG CACHE_DATE=not_a_date
---> Using cache
---> 1983d1b237c8
Step 10/16 : ADD ./Cargo.toml .
---> Using cache
---> 8b83c08c141e
Step 11/16 : ADD ./main.rs .
---> 370fdc2ca7a1
Step 12/16 : RUN time cargo build --release --quiet
---> Running in dc3653df03a6
888.79user 5.92system 12:54.66elapsed 115%CPU (0avgtext+0avgdata 1438004maxresident)k
0inputs+396136outputs (11968major+2413006minor)pagefaults 0swaps
 ---> 3359e938ed5a
Removing intermediate container dc3653df03a6
Step 13/16 : ADD ./main.py .
---> a7dcf5aa9e0a
Step 14/16 : RUN cat domains.txt > /dev/null
---> Running in f31ebed1354d
---> 897bdc24ab1f
Removing intermediate container f31ebed1354d
Step 15/16 : RUN time ./target/release/publicsuffixtest public_suffix_list.dat < domains.txt | wc -l
---> Running in 068609c698c5
2.42user 1.15system 0:03.57elapsed 99%CPU (0avgtext+0avgdata 2568maxresident)k
0inputs+0outputs (0major+94minor)pagefaults 0swaps
1048576
---> 53451f385480
Removing intermediate container 068609c698c5
Step 16/16 : RUN time pypy main.py public_suffix_list.dat < domains.txt | wc -l
---> Running in e39ca5cdc142
1.58user 0.05system 0:01.66elapsed 98%CPU (0avgtext+0avgdata 88852maxresident)k
472inputs+0outputs (2major+11245minor)pagefaults 0swaps
1048576
---> 86e76c15b8a9
Removing intermediate container e39ca5cdc142
Successfully built 86e76c15b8a9
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment