Last active
July 13, 2018 00:36
-
-
Save d33tah/25db3f7e00970b9bd70cf0529fe77831 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cargo manifest for the benchmark binary built from main.rs.
[package] | |
name = "publicsuffixtest" | |
version = "0.1.0" | |
authors = ["Jacek Wielemborek <[email protected]>"] | |
[[bin]] | |
name = "publicsuffixtest" | |
# The source file is referenced relative to this manifest instead of src/main.rs.
path = "main.rs" | |
[dependencies] | |
# NOTE(review): "*" accepts any psl release, so the version that gets built
# depends on when the build runs; pin a version for a reproducible benchmark.
psl = "*" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Benchmark image: times the Rust binary built from main.rs against main.py
# run under pypy, both fed the same domains.txt on stdin.
FROM ubuntu:18.04 | |
# Rust toolchain plus the tools used below to fetch and unpack the input data.
RUN apt-get update && apt-get install -y cargo curl jq libssl-dev pkg-config time pigz && apt-get clean | |
# Fetch the public suffix list that is passed as an argument to both programs.
RUN curl https://publicsuffix.org/list/public_suffix_list.dat -o public_suffix_list.dat | |
# Scrape the dataset index page for links under /sonar.rdns_v2/ and store the
# extracted href value(s) in url.txt.
# NOTE(review): the next step assumes url.txt holds exactly one path - confirm.
RUN curl -s https://opendata.rapid7.com/sonar.rdns_v2/ | \ | |
grep 'href="/sonar.rdns_v2/' | cut -d'"' -f2 > url.txt | |
# Stream the dataset, decompress it, keep the first 1M lines and write the
# `.value` field of each JSON record to domains.txt.
RUN curl --location https://opendata.rapid7.com/`cat url.txt` \ | |
| pigz -dc | head -n 1M | jq -r .value > domains.txt | |
# Python side of the benchmark: pypy, pip for pypy, and the publicsuffix package.
RUN apt-get update && apt-get -y install pypy && apt-get clean | |
RUN curl -O https://bootstrap.pypa.io/get-pip.py && pypy get-pip.py | |
RUN pypy -m pip install publicsuffix | |
# https://stackoverflow.com/a/38261124/1091116 | |
# NOTE(review): presumably overridden with --build-arg CACHE_DATE=... to
# invalidate the layer cache from this point on - confirm against the link above.
ARG CACHE_DATE=not_a_date | |
ADD ./Cargo.toml . | |
ADD ./main.rs . | |
# Build the Rust benchmark binary; `time` reports how long the build takes.
RUN time cargo build --release --quiet | |
ADD ./main.py . | |
# warm up cache: | |
RUN cat domains.txt > /dev/null | |
# Timed runs; `wc -l` counts the lines each program emits.
# NOTE(review): main.rs does not read its command-line arguments, so the
# public_suffix_list.dat argument to the Rust binary appears to be unused.
RUN time ./target/release/publicsuffixtest public_suffix_list.dat < domains.txt | wc -l | |
RUN time pypy main.py public_suffix_list.dat < domains.txt | wc -l |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import codecs | |
import sys | |
from publicsuffix import PublicSuffixList | |
# Load the public suffix list from the path given as the first command-line
# argument. The file is opened in a `with` block so the handle is closed
# deterministically instead of being left to the garbage collector.
# NOTE(review): relies on PublicSuffixList reading the whole file inside its
# constructor - confirm against the installed publicsuffix version.
with codecs.open(sys.argv[1], encoding='utf8') as psl_file:
    psl = PublicSuffixList(psl_file)

# Print the get_public_suffix() result for every line read from stdin, with
# trailing whitespace (including the newline) stripped before the lookup.
for line in sys.stdin:
    print(psl.get_public_suffix(line.rstrip()))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extern crate psl; | |
use std::io::{self, BufRead, Write}; | |
use psl::{Psl, List}; | |
fn main() { | |
let stdout = io::stdout(); | |
let mut handle = stdout.lock(); | |
let list = List::new(); | |
let stdin = io::stdin(); | |
for line in stdin.lock().lines() { | |
let domain_str = line.unwrap(); | |
if let Some(domain) = list.suffix(&domain_str) { | |
handle.write(domain.as_str().as_bytes()).unwrap(); | |
handle.write(b"\n").unwrap(); | |
}; | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sending build context to Docker daemon 91.14kB | |
Step 1/16 : FROM ubuntu:18.04 | |
---> 02f9d6707661 | |
Step 2/16 : RUN apt-get update && apt-get install -y cargo curl jq libssl-dev pkg-config time pigz && apt-get clean | |
---> Using cache | |
---> 454b504c9e39 | |
Step 3/16 : RUN curl https://publicsuffix.org/list/public_suffix_list.dat -o public_suffix_list.dat | |
---> Using cache | |
---> b900e888753f | |
Step 4/16 : RUN curl -s https://opendata.rapid7.com/sonar.rdns_v2/ | grep 'href="/sonar.rdns_v2/' | cut -d'"' -f2 > url.txt | |
---> Using cache | |
---> 59069fc17a3c | |
Step 5/16 : RUN curl --location https://opendata.rapid7.com/`cat url.txt` | pigz -dc | head -n 1M | jq -r .value > domains.txt | |
---> Using cache | |
---> c4e7a9994234 | |
Step 6/16 : RUN apt-get update && apt-get -y install pypy && apt-get clean | |
---> Using cache | |
---> 93ba244ae88f | |
Step 7/16 : RUN curl -O https://bootstrap.pypa.io/get-pip.py && pypy get-pip.py | |
---> Using cache | |
---> c6a1f4450451 | |
Step 8/16 : RUN pypy -m pip install publicsuffix | |
---> Using cache | |
---> 8380c939048b | |
Step 9/16 : ARG CACHE_DATE=not_a_date | |
---> Using cache | |
---> 1983d1b237c8 | |
Step 10/16 : ADD ./Cargo.toml . | |
---> Using cache | |
---> 8b83c08c141e | |
Step 11/16 : ADD ./main.rs . | |
---> 370fdc2ca7a1 | |
Step 12/16 : RUN time cargo build --release --quiet | |
---> Running in dc3653df03a6 | |
[91m888.79user 5.92system 12:54.66elapsed 115%CPU (0avgtext+0avgdata 1438004maxresident)k | |
0inputs+396136outputs (11968major+2413006minor)pagefaults 0swaps | |
[0m ---> 3359e938ed5a | |
Removing intermediate container dc3653df03a6 | |
Step 13/16 : ADD ./main.py . | |
---> a7dcf5aa9e0a | |
Step 14/16 : RUN cat domains.txt > /dev/null | |
---> Running in f31ebed1354d | |
---> 897bdc24ab1f | |
Removing intermediate container f31ebed1354d | |
Step 15/16 : RUN time ./target/release/publicsuffixtest public_suffix_list.dat < domains.txt | wc -l | |
---> Running in 068609c698c5 | |
[91m2.42user 1.15system 0:03.57elapsed 99%CPU (0avgtext+0avgdata 2568maxresident)k | |
0inputs+0outputs (0major+94minor)pagefaults 0swaps | |
[0m1048576 | |
---> 53451f385480 | |
Removing intermediate container 068609c698c5 | |
Step 16/16 : RUN time pypy main.py public_suffix_list.dat < domains.txt | wc -l | |
---> Running in e39ca5cdc142 | |
[91m1.58user 0.05system 0:01.66elapsed 98%CPU (0avgtext+0avgdata 88852maxresident)k | |
472inputs+0outputs (2major+11245minor)pagefaults 0swaps | |
[0m1048576 | |
---> 86e76c15b8a9 | |
Removing intermediate container e39ca5cdc142 | |
Successfully built 86e76c15b8a9 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment