Moved to GitHub due to requests, see
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__pycache__ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
import ast | |
import sys | |
encodings = ["ascii", "big5", "big5hkscs", "cp037", "cp424", "cp437", "cp500", "cp720", "cp737", "cp775", "cp850", "cp852", "cp855", "cp856", "cp857", "cp858", "cp860", "cp861", "cp862", "cp863", "cp864", "cp865", "cp866", "cp869", "cp874", "cp875", "cp932", "cp949", "cp950", "cp1006", "cp1026", "cp1140", "cp1250", "cp1251", "cp1252", "cp1253", "cp1254", "cp1255", "cp1256", "cp1257", "cp1258", "euc_jp", "euc_jis_2004", "euc_jisx0213", "euc_kr", "gb2312", "gbk", "gb18030", "hz", "iso2022_jp", "iso2022_jp_1", "iso2022_jp_2", "iso2022_jp_2004", "iso2022_jp_3", "iso2022_jp_ext", "iso2022_kr", "latin_1", "iso8859_2", "iso8859_3", "iso8859_4", "iso8859_5", "iso8859_6", "iso8859_7", "iso8859_8", "iso8859_9", "iso8859_10", "iso8859_13", "iso8859_14", "iso8859_15", "iso8859_16", "johab", "koi8_r", "koi8_u", "mac_cyrillic", "mac_greek", "mac_iceland", "mac_latin2", "mac_roman", "mac_turkish", "ptcp154", "shift_jis", "shift_jis_2004", "shift_jisx0213", "utf_3 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import collections | |
import os | |
import sys | |
CDXFile = collections.namedtuple("CDXFile", ["sep", "fields", "file"]) | |
essential_fields = frozenset("aku") | |
warn_warc_dedup = True | |
def say(*args): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
_SPACE = "\u0020\u0009\u000a\u000c\u000d" | |
_POS_OUTSIDE = 0 | |
_POS_URL = 1 | |
_POS_DESCRIPTOR = 2 | |
def urls(srcset): | |
# URLs may contain commas, so we can't just .split(',') | |
pos = _POS_OUTSIDE | |
url = "" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import json | |
import sys | |
import bs4 | |
import requests | |
# this is public domain | |
# A00100C is pc-9800, untested on other groups |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From: Fabian Maurer <[email protected]> | |
Subject: [PATCH v3 1/2] kernelbase/locale: Implement comparison on top of official unicode weight tables | |
Message-Id: <[email protected]> | |
Date: Sat, 15 Feb 2020 20:27:47 +0100 | |
This is the first patch to get proper string comparison. | |
The algorithm is loosely based on MS-UCODEREF, and the tables | |
are taken from the official Microsoft download. | |
We start by implementing the sortkey step by step, | |
this first version is enough to not break any existing tests. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# adjust as needed | |
fdisk /dev/sda | |
# /dev/sda1 200M /boot | |
# /dev/sda2 300G /home | |
# /dev/sda3 20G / | |
mkfs.ext2 /dev/sda1 | |
mkfs.ext4 /dev/sda2 | |
mkfs.ext4 /dev/sda3 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from bs4 import BeautifulSoup | |
import urllib.request | |
import os.path | |
import sys | |
def get_contents_plain(html): | |
soup = BeautifulSoup(html) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from os.path import basename | |
import argparse | |
import json | |
import os | |
import sys | |
import urllib.parse | |
import urllib.request | |
if sys.stdout.isatty(): |
NewerOlder