This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WARC/1.0 | |
WARC-Type: request | |
WARC-Target-URI: http://static.tumblr.com/vr9xgox/lKbnf8tc5/sidebar-following.png | |
Content-Type: application/http;msgtype=request | |
WARC-Date: 2018-12-14T11:39:08Z | |
WARC-Record-ID: <urn:uuid:0c92740b-134a-496b-a3e3-5c95b358bcd9> | |
WARC-IP-Address: 152.199.19.43 | |
WARC-Warcinfo-ID: <urn:uuid:d02a4e4c-ac44-4e81-be23-2331528e1674> | |
WARC-Block-Digest: sha1:KPGRN2MGW6AREHOTHRQVBJFDBFMQQUXH | |
Content-Length: 257 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Page to fetch and the User-Agent string sent with the request.
URI='http://staff.tumblr.com'
UA='ArchiveTeam'

# Download the page to stdout (-O -), sending wget's own log output to the
# file "log" (-o log), and let sed print only the text captured between
# `tumblr_form_key" content="` and `">` on a line that also contains
# `"tumblr-gpop`. CSRF is left empty when no line matches.
# The expansions are quoted so the values are passed to wget as single
# arguments, with no word splitting or globbing.
CSRF=$(wget -o log --user-agent "${UA}" "${URI}" -O - | sed -n 's|.*tumblr_form_key" content="\(.*\)">.*"tumblr-gpop.*|\1|p')

# Referer value used by the wget call that follows this block.
REF='https://www.tumblr.com/privacy/consent?redirect=http%3A%2F%2Fstaff.tumblr.com%2F'

echo "CSRF Token: ${CSRF}"
wget --user-agent "ArchiveTeam" \ | |
--save-cookies cookies.txt \ | |
--referer=${REF} \ | |
--header "Content-Type: application/json" \ | |
--header "X-tumblr-form-key: ${CSRF}" \ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding=utf8 | |
import datetime | |
from distutils.version import StrictVersion | |
import hashlib | |
import os.path | |
import random | |
from seesaw.config import realize, NumberConfigValue | |
from seesaw.externalprocess import ExternalProcess | |
from seesaw.item import ItemInterpolation, ItemValue | |
from seesaw.task import SimpleTask, LimitConcurrent |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- NOTE(review): the trailing "| |" / "|" on the lines below looks like
-- table-cell residue from a web-page extraction; it is not valid Lua and
-- should be stripped before this file is executed.

-- Run two helper scripts from the current working directory. What they
-- define is not visible from this file.
dofile("table_show.lua") | |
dofile("urlcode.lua") | |
-- Settings read from environment variables of the same names.
-- os.getenv returns nil when a variable is not set.
local item_type = os.getenv('item_type') | |
local item_value = os.getenv('item_value') | |
local item_dir = os.getenv('item_dir') | |
local warc_file_base = os.getenv('warc_file_base') | |
-- Starts out as an empty table; how it is filled and used is not shown in
-- this excerpt.
local ids = {} |