Skip to content

Instantly share code, notes, and snippets.

@adamalesandro
Created July 24, 2017 17:47
Show Gist options
  • Save adamalesandro/3e08ce575c3e3f8bd71ae58609ff309d to your computer and use it in GitHub Desktop.
Save adamalesandro/3e08ce575c3e3f8bd71ae58609ff309d to your computer and use it in GitHub Desktop.
Info for coding test
### Sample wayback timestamp url: http://web.archive.org/web/20141214092900/http://www.coinbase.com/about
# Pages to look up in the Wayback Machine. Each entry pairs the target page
# address ("url") with the name of the parser ("parser") handed to
# get_data_for_wayback_timemap alongside it.
URLS = [
    {
        "url": "http://www.coinbase.com/about",
        "parser": "coinbase",
    },
]
class WaybackConstants:
    """URL templates and the timestamp format used to talk to web.archive.org."""

    # Timemap endpoint; the single positional placeholder takes the target
    # page URL (filled in with ``.format(url)``).
    TIMEMAP_BASE_URL = "http://web.archive.org/web/timemap/json/{0}"

    # strptime/strftime pattern for the 14-digit snapshot timestamps that
    # appear in archive URLs, e.g. "20141214092900".
    WAYBACK_DATETIME_FORMAT = "%Y%m%d%H%M%S"

    # Direct link to one archived snapshot; fill in with
    # ``.format(timestamp=..., target_url=...)``.
    WAYBACK_DIRECT_URL = "http://web.archive.org/web/{timestamp}/{target_url}"
def get_data_for_wayback_timemap(url=None, parser=None):
    """Fetch the Wayback Machine timemap for ``url`` and list its snapshots.

    The timemap is requested from ``WaybackConstants.TIMEMAP_BASE_URL`` and
    decoded as JSON. The first row is consumed as the column header; every
    following row is turned into one entry of the returned history.

    Args:
        url: Address of the page to look up. Required, despite the ``None``
            default.
        parser: Name of the parser associated with ``url``. Required, despite
            the ``None`` default. NOTE(review): parser-specific extraction
            from each snapshot is the unfinished part of this exercise, so
            the value is only validated here.

    Returns:
        A list with one dict per snapshot row, holding:
            ``"timestamp"``    - the raw timestamp string from the timemap,
            ``"datetime"``     - that timestamp parsed with
                                 ``WaybackConstants.WAYBACK_DATETIME_FORMAT``,
            ``"snapshot_url"`` - the direct archive link built from
                                 ``WaybackConstants.WAYBACK_DIRECT_URL``.
        The list is empty when the timemap contains no rows at all.

    Raises:
        AssertionError: If ``url`` or ``parser`` is ``None``.
        requests.HTTPError: If the timemap request returns an error status.
        ValueError: If the response is not JSON, the header row has no
            ``timestamp`` column, or a timestamp does not match the
            expected format.
    """
    # Function-scope imports: no module-level imports are visible in this
    # file, so the block brings its own dependencies into scope.
    from datetime import datetime

    import requests

    assert url is not None, "url is required"
    assert parser is not None, "parser is required"

    # A timeout keeps a stalled archive server from hanging the script, and
    # raise_for_status stops an error page from being parsed as a timemap.
    timemap = requests.get(
        WaybackConstants.TIMEMAP_BASE_URL.format(url), timeout=30
    )
    timemap.raise_for_status()
    wayback_snapshots = timemap.json()

    waybacks_iterator = iter(wayback_snapshots)
    # The default avoids StopIteration when the timemap is an empty list.
    header = next(waybacks_iterator, None)
    wayback_history = []
    if header is None:
        return wayback_history

    # Assumes the header row names a "timestamp" column, as the Wayback
    # timemap JSON does -- TODO confirm against a live response.
    timestamp_column = list(header).index("timestamp")

    for wayback_record in waybacks_iterator:
        timestamp = wayback_record[timestamp_column]
        wayback_history.append({
            "timestamp": timestamp,
            "datetime": datetime.strptime(
                timestamp, WaybackConstants.WAYBACK_DATETIME_FORMAT
            ),
            "snapshot_url": WaybackConstants.WAYBACK_DIRECT_URL.format(
                timestamp=timestamp, target_url=url
            ),
        })
    return wayback_history
if __name__ == '__main__':
    # Script entry point: run the timemap lookup once for every configured
    # target, passing its page address and parser name.
    for url in URLS:
        get_data_for_wayback_timemap(url["url"], url["parser"])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment