Skip to content

Instantly share code, notes, and snippets.

@dahlia
Last active December 17, 2015 23:09
Show Gist options
  • Save dahlia/5687599 to your computer and use it in GitHub Desktop.
Google Reader to Lookbook.nu (subscriptions → fans, stars → hypes)
.*.swp
*.pyc
credentials.ini
; Copy this and rename it to credentials.ini
[google]
username = [email protected]
password = yourpassword
[lookbook]
username = [email protected]
password = yourpassword
import ConfigParser
from lxml.html import document_fromstring
from requests import Session
__all__ = 'google', 'login_lookbook', 'lookbook'
# Load both accounts' credentials from credentials.ini in the current
# working directory.  ConfigParser.read() silently ignores a missing
# file, so a bad path only surfaces later as NoSectionError from get().
config = ConfigParser.SafeConfigParser()
config.read('credentials.ini')
# (username, password) pair for the Google Reader account.
google = (config.get('google', 'username'),
          config.get('google', 'password'))
# (username, password) pair for the Lookbook.nu account.
lookbook = (config.get('lookbook', 'username'),
            config.get('lookbook', 'password'))
def login_lookbook(username=lookbook[0], password=lookbook[1]):
    """Log into Lookbook.nu and return an authenticated session.

    :param username: Lookbook.nu account email.  Defaults to the value
                     read from ``credentials.ini``.
    :param password: Lookbook.nu account password.  Defaults likewise.
    :returns: a :class:`requests.Session` carrying the login cookies
    :raises RuntimeError: when the login attempt is rejected

    """
    session = Session()
    session.headers['Origin'] = 'http://lookbook.nu'
    # Fetch the login page first to scrape the anti-CSRF fields
    # (authenticity_token, utf8) that the Rails form expects back.
    url = 'http://lookbook.nu/login'
    response = session.get(url)
    html = document_fromstring(response.content)
    form = html.xpath('//form[@action={0!r}][@method="post"]'.format(url))[0]
    authenticity_token, = form.xpath('.//input[@name="authenticity_token"]'
                                     '/@value')
    utf8, = form.xpath('.//input[@name="utf8"]/@value')
    form = {
        'user[email]': username,
        'user[password]': password,
        'authenticity_token': authenticity_token,
        'utf8': utf8.encode('utf-8')
    }
    response = session.post(url, headers={'Referer': url}, data=form)
    # A logout link only appears for authenticated users.  Raise
    # explicitly instead of ``assert``, which is stripped under -O.
    if 'http://lookbook.nu/logout' not in response.content:
        raise RuntimeError('You seem to fail to login')
    return session
"""Migrate Google Reader subscriptions to Lookbook.nu fans."""
from libgreader import ClientAuthMethod, GoogleReader
from lxml.html import document_fromstring
from credentials import google, login_lookbook
auth = ClientAuthMethod(*google)
reader = GoogleReader(auth)
reader.buildSubscriptionList()
feeds = reader.getSubscriptionList()
lookbook_feeds = [feed for feed in feeds if 'lookbook.nu/' in feed.feedUrl]
lookbook_urls = {feed.siteUrl for feed in lookbook_feeds
if feed.siteUrl and 'lookbook.nu' in feed.siteUrl}
lookbook = login_lookbook()
for i, lookbook_url in enumerate(lookbook_urls):
print '[{0}/{1}] {2} ->'.format(
i + 1,
len(lookbook_urls),
lookbook_url
),
response = lookbook.get(lookbook_url)
lookbook_html = document_fromstring(response.content)
csrf_token = lookbook_html.xpath('//meta[@name="csrf-token"]/@content')[0]
fan_url = lookbook_html.xpath('//a[@id="become_a_fan_btn"]/@href')[0]
print fan_url,
response = lookbook.post(fan_url, headers={
'X-Requested-With': 'XMLHttpRequest',
'X-CSRF-Token': csrf_token
})
print '->', response.status_code
assert response.status_code == 200
"""Migrate Google Reader stars to Lookbook.nu hypes."""
import datetime
import json
import re
import time
from libgreader import ClientAuthMethod, GoogleReader
from lxml.html import document_fromstring
from credentials import google, login_lookbook
# Authenticate against Google Reader with the credentials.ini account.
auth = ClientAuthMethod(*google)
reader = GoogleReader(auth)
# Presumably registers the special feeds (starred, shared, ...) so they
# can be looked up below -- matches libgreader usage; verify if upgraded.
reader.makeSpecialFeeds()
feed = reader.getSpecialFeed('starred')
def iter_feeds(feed):
    """Lazily yield every item of *feed*, fetching 100 more per round.

    The generator never terminates on its own; the caller is expected
    to ``break`` out (as the hype loop does on the date cutoff).

    """
    feed.loadItems(loadLimit=100)
    seen = 0
    while True:
        print('(loaded more)')
        for entry in feed.getItems()[seen:]:
            yield entry
        seen = seen + feed.lastLoadLength
        feed.loadMoreItems(loadLimit=100)
lookbook = login_lookbook()
oldest_limit = datetime.datetime(2008, 1, 1)
hyped_ids_re = re.compile(r'var\s+hypedLookIds\s*=\s*(\[[\d,]*\])\s*;')
look_id_re = re.compile(r'^https?://(?:www\.)?lookbook\.nu/look/(\d+)')
hyped_looks = set()
for item in iter_feeds(feed):
updated = datetime.datetime.utcfromtimestamp(item.data['updated'])
if updated < oldest_limit:
break
if not item.url or '/lookbook.nu/' not in item.url:
continue
print item.url[7:], '->',
response = lookbook.get(item.url)
if (response.url == 'http://lookbook.nu/' and
response.history[-1].status_code == 302):
print 'GONE'
look_id = int(look_id_re.match(item.url).group(1))
if look_id in hyped_looks:
print 'HYPED'
continue
hyped_looks.add(look_id)
hyped_ids_match = hyped_ids_re.search(response.content)
if hyped_ids_match:
if look_id in json.loads(hyped_ids_match.group(1)):
print 'HYPED'
continue
html = document_fromstring(response.content)
csrf_token = html.xpath('//meta[@name="csrf-token"]/@content')[0]
hype_url = html.xpath('//*[@class="look_hype"]/@hypeurl')[0]
print hype_url[7:],
response = lookbook.get(hype_url, headers={
'X-Requested-With': 'XMLHttpRequest',
'X-CSRF-Token': csrf_token
})
print '->', response.status_code
assert response.status_code == 200
time.sleep(5)
libgreader >= 0.7.0
lxml >= 3.2.0
requests >= 1.2.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment