Skip to content

Instantly share code, notes, and snippets.

@showyou
Created May 17, 2010 10:43
Show Gist options
  • Select an option

  • Save showyou/403633 to your computer and use it in GitHub Desktop.

Select an option

Save showyou/403633 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib
from BeautifulSoup import BeautifulSoup
import re
def li(id):
    """Fetch one togetter.com list page, print its entries and return them.

    Downloads ``http://togetter.com/li/<id>``, parses it with BeautifulSoup
    and collects the following keys into a dict:

    * ``"title"``: ``.string`` of the first ``a.cssblk`` link inside the
      first ``div#toptitle``.
    * ``"description"``: the concatenated ``.string`` values of the direct
      children of the first ``div.info_description`` (``""`` when the page
      has no such div).
    * ``"body"``: a list holding one ``{"text": ..., "user": ...}`` dict
      per ``div.list_body``.

    The description is printed first, then one "text user" line per body
    entry.

    Args:
        id: List identifier as a string; it is appended to the URL as-is.

    Returns:
        The dict described above.

    Raises:
        IndexError: if the page lacks ``div#toptitle`` or its ``a.cssblk``.
        AttributeError: if a ``div.list_body`` has no ``<h4>`` or no
            ``<h5><a>`` element.
    """
    f = urllib.urlopen("http://togetter.com/li/" + id)
    try:
        html = f.read().decode("utf-8", "replace")
    finally:
        # Release the connection even when reading fails.
        f.close()
    soup = BeautifulSoup(html)

    results = {}
    title_box = soup("div", {"id": "toptitle"})[0]
    results["title"] = title_box("a", {"class": "cssblk"})[0].string

    info_desc = soup("div", {"class": "info_description"})
    if info_desc:
        # Children whose .string is None (e.g. tags with several children)
        # contribute nothing to the description.
        strings = (c.string for c in info_desc[0].contents)
        text = "".join(s for s in strings if s is not None)
    else:
        text = ""
    results["description"] = text

    results["body"] = []
    for contents in soup("div", {"class": "list_body"}):
        children = contents.h4.contents
        # NOTE(review): only the LAST child node of the <h4> is kept as the
        # entry text (it may be a tag rather than a plain string). This
        # mirrors the existing behaviour; confirm whether the full text of
        # the <h4> was intended instead.
        text = children[-1] if children else ""
        results["body"].append({
            "text": text,
            "user": contents.h5.a.string,
        })

    # Single-argument print calls parse the same way whether ``print`` is a
    # statement or a function.
    print(results["description"])
    for b in results["body"]:
        print(u"%s %s" % (b["text"], b["user"]))
    return results
def get():
    """Scrape the togetter.com front page and return its listed entries.

    Downloads ``http://togetter.com``, parses it with BeautifulSoup and
    builds one dict per ``div.info_box`` found on the page.

    Returns:
        A list of dicts, each with:

        * ``"title"``: ``.string`` of the box's first ``a.info_title``.
        * ``"description"``: the concatenated ``.string`` values of the
          direct children of the box's first ``div.info_description``
          (``""`` when the box has no such div).

    Raises:
        IndexError: if an ``info_box`` contains no ``a.info_title`` link.
    """
    f = urllib.urlopen("http://togetter.com")
    try:
        html = f.read().decode("utf-8", "replace")
    finally:
        # Release the connection even when reading fails.
        f.close()
    soup = BeautifulSoup(html)

    results = []
    for contents in soup("div", {"class": "info_box"}):
        title = contents("a", {"class": "info_title"})[0].string

        info_desc = contents("div", {"class": "info_description"})
        if info_desc:
            # Children whose .string is None contribute nothing.
            strings = (c.string for c in info_desc[0].contents)
            text = "".join(s for s in strings if s is not None)
        else:
            text = ""

        results.append({"title": title, "description": text})
    return results
if __name__ == "__main__":
    # Demo run when executed as a script: fetch and print one fixed list.
    # To dump the front page instead, iterate over get() and print each
    # entry's "title" and "description".
    li(id="21583")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment