facebook API - news json
import urllib.request as ur
import json

# Facebook Graph API request parameters
page_name = 'MBC'
base = "https://graph.facebook.com/v2.8/"
page_id = '240263402699918'
from_date = '2018-01-01'
to_date = '2018-02-05'
num_st = '10'

# Fields to fetch for each post; comments are requested as a count-only summary
node = page_id + '/posts/?fields=id,message,link,name,type,shares,reactions,' + \
       'created_time,comments.limit(0).summary(true)'
duration = '&since=' + from_date + '&until=' + to_date + "&limit=" + num_st

# App access token in "app_id|app_secret" form
app_id = "200920440387013"
app_secret = "daccef14d5cd41c0e95060d65e66c41d"
access_token = app_id + "|" + app_secret

url = base + node + duration + '&access_token=' + access_token
print(url)

req = ur.Request(url)
response = ur.urlopen(req)
if response.getcode() == 200:
    data = json.loads(response.read().decode('utf-8'))
    print(data)
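# --- Added sketch: following the paging cursor --------------------------------
# A minimal sketch of fetching further pages of posts, assuming the response
# keeps the Graph API's standard layout where data['paging']['next'] holds a
# ready-made URL for the next page; fetch_all_posts and max_pages are
# illustrative names, not part of the original snippet.
import urllib.request as ur
import json

def fetch_all_posts(first_url, max_pages=5):
    posts, url = [], first_url
    for _ in range(max_pages):
        with ur.urlopen(url) as resp:
            page = json.loads(resp.read().decode('utf-8'))
        posts.extend(page.get('data', []))
        url = page.get('paging', {}).get('next')
        if not url:                      # no more pages
            break
    return posts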
-----------------------
import urllib.request as ur
import json

page_name = 'MBC'
base = "https://graph.facebook.com/v2.8/"
page_id = '240263402699918'
from_date = '2018-01-01'
to_date = '2018-02-05'
num_st = '10'

node = page_id + '/posts/?fields=id,message,link,name,type,shares,reactions,' + \
       'created_time,comments.limit(0).summary(true)'
duration = '&since=' + from_date + '&until=' + to_date + "&limit=" + num_st

# app_id = "425314704574762"               # alternate credentials, overridden below
app_id = "200920440387013"
# app_secret = "d72bc837cd5439ea0d6ce2dd871750a4"
app_secret = "daccef14d5cd41c0e95060d65e66c41d"
access_token = app_id + "|" + app_secret

url = base + node + duration + '&access_token=' + access_token
print(url)

req = ur.Request(url)
response = ur.urlopen(req)
if response.getcode() == 200:
    data = json.loads(response.read().decode('utf-8'))
    #print(data)
###################################
# Top-level keys of the JSON response ('data' and 'paging')
for i in data:
    print(i, data[i])
    print("----------------------------")

# Each element of data['data'] is one post
cnt = 0
for dd in data['data']:
    #print(dd)
    print(cnt, ">>>>>>>>")
    for key in dd:
        print(key, ":", dd[key])
    cnt += 1
################################### | |
f = open('../fff/jtbc.csv','w',encoding='utf-8') | |
for dd in data['data']: | |
for key in dd: | |
f.write(str(dd[key]).replace('\n',' ')+',') | |
f.write('\n') | |
f.close() | |
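# --- Added sketch: safer CSV output with csv.DictWriter -----------------------
# A minimal sketch, assuming `data` was loaded as above. The naive comma-join
# breaks when a field itself contains commas, and columns drift when a post
# lacks some field; the csv module quotes values and keeps a fixed column
# order. The output path is a hypothetical example.
import csv

fieldnames = ['id', 'created_time', 'type', 'name', 'link', 'message',
              'shares', 'reactions', 'comments']
with open('../fff/posts.csv', 'w', encoding='utf-8', newline='') as fp:
    writer = csv.DictWriter(fp, fieldnames=fieldnames, restval='', extrasaction='ignore')
    writer.writeheader()
    for dd in data['data']:
        writer.writerow({k: str(dd.get(k, '')).replace('\n', ' ') for k in fieldnames})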
------------------------------------------------
# Crawling data from the web. http://cafe.naver.com/jx007s
# Target page -> the market capitalization (시가총액) listing
# Import lxml, a module that understands HTML, and convert the page with parse
# Use StringIO from io to feed the fetched text in as a file-like string object
import requests
from lxml.html import parse
from io import StringIO

url = 'http://finance.naver.com/sise/sise_market_sum.nhn'
text = requests.get(url)
print(text)
# <Response [200]> means the page's data was fetched successfully (status code 200)
# In the browser: page -> right-click -> view source
# A basic knowledge of HTML is needed to make sense of the source
print(text.status_code)
print(text.text)
# Prints the raw page source as-is
ppp = parse(StringIO(text.text))
doc = ppp.getroot()
tables = doc.findall('.//table')
print(tables)
tt = tables[1]                       ### table at index 1
rows = tt.findall('.//tr')           ### its rows
# rr = rows[0]                       ### row at index 0
# cols = rr.findall('.//th')         ### its header cells
# #print(cols)
# for cc in cols:
#     print(cc.text_content())

def rowData(rr, kind):
    # Collect the text of every <th> or <td> cell in a row
    cols = rr.findall('.//' + kind)
    return [vv.text_content() for vv in cols]

print(rowData(rows[0], 'th'))
print(rowData(rows[2], 'td'))
for row in rows[2:]:
    print(rowData(row, 'td'))
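# --- Added sketch: pairing headers with rows -----------------------------------
# A minimal sketch that reuses rowData() to pair the <th> header row with each
# data row as a dict, assuming the table layout above (header in rows[0], data
# rows from rows[2] onward) stays the same.
header = rowData(rows[0], 'th')
records = []
for row in rows[2:]:
    cells = rowData(row, 'td')
    if len(cells) == len(header):        # skip spacer/advert rows
        records.append(dict(zip(header, cells)))
print(records[:3])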
----------------------------------------------
import requests
from lxml.html import parse
from io import StringIO

url = 'http://finance.naver.com/sise/sise_market_sum.nhn'
text = requests.get(url)
print(text)
print(text.status_code)
print(text.text)

ppp = parse(StringIO(text.text))
doc = ppp.getroot()
tables = doc.findall('.//table')
print('---------------------')
print(tables)
tt = tables[1]
rows = tt.findall('.//tr')
print('---------------------')
print(rows)
rr = rows[0]
cols = rr.findall('.//th')
print('----------rr-----------')
print(rr)
print('---------cols----------')
print(cols)
for cc in cols:
    print(cc.text_content())

def rowData(rr, kind):
    cols = rr.findall('.//' + kind)
    return [vv.text_content() for vv in cols]

print(rowData(rows[0], 'th'))
print(rowData(rows[2], 'td'))
for row in rows[2:]:
    print(rowData(row, 'td'))
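# --- Added sketch: locating the table by its header text -----------------------
# A minimal sketch, not in the original snippet: hard-coding tables[1] breaks if
# the page layout changes, so this picks the table whose header cells contain a
# known column name instead. The column name '종목명' (stock name) is an
# assumption about the page.
def find_table(doc, header_keyword):
    for table in doc.findall('.//table'):
        headers = [th.text_content() for th in table.findall('.//th')]
        if any(header_keyword in h for h in headers):
            return table
    return None

tt = find_table(doc, '종목명')
if tt is not None:
    rows = tt.findall('.//tr')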
-------------------------------------------------
import requests
from lxml.html import parse
from io import StringIO

url = 'http://finance.daum.net/quote/rise.daum?stype=P&nil_profile=stocktop&nil_menu=nstock142'
text = requests.get(url)
print(text)
print(text.status_code)
print(text.text)

ppp = parse(StringIO(text.text))
doc = ppp.getroot()
tables = doc.findall('.//table')
print('---------------------')
print(tables)
tt = tables[1]
rows = tt.findall('.//tr')
print('---------------------')
print(rows)
rr = rows[0]
cols = rr.findall('.//th')
print('----------rr-----------')
print(rr)
print('---------cols----------')
print(cols)
for cc in cols:
    print(cc.text_content())

def rowData(rr, kind):
    cols = rr.findall('.//' + kind)
    return [vv.text_content() for vv in cols]

print(rowData(rows[0], 'th'))
print(rowData(rows[2], 'td'))
for row in rows[2:]:
    print(rowData(row, 'td'))
# Try saving the result to a CSV file!
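# --- Added sketch: saving the Daum table with csv.writer -----------------------
# A minimal sketch, assuming rows and rowData() from the script above; the
# csv module quotes values that contain commas. The output path is a
# hypothetical example.
import csv

with open('../fff/daumData.csv', 'w', encoding='utf-8', newline='') as fp:
    writer = csv.writer(fp)
    writer.writerow(rowData(rows[0], 'th'))      # header row
    for row in rows[2:]:
        cells = rowData(row, 'td')
        if len(cells) > 2:                       # skip spacer rows
            writer.writerow(cells)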
-------------------------------------
import requests
from lxml.html import parse
from io import StringIO

url = 'http://finance.naver.com/sise/sise_market_sum.nhn'
text = requests.get(url)
ppp = parse(StringIO(text.text))
doc = ppp.getroot()
tables = doc.findall('.//table')
#print(tables)                       ### all tables
tt = tables[1]                       ### table at index 1
rows = tt.findall('.//tr')           ### its rows

def rowData(rr, kind):
    cols = rr.findall('.//' + kind)
    res = [vv.text_content().replace("\t", "").replace("\n", "") for vv in cols]
    return res

def rowrite(rr):
    res = '연습삼아\n'                # placeholder line ("just for practice") written per row
    return res

# res only needs to join the cell texts and return them
f = open('../fff/webData.csv', 'w', encoding='utf-8')
for row in rows[2:]:
    rr = rowData(row, 'td')
    if len(rr) > 2:
        print(rr)
        f.write(rowrite(rr))
f.close()

print("------- Now let's save the stock data to CSV for real --------")
# res only needs to join the cell texts, comma-separated, and return them
def rowWrite(rr):
    res = ''
    cnt = 0
    for i in rr:
        cnt += 1
        res += i
        if len(rr) > cnt:
            res += ','
    res += '\n'
    return res
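# --- Added note: rowWrite as a one-liner ----------------------------------------
# A small sketch, not in the original snippet: the manual counting loop above is
# what str.join already provides.
def rowWrite(rr):
    return ','.join(rr) + '\n'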
------------------------------------------
import requests
from lxml.html import parse
from io import StringIO

def rowData(rr, kind):
    # Collect the text of every <th>/<td> cell, stripping tabs and newlines;
    # the last cell of each row is dropped
    cols = rr.findall('.//' + kind)
    res = [vv.text_content().replace("\t", "").replace("\n", "") for vv in cols]
    return res[:-1]

def rowWrite(rr):
    # Join the cell texts with commas and end the line with a newline
    res = ''
    cnt = 0
    for i in rr:
        cnt += 1
        res += i
        if len(rr) > cnt:
            res += ','
    res += '\n'
    return res

f = open('../fff/webData.csv', 'w', encoding='utf-8')
for i in range(1, 30):               # pages 1..29 of the market-cap listing
    url = 'http://finance.naver.com/sise/sise_market_sum.nhn?page=%d' % i
    text = requests.get(url)
    ppp = parse(StringIO(text.text))
    doc = ppp.getroot()
    tables = doc.findall('.//table')
    #print(tables)                   ### all tables
    tt = tables[1]                   ### table at index 1
    rows = tt.findall('.//tr')       ### its rows
    for row in rows[2:]:
        rr = rowData(row, 'td')
        if len(rr) > 2:              # skip spacer rows with too few cells
            print(rr)
            f.write(rowWrite(rr))
f.close()
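# --- Added sketch: header row and a polite delay between pages -----------------
# A hedged variant of the loop above, assuming the same page and table layout:
# it writes the header row once (taken from the first page's <th> cells) and
# sleeps briefly between requests so the crawl is gentler on the server.
import time

with open('../fff/webData.csv', 'w', encoding='utf-8') as f:
    wrote_header = False
    for i in range(1, 30):
        url = 'http://finance.naver.com/sise/sise_market_sum.nhn?page=%d' % i
        text = requests.get(url)
        doc = parse(StringIO(text.text)).getroot()
        rows = doc.findall('.//table')[1].findall('.//tr')
        if not wrote_header:
            f.write(rowWrite(rowData(rows[0], 'th')))
            wrote_header = True
        for row in rows[2:]:
            rr = rowData(row, 'td')
            if len(rr) > 2:
                f.write(rowWrite(rr))
        time.sleep(0.5)              # brief pause between page requests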