This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example input: one row per GUID; URLLIST is a comma-separated list of
# "url:count" pairs.
data <- read.table(header = TRUE, text = "GUID URLLIST
guid1 url1:4,url2:5
guid2 url1:3,url2:7,url3:8")

# For every row, split URLLIST on "," and keep only the part before ":".
# Iterating over the column with lapply() always yields one list entry per
# row; apply() collapses the result into a matrix whenever every row happens
# to hold the same number of URLs, which breaks the names()/`$` lookup below.
test <- lapply(
  strsplit(as.character(data$URLLIST), split = ",", fixed = TRUE),
  function(pairs) {
    lapply(pairs, function(one_str) {
      strsplit(one_str, split = ":", fixed = TRUE)[[1]][1]
    })
  }
)

# Index the per-row results by GUID so they can be looked up as test$<guid>.
names(test) <- as.character(data$GUID)
test$guid1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ptt extraction
import requests
from bs4 import BeautifulSoup

# Use a session so cookies set by the confirmation POST below are reused by
# later requests made through `rs`.
rs = requests.session()

# Form fields submitted to the over-18 confirmation endpoint.
payload = {
    'from': '/bbs/Gossiping/index.html',
    'yes': 'yes',
}

# NOTE(review): the original passed verify=False, which turns off TLS
# certificate verification for this HTTPS request. Verification is left at the
# requests default (enabled) here; restore verify=False only if the site's
# certificate is known to be broken.
res1 = rs.post("https://www.ptt.cc/ask/over18", data=payload)
for i in range(7649, 7600, -1): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests
from bs4 import BeautifulSoup

# Form fields for the timetable search request.
# NOTE(review): StartStation / EndStation look like opaque station
# identifiers issued by the site -- confirm which stations they refer to.
payload = {
    'StartStation': '977abb69-413a-4ccf-a109-0272c24fd490',
    'EndStation': 'fbd828d8-b1da-4b06-a3bd-680cdca4d2cd',
    'SearchDate': '2015/01/07',
    'SearchTime': '10:00',
    'SearchWay': 'DepartureInMandarin',
}

# Submit the search form; the response is kept in `res` for later parsing.
res = requests.post("http://www.thsrc.com.tw/tw/TimeTable/SearchResult", data=payload)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests
import json

# Replace the placeholder with a real Graph API access token before running.
access_token = '<acces token>'

# Pass the token through `params` so requests URL-encodes it instead of
# splicing the raw value into the query string.
res = requests.get(
    "https://graph.facebook.com/v2.2/me/friends",
    params={'access_token': access_token},
)
js = json.loads(res.text)

# Print one "name id" line per entry in the response's 'data' list.
# A single pre-formatted string keeps the output identical on Python 2 and 3.
for friend in js['data']:
    print("%s %s" % (friend['name'], friend['id']))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
# NOTE(review): `from selenium import selenium` is the legacy Selenium RC
# client; it is presumably unavailable in current selenium releases -- confirm
# whether anything further down still needs it.
from selenium import selenium
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
import time

# Start a Firefox instance controlled through WebDriver.
browser = webdriver.Firefox()
# Alternative driver: Internet Explorer through a local IEDriverServer binary.
#browser = webdriver.Ie('IEDriverServer.exe')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests

# NOTE(review): the percent-escapes in the query string are presumably Big5
# bytes, matching the response encoding forced below -- confirm.
res = requests.get("http://140.115.236.11/query_tea.asp?STR=ZA101&Query=%ACd%B8%DF&MM2=1&YY2=2015&item2=%AFZ%AF%C5")

# Decode the body as Big5 instead of the encoding requests would pick itself.
res.encoding = "big5"

# Parenthesised single-argument form prints the same on Python 2 and 3.
print(res.text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
header = {'user-agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'} | |
res = requests.get("https://www.google.com.tw/search?tbs=sbi%3AAMhZZisDdaia0AXISbyJd4X5hHQCRAQCgZMV-EbUWeSAAu1pRFVlaPE8OFg9ABtUKsyL1Mv-FEH0V6Au_1jUrygpKCIufrYROqHRkaFeELfwD40fp03kp2bSKJFZL9T_1VR_1yyXV_1Rl-Eq9_1a9JfuY-8QC_1fPWOISfFyVfSVB4NR8F63F-Gh4shJyYrFN5S144Cst9qQtmonNcxMKitRyCVMhzViFlUrUi8rr-3ZhJdjKcHS-GcoxbFoy6ima-ngIHoIjlQh7B462BjDdOTppgwwy1KK7-SqY11ai-IyapTJec7JkFS_182sfULocSDtL3TZiwDxLiUP1GZws9fTtgQp1wmzRS78JcoEups3pgh_1CAIqIj9cb22eABqchmgdr2cxIHVxJzhqh491nHs1_1qbLY8Hb9r03JxvErYf8r-Ju-j7LjdwHhpqwA1U2HB6tHkjmnz5qvw1GciQiLHlierin3aPAxqw3w4W959fOlot4hS5YjceWFREvxtELeSQI-UgPfGkgbCkCZrlmqXAwLh9gEahKdkwecjCWdEliC9AxL_1j4bkerV3itA69S8_1Uw4k7CWPLV-hNfzhNQkXPGSnUrhMpVXJGrteAfyxUtSqhZ5EujWkkzY8PWrcV8BwFfeDth_1gBNbT5kCMyyLLHErK2KWPgls8-mg7-RqxcyhP5UV79qA9RMk4bkVjz_1vnoNChzJv_15gZqWTQeYqUzPVEKLi7BgyMCLNCXhw3jAUD3oeJ0aYlffOPM0iCO1CxbMXr-wqG_1QdqzC2ed_19I5Juvz0SLWgqG |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
header = {'user-agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'} | |
res = requests.get("https://www.google.com.tw/search?tbs=sbi%3AAMhZZisDdaia0AXISbyJd4X5hHQCRAQCgZMV-EbUWeSAAu1pRFVlaPE8OFg9ABtUKsyL1Mv-FEH0V6Au_1jUrygpKCIufrYROqHRkaFeELfwD40fp03kp2bSKJFZL9T_1VR_1yyXV_1Rl-Eq9_1a9JfuY-8QC_1fPWOISfFyVfSVB4NR8F63F-Gh4shJyYrFN5S144Cst9qQtmonNcxMKitRyCVMhzViFlUrUi8rr-3ZhJdjKcHS-GcoxbFoy6ima-ngIHoIjlQh7B462BjDdOTppgwwy1KK7-SqY11ai-IyapTJec7JkFS_182sfULocSDtL3TZiwDxLiUP1GZws9fTtgQp1wmzRS78JcoEups3pgh_1CAIqIj9cb22eABqchmgdr2cxIHVxJzhqh491nHs1_1qbLY8Hb9r03JxvErYf8r-Ju-j7LjdwHhpqwA1U2HB6tHkjmnz5qvw1GciQiLHlierin3aPAxqw3w4W959fOlot4hS5YjceWFREvxtELeSQI-UgPfGkgbCkCZrlmqXAwLh9gEahKdkwecjCWdEliC9AxL_1j4bkerV3itA69S8_1Uw4k7CWPLV-hNfzhNQkXPGSnUrhMpVXJGrteAfyxUtSqhZ5EujWkkzY8PWrcV8BwFfeDth_1gBNbT5kCMyyLLHErK2KWPgls8-mg7-RqxcyhP5UV79qA9RMk4bkVjz_1vnoNChzJv_15gZqWTQeYqUzPVEKLi7BgyMCLNCXhw3jAUD3oeJ0aYlffOPM0iCO1CxbMXr-wqG_1QdqzC2ed_19I5Juvz0SLWgqG |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests
import json

# Fetch the daily trading info for stock number 3290; the endpoint answers
# with a JSON document.
res = requests.get("http://www.gretai.org.tw/web/stock/aftertrading/daily_trading_info/st43_result.php?l=zh-tw&d=104%2F01&stkno=3290&_=1420965808599")
j = json.loads(res.text)

# Parenthesised single-argument form prints the same on Python 2 and 3.
print(j['stkName'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests
import json
from datetime import timedelta
from dateutil.relativedelta import relativedelta

# URL template: the single %s placeholder is the value of the `d` query
# parameter.
url = "http://www.gretai.org.tw/web/stock/aftertrading/daily_trading_info/st43_result.php?l=zh-tw&d=%s&stkno=3290&_=1421136081459"

# Starting period in "<year>/<month>" form.
dt = "104/1"

# Split into [year, month] and build the same period with 1911 added to the
# year (presumably ROC calendar year -> Gregorian year; 104 -> 2015).
getyear = dt.split('/', 1)
bctime = str(int(getyear[0]) + 1911) + "/" + getyear[1]
for i in range(1,10): |
OlderNewer