Skip to content

Instantly share code, notes, and snippets.

View bryanyang0528's full-sized avatar
🎯
Focusing

Bryan Yang bryanyang0528

🎯
Focusing
View GitHub Profile
library(XML)
library(RCurl)
## Generate the list of URLs
# Create an empty list that will hold the URLs of the pages to scrape.
url_list <- list()
# The source site uses URLs such as "http://lishi.tianqi.com/taibei/201101.html".
# Months appear as zero-padded strings ("01", "02", ...), so prepare that
# character vector up front.
month <- sprintf("%02d", 1:12)
# Use a loop to generate the monthly URLs automatically.
## Data source: http://www.cwb.gov.tw/V7/climate/dailyPrecipitation/dP.htm
# Read the CSV (no header row) and keep it as a matrix.
Kau_2011 <- as.matrix(
  read.csv("Kau_2011.csv", header = FALSE, fileEncoding = "UTF-8-BOM")
)
## Transform the raw data (a matrix) into a data-frame-like layout
# Build the new container with only two columns: the first is the date,
# the second is the rainfall. It is allocated with 31 * 12 rows, all NA.
Kau_2011_N <- matrix(nrow = 31 * 12, ncol = 2)
# Create a counter variable so R knows which column to fill in next.
## Read in the data
eva05 <- read.csv(file = "201405_Clothes_data.csv", header = TRUE, sep = ",")
## Compute the percentile cut points for each region. The data is to be split
## into five groups, so there is one cut point every 20%.
quintile_probs <- c(0, .2, .4, .6, .8, 1)
qx_north <- quantile(eva05$北區_均分, quintile_probs)
qx_middle <- quantile(eva05$中區_均分, quintile_probs)
qx_south <- quantile(eva05$南區_均分, quintile_probs)
## Create a new variable to store the group assignment
eva05$star_north <- 0
# Read the revised 2013 data set from CSV, keeping strings as character
# vectors. Argument names are spelled out in full (`na.strings`) and logical
# constants are written as TRUE rather than the reassignable `T`.
# NOTE(review): `encoding` only marks strings as Latin-1 or UTF-8; "ANSI" may
# have no effect here -- confirm whether `fileEncoding` was intended.
Revised <- read.csv(
  file = "D:/Data/Revised/revised data2013.csv",
  na.strings = "NA",
  header = TRUE,
  sep = ",",
  quote = "\"",
  dec = ".",
  fill = TRUE,
  encoding = "ANSI",
  stringsAsFactors = FALSE
)
## Load the files
library(tm)
library(tmcn)
library(rJava)
library(Rwordseg)
## Packages that are essential for text mining
## Import the required packages
import requests
from BeautifulSoup import BeautifulSoup
import HTMLParser
## GET the target site
res = requests.get("http://bryannotes.blogspot.tw/")
## Convert the response into a soup object (the text is passed as UTF-8 bytes)
soup = BeautifulSoup(res.text.encode("utf-8"))
{
"metadata": {
"name": "",
"signature": "sha256:65f47926354c8a7ea76eb54818c1f9122dbeb0aeb2e38b2768d96b18c5b7be19"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"metadata": {
"name": "",
"signature": "sha256:f844c7076bd537dedadb2fe248e886e56fe450fd60c794d4148ed4a0ace2f581"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"metadata": {
"name": "",
"signature": "sha256:d0e70bba76e533c765aa57c9511a49d143a281d7ab469d94c5c12b1f34eb3f77"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
text = "AbCdEf"
text[0] = "1"
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-10-639914bf643c> in <module>()
1
2 text = "AbCdEf"
----> 3 text[0] = "1"