Last active
July 30, 2019 09:27
-
-
Save conancat/5dfd0252d0f3136fe920 to your computer and use it in GitHub Desktop.
如何快速用nodejs的request和xml2js把xml下載下來,然後將xml轉換成js object或json檔案
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 如果要跑coffee的話,記得先安裝coffee-script | |
# npm install coffee-script -g | |
# npm install request xml2js | |
# coffee requestXmlToJs.coffee | |
# 先需要拿到一些package | |
fs = require "fs" | |
path = require "path" | |
request = require "request" | |
xml2js = require "xml2js" | |
# Function的開始 | |
# Download the XML document at `fileUrl` into `<__dirname>/tmp/`, reusing an
# existing local copy when the server's headers say it is still current, then
# parse the saved file with xml2js.
#
# fileUrl  - URL of the XML document; its basename becomes the local file name.
# callback - Node-style `(err, result)`; `result` is the object produced by
#            `xml2js.parseString`. It is invoked at most once.
#
# Returns the request object created for the download.
requestXmlToJs = (fileUrl, callback) ->
  # Make sure the temporary download folder exists.
  tmpFolder = __dirname + "/tmp/"
  if not fs.existsSync tmpFolder then fs.mkdirSync tmpFolder

  # Derive the local file name from the URL and decide where to store it.
  filename = path.basename fileUrl
  filepath = tmpFolder + filename

  finished = false     # true once `callback` has been invoked
  abortedByUs = false  # true once we cancelled the transfer on purpose
  output = null        # write stream of the download in progress, if any

  # Report the outcome exactly once, whichever event gets there first.
  done = (err, result) ->
    return if finished
    finished = true
    callback err, result

  # Read the file from disk and hand its text to xml2js.
  parseSavedFile = ->
    console.log "Begin parsing XML"
    fs.readFile filepath, (err, data) ->
      if err then return done err
      xml2js.parseString data.toString(), done

  request(fileUrl)
    .on "error", (err) ->
      # Errors caused by our own abort() are expected and must not mask the
      # result we are already producing.
      return if abortedByUs
      # Release the file handle of a download that died half-way.
      output.destroy() if output
      done err
    .on "response", (response) ->
      req = this
      # Hold the body back until we know whether we need it at all.
      req.pause()

      # An error page must never be cached or parsed as if it were the feed.
      unless 200 <= response.statusCode < 300
        abortedByUs = true
        req.abort()
        return done new Error("Request for #{fileUrl} failed with HTTP status #{response.statusCode}")

      # content-length and last-modified are what we compare the local copy to.
      headers = response.headers
      totalSize = parseInt headers["content-length"], 10
      lastModified = new Date(headers["last-modified"])

      # Download unless a local copy has the same size and is at least as new
      # as the server's version. Missing headers yield NaN / an invalid Date,
      # both comparisons are then false and we download.
      download = true
      if fs.existsSync filepath
        currentFileStats = fs.statSync filepath
        if currentFileStats.size is totalSize and lastModified <= currentFileStats.mtime
          download = false

      unless download
        console.log "Existing file found, no changes detected, skipping download"
        # Drop the connection instead of leaving a paused request open.
        abortedByUs = true
        req.abort()
        return parseSavedFile()

      console.log "Downloading file #{fileUrl} to #{filepath}"
      output = fs.createWriteStream filepath
      output.on "error", (err) ->
        abortedByUs = true
        req.abort()
        done err
      # Parse only after the write stream has flushed everything; the
      # request's own "end" can fire while data is still buffered.
      output.on "finish", parseSavedFile
      req.pipe output
      req.resume()
# Try the function out against a real feed.
url = "http://opendata.cwb.gov.tw/opendata/MMC/F-A0021-001.xml"
requestXmlToJs url, (err, result) ->
  # On failure there is nothing to log or save: report the error and stop
  # instead of trying to serialise an undefined result.
  if err then return console.error err
  # Show the parsed JavaScript object.
  console.log result
  # Also save it as JSON next to the downloaded XML, named after the URL
  # (F-A0021-001.xml -> F-A0021-001.json).
  console.log "Writing to JSON file"
  jsonPath = __dirname + "/tmp/" + path.basename(url, path.extname(url)) + ".json"
  fs.writeFileSync jsonPath, JSON.stringify(result, null, "\t")
  console.log "Done!"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var fs, path, request, requestXmlToJs, url, xml2js; | |
fs = require("fs"); | |
path = require("path"); | |
request = require("request"); | |
xml2js = require("xml2js"); | |
/**
 * Download the XML document at `fileUrl` into `<__dirname>/tmp/`, reusing an
 * existing local copy when the server's headers say it is still current, then
 * parse the saved file with xml2js.
 *
 * @param {string} fileUrl - URL of the XML document; its basename becomes the
 *   local file name.
 * @param {function(?Error, Object=)} callback - Node-style callback receiving
 *   the object produced by `xml2js.parseString`. It is invoked at most once.
 * @returns {Object} The request object created for the download.
 */
requestXmlToJs = function(fileUrl, callback) {
  var tmpFolder = __dirname + "/tmp/";
  if (!fs.existsSync(tmpFolder)) {
    fs.mkdirSync(tmpFolder);
  }
  var filename = path.basename(fileUrl);
  var filepath = tmpFolder + filename;

  var finished = false;    // true once `callback` has been invoked
  var abortedByUs = false; // true once we cancelled the transfer on purpose
  var output = null;       // write stream of the download in progress, if any

  // Report the outcome exactly once, whichever event gets there first.
  var done = function(err, result) {
    if (finished) {
      return;
    }
    finished = true;
    callback(err, result);
  };

  // Read the file from disk and hand its text to xml2js.
  var parseSavedFile = function() {
    console.log("Begin parsing XML");
    fs.readFile(filepath, function(err, data) {
      if (err) {
        return done(err);
      }
      return xml2js.parseString(data.toString(), done);
    });
  };

  return request(fileUrl).on("error", function(err) {
    // Errors caused by our own abort() are expected and must not mask the
    // result we are already producing.
    if (abortedByUs) {
      return;
    }
    // Release the file handle of a download that died half-way.
    if (output) {
      output.destroy();
    }
    done(err);
  }).on("response", function(response) {
    var req = this;
    // Hold the body back until we know whether we need it at all.
    req.pause();

    // An error page must never be cached or parsed as if it were the feed.
    if (response.statusCode < 200 || response.statusCode >= 300) {
      abortedByUs = true;
      req.abort();
      return done(new Error("Request for " + fileUrl + " failed with HTTP status " + response.statusCode));
    }

    var headers = response.headers;
    var totalSize = parseInt(headers["content-length"], 10);
    var lastModified = new Date(headers["last-modified"]);

    // Download unless a local copy has the same size and is at least as new
    // as the server's version. Missing headers yield NaN / an invalid Date,
    // both comparisons are then false and we download.
    var download = true;
    if (fs.existsSync(filepath)) {
      var currentFileStats = fs.statSync(filepath);
      if (currentFileStats.size === totalSize && lastModified <= currentFileStats.mtime) {
        download = false;
      }
    }

    if (!download) {
      console.log("Existing file found, no changes detected, skipping download");
      // Drop the connection instead of leaving a paused request open.
      abortedByUs = true;
      req.abort();
      return parseSavedFile();
    }

    console.log("Downloading file " + fileUrl + " to " + filepath);
    output = fs.createWriteStream(filepath);
    output.on("error", function(err) {
      abortedByUs = true;
      req.abort();
      done(err);
    });
    // Parse only after the write stream has flushed everything; the
    // request's own "end" can fire while data is still buffered.
    output.on("finish", parseSavedFile);
    req.pipe(output);
    return req.resume();
  });
};
// Try the function out against a real feed.
url = "http://opendata.cwb.gov.tw/opendata/MMC/F-A0021-001.xml";
requestXmlToJs(url, function(err, result) {
  // On failure there is nothing to log or save: report the error and stop
  // instead of trying to serialise an undefined result.
  if (err) {
    console.error(err);
    return;
  }
  console.log(result);
  // Also save the parsed object as JSON next to the downloaded XML, named
  // after the URL (F-A0021-001.xml -> F-A0021-001.json).
  console.log("Writing to JSON file");
  var jsonPath = __dirname + "/tmp/" + path.basename(url, path.extname(url)) + ".json";
  fs.writeFileSync(jsonPath, JSON.stringify(result, null, "\t"));
  return console.log("Done!");
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment