Created
July 28, 2016 11:32
-
-
Save msva/f90ff8ce5a2fb0202ef06cc836ac4bec to your computer and use it in GitHub Desktop.
stdin
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env luajit | |
local iconv=require"iconv"; | |
local w2u = iconv.new("utf-8","cp1251"); | |
local u2w = iconv.new("cp1251","utf-8"); | |
local cURL = require("cURL"); | |
local html=require"htmlparser" | |
-- Seed the PRNG once from the wall clock. The previous nested call seeded
-- twice and depended on math.randomseed() returning no value, which made the
-- effective seed 2*os.time(); a single explicit seed is equivalent in intent.
math.randomseed(os.time());
-- Pool of browser User-Agent strings; one entry is picked at random for every
-- HTTP request. The label above each entry describes what the string itself
-- advertises.
local UAs = {
  -- Firefox 37, Windows 7 (NT 6.1), 64-bit
  "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20130101 Firefox/37.0",
  -- Firefox 37, Ubuntu Linux, 32-bit
  "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:37.0) Gecko/20130101 Firefox/37.0",
  -- Chrome 41, Windows 7 (NT 6.1), 64-bit
  "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.22",
  -- Chrome 43, Windows XP (NT 5.1)
  "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/43.0.2357.10 Safari/537.22",
  -- Chrome 42, Mac OS X 10.7.5
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/42.0.2311.82 Safari/537.22",
  -- Chrome 43, Linux, 64-bit
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/43.0.2537.10 Safari/537.22",
  -- Chrome 43, Linux, 32-bit
  "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/43.0.2537.10 Safari/537.22",
  -- Internet Explorer 9, Windows 7 (NT 6.1), 64-bit
  "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
  -- Internet Explorer 11; the string reports "Windows NT 8.1", 64-bit
  "Mozilla/5.0 (compatible; MSIE 11.0; Windows NT 8.1; WOW64; Trident/7.0)",
  -- Safari 6.0.2, Mac OS X 10.8.2
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/536.26.17 (KHTML, like Gecko) Version/6.0.2 Safari/536.26.17",
}
-- Extra HTTP request headers attached to the cURL handle for every request.
local headers = {
  "Accept: text/*",
  "Accept-Language: ru,en",
  "Accept-Charset: utf-8,cp1251,koi8-r,iso-8859-5,*",
  "Cache-Control: no-cache",
}
local buf=""; | |
local base_url="http://nsk-mahaon.ru"; | |
local c = cURL.easy_init() | |
c:setopt_httpheader(headers) | |
c:setopt_cookiefile("") | |
c:setopt_followlocation(1) | |
c:setopt_useragent(UAs[math.random(1,#UAs)]); | |
c:setopt_url(base_url); | |
c:setopt_writefunction(function(result) buf=buf..result; return true; end); | |
c:perform(); | |
local main_page=html.parse(buf)("a[class|='cat']"); | |
buf=""; | |
-- Selectors used on every category page.
local PAGER_LAST_SELECTOR = "div.item-list > ul.pager > li.pager-last.last > a.active";
local ROW_CELL_SELECTOR = "table.views-table > tbody > tr > td.views-field";

--- Download `url` with the shared cURL handle and parse the response.
-- A random User-Agent from `UAs` is chosen for each request. The body is
-- gathered in a private chunk list and joined once before parsing.
-- @param url absolute URL to fetch
-- @return the root node returned by html.parse for the downloaded body
local function fetch_parsed(url)
  local chunks = {};
  c:setopt_useragent(UAs[math.random(1, #UAs)]);
  c:setopt_url(url);
  c:setopt_writefunction(function(result)
    chunks[#chunks + 1] = result;
    return true;
  end);
  c:perform();
  return html.parse(table.concat(chunks));
end

--- Crawl every page of one category and print the class of each table cell.
-- @param link the category <a> element taken from the front page
local function crawl_category(link)
  local href = link.attributes.href;
  if not href then
    -- Without an href there is nothing to fetch; previously this crashed on
    -- string concatenation with nil.
    io.stderr:write("skipping category link without href\n");
    return;
  end
  local cat_url = base_url .. href;
  -- gsub returns (string, count); the parentheses keep only the string.
  local cat_name = (link:getcontent():gsub(" ", "_"));

  print([[Начало обработки категории "]] .. cat_name .. [["]])
  print("", [[Обработка страницы 1 в категории "]] .. cat_name .. [["...]])

  -- The first page is parsed once and queried twice (pager + table cells).
  local first_doc = fetch_parsed(cat_url);

  -- The "last page" pager link carries the highest page index in its
  -- "page=N" query parameter; no pager (or no number) means a single page.
  local last_link = first_doc(PAGER_LAST_SELECTOR)[1];
  local last_href = last_link and last_link.attributes.href;
  local pages = last_href and tonumber(last_href:match(".*page=(%d+)")) or 0;

  local cat_pages = {};
  cat_pages[1] = first_doc(ROW_CELL_SELECTOR);

  -- "?page=pc" is requested for pc = 1..pages and reported to the user as
  -- page pc+1, the unparameterised URL above being page 1.
  for pc = 1, pages do
    print("", [[Обработка страницы ]] .. (pc + 1) .. [[ в категории "]] .. cat_name .. [["...]])
    cat_pages[pc + 1] = fetch_parsed(cat_url .. "?page=" .. pc)(ROW_CELL_SELECTOR);
  end

  for _, cat_page in ipairs(cat_pages) do
    for _, cat_e in ipairs(cat_page) do
      print("", "", cat_e.attributes.class)
    end
  end

  print([[Конец обработки категории "]] .. cat_name .. [["]])
end

for _, e in ipairs(main_page) do
  crawl_category(e);
end

c:close();
-- The script ends here; nothing after this call is executed.
os.exit(0);
out=""; | |
for _,t in ipairs(price_t) do | |
local csv_str_t={} | |
local l1,l2 = "", ""; | |
for k,v in pairs(t) do | |
k=k:gsub([["]],[[""]]); | |
v=v:gsub([["]],[[""]]); | |
if (#out>0) then | |
l1=l1..(#l1>0 and [[;]] or l1)..[["]]..v..[["]]; | |
else | |
l1=l1..(#l1>0 and [[;]] or l1)..[["]]..k..[["]]; | |
l2=l2..(#l2>0 and [[;]] or l2)..[["]]..v..[["]]; | |
end | |
end | |
l1=((#l1>0) and (l1.."\n") or l1) | |
l2=((#l2>0) and (l2.."\n") or l2) | |
out=out..l1..l2; | |
end | |
io.output(filename.."_"..os.date("%d%m%y_%H%M%S")..".csv"):write(out); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment