Created
November 17, 2014 21:55
-
-
Save stravant/ba430d4cb4e20faa26ca to your computer and use it in GitHub Desktop.
Tracked Thread saver for Roblox Forums
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/lua | |
-- Page that lists the logged-in user's tracked threads.
local tracked_url = "http://www.roblox.com/Forum/User/MyForums.aspx"
-- Prefix of a thread URL; the numeric post ID is appended to it.
local base_url = "http://www.roblox.com/Forum/ShowPost.aspx?PostID="

-- Paste your .ROBLOSECURITY cookie below. You can find the cookie by viewing
-- www.roblox.com's cookies in your browser while you are logged in.
-- Alternatively, leave the string empty and export the value in the
-- ROBLOSECURITY environment variable, so the secret never has to be saved
-- in this file.
-- DO NOT SHARE THIS VALUE, it can be used to log into your current account session.
local cookie = ""
if cookie == "" then
  -- Nothing pasted above: fall back to the environment (may also be unset).
  cookie = os.getenv("ROBLOSECURITY") or ""
end
if cookie == "" then
  error("No .ROBLOSECURITY cookie provided, can't fetch tracked threads! (See above)")
end
--- Run a command line through the system shell.
-- @param txt  command line, handed verbatim to os.execute
-- @return every value os.execute returns, so callers can detect a failed
--         command (the shape of that result depends on the Lua version)
function cmd(txt)
  return os.execute(txt)
end
--- Read an entire file into a string.
-- @param filename  path of the file to read
-- @return the complete file contents
-- Raises an error carrying io.open's message when the file cannot be opened.
function fread(filename)
  local handle = assert(io.open(filename, 'r'))
  local contents = handle:read("*a")
  -- Close explicitly instead of leaving the handle to the garbage collector.
  handle:close()
  return contents
end
--- Open a file in 'w+' mode (read/write; existing content is discarded).
-- @param filename  path of the file to open
-- @return the file handle from io.open, or nil plus an error message when
--         the file cannot be opened
function openwrite(filename)
  return io.open(filename, 'w+')
end
-- Download the tracked-threads page into tracked.html. The cookie is placed
-- inside the double quotes and the URL is quoted as well, so the shell hands
-- each of them to curl as a single word.
cmd('curl --cookie ".ROBLOSECURITY='..cookie..'" "'..tracked_url..'" > tracked.html')
local tracked_html = fread('tracked.html')
-- urls.txt receives one post ID per line.
local urls_txt = assert(openwrite('urls.txt'))
local post_id_list = {}

-- Top part -> tracked threads, Bottom part -> recent threads.
-- Keep only the text before the "Your Last" heading; when the heading is
-- absent the whole page is scanned.
local top_part = tracked_html:find("<h2>Your Last ", 1, true)
if top_part then
  tracked_html = tracked_html:sub(1, top_part - 1)
end

-- Find post IDs
for id in tracked_html:gmatch('href="/Forum/ShowPost.aspx%?PostID=(%d+)"><div') do
  urls_txt:write(id.."\n")
  post_id_list[#post_id_list + 1] = tonumber(id)
end
-- Flush and release the ID list now that every ID has been written.
urls_txt:close()
-- Create a folder for the posts
cmd[[mkdir data]]

--- Find the highest page number linked from a thread page's pager control.
-- @param html  full HTML text of one thread page
-- @return the largest number found among the pager links, or 0 when the page
--         contains no pager control or the pager has no numbered links
local function max_linked_page(html)
  -- The pager is taken to be the line with the opening span plus the two
  -- lines that follow it.
  local pager = html:match('<span id="ctl00_cphRoblox_PostView1_ctl00_Pager">[^\n]*\n[^\n]*\n[^\n]*')
  if not pager then
    return 0
  end
  local highest = 0
  for page_num in pager:gmatch('%)">(%d+)</a>') do
    highest = math.max(highest, tonumber(page_num))
  end
  return highest
end

local saved_count = 0
-- For each post Id, get that post
for _, id in ipairs(post_id_list) do
  local target_file = 'data/'..id..'_page1.html'
  -- Read in the post (cookie kept inside the quotes so it stays one shell word)
  cmd('curl --cookie ".ROBLOSECURITY='..cookie..'" "'..base_url..id..'" > '..target_file)
  saved_count = saved_count + 1

  -- Now, parse for pages
  local max_page_index = max_linked_page(fread(target_file))

  -- Read in any extra pages.
  -- NOTE(review): the upper bound of max_page_index + 1 is kept from the
  -- original script; confirm against the live pager markup that this does
  -- not request one page past the end of the thread.
  for i = 2, max_page_index + 1 do
    cmd('curl --cookie ".ROBLOSECURITY='..cookie..'" "'..base_url..id..'&PageIndex='..i..'" > data/'..id..'_page'..i..'.html')
    saved_count = saved_count + 1
  end
end
print("Done, saved "..saved_count.." pages in "..(#post_id_list).." threads.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment