Last active
November 4, 2019 20:25
-
-
Save NeKzor/0fc5fba418e40e5711c083c849287cb1 to your computer and use it in GitHub Desktop.
SDA forum scraping + analytics.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
/**
 * Serialises `obj` as pretty-printed JSON (4-space indent) and asks the
 * browser to download it under the given file name.
 *
 * Browser-only: relies on the global `document`, `window.URL` and `Blob`.
 *
 * @param {string} file - File name suggested to the browser for the download.
 * @param {*} obj - Any value accepted by `JSON.stringify`.
 * @returns {void}
 */
console.save = function (file, obj) {
    const blob = new Blob([JSON.stringify(obj, null, 4)], { type: 'application/json' });

    const a = document.createElement('a');
    a.download = file;
    a.href = window.URL.createObjectURL(blob);
    // "mime:name:url" triple kept from the original implementation.
    a.dataset.downloadurl = ['application/json', a.download, a.href].join(':');

    // HTMLElement.click() synthesises the click directly; it replaces the
    // deprecated document.createEvent('MouseEvents') / initMouseEvent() pair.
    a.click();
};
const parser = new DOMParser();
const base = 'https://forum.speeddemosarchive.com';
// Every post collected so far, in the order the pages were visited.
const posts = [];
// Path of the page to fetch next; null once the last page has been read.
let next = '/post/portal_223.html';

/**
 * Crawls the thread page by page, starting at `next`, and appends one
 * `{ user, date, post }` record to `posts` for every `.filled` element found.
 * When a page has no `#nextpagelink` element, everything collected is
 * downloaded as `result.json` through `console.save`.
 *
 * @returns {Promise<void>} Resolves after the download has been triggered.
 * @throws {Error} When a page responds with a non-2xx HTTP status.
 */
const scrap = async () => {
    // A loop instead of un-awaited recursion: a failure on any page rejects
    // the single promise returned to the caller instead of getting lost.
    while (next !== null) {
        // new URL() also copes with an absolute href in the "next" link.
        const page = await fetch(new URL(next, base));
        if (!page.ok) {
            // An error page contains no posts and no next link; without this
            // check a truncated result would be saved as if it were complete.
            throw new Error(`[FAILED] ${page.url}: HTTP ${page.status}`);
        }
        console.log('[FETCHED] ' + page.url);

        // Named `doc` so the global `document` is not shadowed.
        const doc = parser.parseFromString(await page.text(), 'text/html');
        doc.querySelectorAll('.filled').forEach((post) => {
            posts.push({
                user: post.querySelector('.usernameindent').firstChild.innerText,
                date: new Date(post.querySelector('.datelink').innerText).toISOString(),
                post: post.querySelector('.posttext .edit_hide').innerHTML,
            });
        });

        const nextPageLink = doc.querySelector('#nextpagelink');
        next = nextPageLink ? nextPageLink.firstChild.getAttribute('href') : null;
    }

    // Spreading the array into an object keeps the existing result.json
    // layout: an object keyed by post index.
    console.save('result.json', { ...posts });
};

scrap().catch((err) => console.error('[ABORTED]', err));
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs'); | |
/**
 * Counts how often each value occurs.
 *
 * A Map is used instead of a plain object so that values such as
 * "constructor" or "toString" cannot collide with members inherited from
 * Object.prototype (which would corrupt their counts).
 *
 * @param {Iterable<*>} values - Values to tally.
 * @returns {Map<*, number>} Value -> number of occurrences, in first-seen order.
 */
function countOccurrences(values) {
    const counts = new Map();
    for (const value of values) {
        counts.set(value, (counts.get(value) || 0) + 1);
    }
    return counts;
}

/**
 * Returns the `limit` most frequent entries, most frequent first.
 *
 * @param {Map<*, number>} counts - Tally produced by `countOccurrences`.
 * @param {number} limit - Maximum number of entries to return.
 * @returns {Array<[*, number]>} `[value, count]` pairs.
 */
function topEntries(counts, limit) {
    return Array.from(counts.entries())
        .sort((a, b) => b[1] - a[1])
        .slice(0, limit);
}

/**
 * Extracts every link target from a post's HTML.
 *
 * @param {string} html - Post body as HTML.
 * @returns {string[]} Captured href values, in document order.
 */
function extractLinks(html) {
    // A fresh /g regex per call: exec() keeps its position in lastIndex.
    const pattern = /href=\"(.+?)">/g;
    const links = [];
    let match;
    // Loop over all matches; a single exec() would report only the first
    // link of each post.
    while ((match = pattern.exec(html)) !== null) {
        links.push(match[1]);
    }
    return links;
}

/**
 * Builds the textual report, one array element per output line.
 *
 * Sections, in order: total post count, posts per year, sorted list of all
 * links, the 200 most frequent space-separated terms, the 30 most active users.
 *
 * @param {Array<{user: string, date: Date, post: string}>} posts - Scraped posts.
 * @param {number} [firstYear=2011] - First year listed in the per-year section.
 * @param {number} [lastYear=2019] - Last year listed (inclusive).
 * @returns {string[]} Report lines, ready to be printed.
 */
function buildReport(posts, firstYear = 2011, lastYear = 2019) {
    const lines = [];

    // Two spaces after the colon: identical to what
    // console.log('Posts: ', n) printed before.
    lines.push('Posts:  ' + posts.length);

    // One pass over the posts instead of re-filtering them for every year.
    const perYear = countOccurrences(posts.map(({ date }) => date.getFullYear()));
    for (let year = firstYear; year <= lastYear; ++year) {
        lines.push(` ${year}: ${perYear.get(year) || 0}`);
    }

    lines.push('Links:');
    const links = [];
    for (const { post } of posts) {
        for (const link of extractLinks(post)) {
            links.push(link);
        }
    }
    links.sort().forEach((link) => lines.push(' ' + link));

    lines.push('Terms:');
    const words = [];
    for (const { post } of posts) {
        for (const word of post.split(' ')) {
            // Consecutive spaces produce empty strings; skip them.
            if (word !== '') {
                words.push(word);
            }
        }
    }
    topEntries(countOccurrences(words), 200)
        .forEach(([word, count]) => lines.push(' ' + word + ': ' + count));

    lines.push('Users:');
    topEntries(countOccurrences(posts.map(({ user }) => user)), 30)
        .forEach(([user, count]) => lines.push(' ' + user + ': ' + count));

    return lines;
}

/**
 * Reads ./result.json, revives the ISO date strings into Date objects and
 * prints the report to stdout.
 */
function main() {
    // Object.values() accepts both an index-keyed object and a plain array.
    const stored = JSON.parse(fs.readFileSync('./result.json', 'utf-8'));
    const posts = Object.values(stored).map((post) => ({ ...post, date: new Date(post.date) }));
    buildReport(posts).forEach((line) => console.log(line));
}

// Run only when executed directly (node <file>), not when loaded as a module.
if (typeof require !== 'undefined' && require.main === module) {
    main();
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Posts: 1052 | |
2011: 841 | |
2012: 145 | |
2013: 55 | |
2014: 10 | |
2015: 1 | |
2016: 0 | |
2017: 0 | |
2018: 0 | |
2019: 0 | |
Links: | |
http://adzicents.net/portal2/portal2%202011-04-24%2017-45-11-05.png | |
http://adzicents.net/portal2/portal2%202011-04-24%2021-49-48-28.png | |
http://blip.tv/file/5046980 | |
http://cs.rin.ru/forum/viewtopic.php?f=10&t=52701 | |
http://cs.rin.ru/forum/viewtopic.php?f=30&t=54888 | |
http://cs.rin.ru/forum/viewtopic.php?f=30&t=54888 | |
http://dl.dropbox.com/u/45123651/sr-port2cf.rar | |
http://dl.dropbox.com/u/4533383/CubePreservation_HQ.mp4 | |
http://dl.dropbox.com/u/4533383/minorskip_sp_a2_bts1_HQ.mp4 | |
http://dl.dropbox.com/u/4533383/minorskip_sp_a2_bts1_HQ.mp4 | |
http://dl.dropbox.com/u/4533383/minorskip_sp_a2_bts1_HQ.mp4 | |
http://dl.dropbox.com/u/4533383/promising_HQ.mp4 | |
http://dl.dropbox.com/u/4533383/promising_HQ.mp4 | |
http://dl.dropbox.com/u/4533383/wtflevitatingcube_HQ.mp4 | |
http://dl.dropbox.com/u/5013851/portal%202/route4.dem | |
http://forum.speeddemosarchive.com/post/demonstrates_portal_done_pro.html | |
http://forum.speeddemosarchive.com/post/demonstrates_portal_done_pro.html | |
http://forum.speeddemosarchive.com/post/portal_223_102.html | |
http://forum.speeddemosarchive.com/post/portal_223_102.html | |
http://forum.speeddemosarchive.com/post/portal_223_102.html | |
http://forum.speeddemosarchive.com/post/portal_223_254.html | |
http://forum.speeddemosarchive.com/post/portal_223_480.html | |
http://forum.speeddemosarchive.com/post/portal_2__november_7th_2012.html | |
http://forum.speeddemosarchive.com/post/single_segment_with_resets_now_accepted_186.html | |
http://forums.steampowered.com/forums/showthread.php?t=1849054 | |
http://i.imgur.com/hE78s.png | |
http://i43.tinypic.com/20fb3n6.png | |
http://imageshack.us/photo/my-images/220/goonsquad1.jpg/ | |
http://imageshack.us/photo/my-images/542/myusernames.png/ | |
http://img153.imageshack.us/img153/8682/spa3010001.jpg | |
http://img543.imageshack.us/img543/6613/spa2bts20000.jpg | |
http://img807.imageshack.us/img807/7726/spa3010002.jpg | |
http://img815.imageshack.us/img815/639/52192416.jpg | |
http://img853.imageshack.us/img853/6192/spa3010000.jpg | |
http://imgur.com/oA9ED | |
http://sourceruns.org/index.php | |
http://sourceruns.org/index.php | |
http://sourceruns.org/index.php | |
http://sourceruns.org/index.php?board=15.0 | |
http://sourceruns.org/index.php?board=15.0 | |
http://sourceruns.org/index.php?board=15.0 | |
http://sourceruns.org/index.php?board=17.0 | |
http://sourceruns.org/index.php?board=17.0 | |
http://sourceruns.org/index.php?board=17.0 | |
http://sourceruns.org/index.php?board=17.0 | |
http://sourceruns.org/index.php?board=17.0 | |
http://sourceruns.org/index.php?topic=473.msg6746#msg6746 | |
http://sourceruns.org/index.php?topic=473.msg6746#msg6746 | |
http://sourceruns.org/index.php?topic=473.msg6746#msg6746 | |
http://sourceruns.org/index.php?topic=475.msg6966#msg6966 | |
http://sourceruns.org/index.php?topic=475.msg6966#msg6966 | |
http://sourceruns.org/index.php?topic=475.msg6966#msg6966 | |
http://sourceruns.org/index.php?topic=566.0 | |
http://speeddemosarchive.com/kb/SDA_Strategy_Wiki | |
http://steamcommunity.com/id/adzicents | |
http://steamcommunity.com/id/rogotin | |
http://steamcommunity.com/profiles/76561197972228601 | |
http://steamcommunity.com/profiles/76561197972228601 | |
http://techforum4u.com/forumdisplay.php/58-SMF-Simple-Machines-Forum | |
http://up.k10x.net/xpddezgpkjdkg/trick1.dem | |
http://up.k10x.net/xpddezgpkjdkg/trick1.dem | |
http://up.k10x.net/xpddezgpkjdkg/trick1.dem | |
http://up.k10x.net/xqnzoaskxvgiz/sp_a4_finale2_01.dem | |
http://up.k10x.net/xqnzoaskxvgiz/sp_a4_finale2_01.dem | |
http://webchat.quakenet.org | |
http://www.adzicents.net/portal2/wiki/ | |
http://www.adzicents.net/portal2/wiki/doku.php | |
http://www.adzicents.net/portal2/wiki/doku.php?id=Home | |
http://www.amazon.com/Portal-2-Pc/dp/B002I0JIQW/ref=br_lf_m_1000208101_1_3_ttl?t=slicinc-20&tag=slicinc-20&ie=UTF8&m=ATVPDKIKX0DER&s=videogames&pf_rd_p=1295543582&pf_rd_s=center-2&pf_rd_t=1401&pf_rd_i=1000208101&pf_rd_m=ATVPDKIKX0DER&pf_rd_r=1ZDCDNA0CHJJ8NGPJZ6H | |
http://www.livestream.com/bkbroadcasting | |
http://www.livestream.com/transgenic | |
http://www.logitech.com/en-us/mice-pointers/mice/devices/5750 | |
http://www.mediafire.com/?687r464goy6l00q | |
http://www.mediafire.com/?a686f9r419e8706 | |
http://www.mediafire.com/?cu00ghdhgzrsfmq | |
http://www.megaupload.com/?d=SN5D7QN0 | |
http://www.megaupload.com/?d=SN5D7QN0 | |
http://www.megaupload.com/?d=SN5D7QN0 | |
http://www.megaupload.com/?d=SN5D7QN0 | |
http://www.newegg.com/Product/Product.aspx?Item=N82E16823109191 | |
http://www.twitch.tv/znernicus/b/323603752 | |
http://www.youtube.com/playlist?p=PLD3CDFF0B75369CA8 | |
http://www.youtube.com/playlist?p=PLD3CDFF0B75369CA8 | |
http://www.youtube.com/user/ChaoThing?feature=mhum | |
http://www.youtube.com/user/SoftlyAdverse#grid/user/11E8F87FB6D48568 | |
http://www.youtube.com/watch?v=-m_vwjlwkZI | |
http://www.youtube.com/watch?v=-yduLqf4jF4 | |
http://www.youtube.com/watch?v=6XzR5wopFQQ | |
http://www.youtube.com/watch?v=7Yk-vIwbgPA | |
http://www.youtube.com/watch?v=8Pb4u38TPCc | |
http://www.youtube.com/watch?v=98CG1OqFQlc | |
http://www.youtube.com/watch?v=9Uy4b1JFT4E | |
http://www.youtube.com/watch?v=E1AE2GB6Aho | |
http://www.youtube.com/watch?v=EeYf-K0rGdw | |
http://www.youtube.com/watch?v=FQfIelYDtW8 | |
http://www.youtube.com/watch?v=MDevhzgWozY | |
http://www.youtube.com/watch?v=MKFreE4cXzw | |
http://www.youtube.com/watch?v=MmuGpuv8RMg | |
http://www.youtube.com/watch?v=Pt8L9AQ30vI | |
http://www.youtube.com/watch?v=QRwJ3NTNa8Y | |
http://www.youtube.com/watch?v=Qty0Rv_cQNI | |
http://www.youtube.com/watch?v=R9p850eVNDo | |
http://www.youtube.com/watch?v=XY50yk7oodk | |
http://www.youtube.com/watch?v=Yk0T4aSSxIo | |
http://www.youtube.com/watch?v=ZziFnNroWEA | |
http://www.youtube.com/watch?v=ZziFnNroWEA | |
http://www.youtube.com/watch?v=ZziFnNroWEA | |
http://www.youtube.com/watch?v=fMVsn-8ozNw | |
http://www.youtube.com/watch?v=fRQphlshpds | |
http://www.youtube.com/watch?v=g8pa1-Pt5ho | |
http://www.youtube.com/watch?v=iczE214fG-8 | |
http://www.youtube.com/watch?v=ijMgrpwhmBY | |
http://www.youtube.com/watch?v=kHGtqyWYk64&NR=1 | |
http://www.youtube.com/watch?v=kznj0Zs7b8k | |
http://www.youtube.com/watch?v=lGtaB-m7zmg | |
http://www.youtube.com/watch?v=pqIhtlz6DC4 | |
http://www.youtube.com/watch?v=qQuWxzpsQrs | |
http://www.youtube.com/watch?v=rm_qd...ature=youtu.be | |
http://www.youtube.com/watch?v=sDhRiVK5EKE&feature=g-upl&context=G2dc8befAUAAAAAAAAAA | |
http://www.youtube.com/watch?v=wDzm4v5r6uY | |
http://www.youtube.com/watch?v=wbsKQiyc5go | |
http://www.youtube.com/watch?v=y99h6JwJ1pY | |
http://youtu.be/oNH0s_Dv38Q#t=1m1s | |
http://youtu.be/oNH0s_Dv38Q#t=1m1s | |
http://youtu.be/oNH0s_Dv38Q#t=1m1s | |
http://zalil.ru/30922927 | |
https://bitbucket.org/VoiDeD/steamre/src/6af5ab95757c/Projects/DepotDownloader/DepotDownloader | |
https://bitbucket.org/VoiDeD/steamre/src/6af5ab95757c/Projects/DepotDownloader/DepotDownloader | |
https://bitbucket.org/VoiDeD/steamre/src/6af5ab95757c/Projects/DepotDownloader/DepotDownloader | |
https://support.steampowered.com/view.php?ticketref=6931-WOKB-4412 | |
https://support.steampowered.com/view.php?ticketref=6931-WOKB-4412 | |
https://support.steampowered.com/view.php?ticketref=6931-WOKB-4412 | |
https://youtu.be/2aIgFpJstro | |
https://youtu.be/2aIgFpJstro | |
https://youtu.be/2aIgFpJstro | |
steam://friends/add/76561197969042354 | |
Terms: | |
the: 3384 | |
to: 2011 | |
a: 1386 | |
I: 1342 | |
you: 1190 | |
it: 1032 | |
of: 959 | |
and: 891 | |
is: 831 | |
that: 774 | |
in: 744 | |
on: 638 | |
from: 614 | |
for: 543 | |
this: 518 | |
be: 483 | |
but: 445 | |
class="youtube-player": 429 | |
type="text/html": 429 | |
height="295": 429 | |
allowfullscreen="": 429 | |
frameborder="0": 429 | |
width="480": 429 | |
class="quotearea"><div: 413 | |
class="quotelabel">Quote: 394 | |
with: 393 | |
if: 369 | |
just: 353 | |
can: 328 | |
have: 327 | |
not: 311 | |
<div: 306 | |
as: 300 | |
get: 294 | |
so: 294 | |
at: 289 | |
target="_blank": 280 | |
do: 276 | |
portal: 251 | |
are: 244 | |
was: 234 | |
my: 229 | |
I'm: 225 | |
your: 220 | |
it's: 215 | |
glitch: 203 | |
would: 199 | |
or: 197 | |
one: 192 | |
don't: 189 | |
up: 185 | |
will: 178 | |
like: 176 | |
some: 175 | |
way: 174 | |
cube: 172 | |
think: 165 | |
out: 164 | |
an: 162 | |
by: 161 | |
through: 161 | |
how: 158 | |
game: 154 | |
time: 152 | |
because: 148 | |
-: 147 | |
what: 145 | |
than: 145 | |
when: 143 | |
about: 141 | |
me: 140 | |
all: 134 | |
then: 132 | |
Chapter: 132 | |
route: 128 | |
first: 127 | |
2: 124 | |
video: 124 | |
into: 124 | |
sure: 123 | |
use: 123 | |
Portal: 122 | |
could: 122 | |
make: 121 | |
found: 121 | |
there: 120 | |
run: 119 | |
really: 118 | |
If: 115 | |
It: 115 | |
i: 114 | |
jump: 113 | |
The: 112 | |
other: 109 | |
<img: 108 | |
using: 108 | |
save: 107 | |
faster: 107 | |
know: 106 | |
wall: 105 | |
you're: 105 | |
no: 103 | |
only: 102 | |
should: 101 | |
see: 101 | |
any: 100 | |
Chamber: 97 | |
actually: 97 | |
without: 96 | |
<a: 95 | |
been: 94 | |
which: 93 | |
where: 92 | |
has: 91 | |
trick: 90 | |
button: 90 | |
much: 90 | |
already: 89 | |
it.: 88 | |
more: 87 | |
done: 87 | |
did: 85 | |
possible: 85 | |
still: 85 | |
go: 85 | |
map: 85 | |
You: 83 | |
work: 82 | |
version: 81 | |
can't: 81 | |
we: 80 | |
speed: 80 | |
they: 80 | |
pretty: 76 | |
new: 75 | |
need: 74 | |
I've: 73 | |
level: 73 | |
doesn't: 73 | |
last: 73 | |
even: 72 | |
before: 72 | |
after: 71 | |
same: 70 | |
off: 70 | |
them: 66 | |
This: 66 | |
want: 65 | |
doing: 64 | |
able: 63 | |
going: 63 | |
had: 63 | |
over: 63 | |
here: 63 | |
find: 62 | |
might: 62 | |
back: 60 | |
trying: 59 | |
does: 59 | |
portals: 59 | |
something: 58 | |
made: 58 | |
being: 57 | |
crouch: 57 | |
end: 56 | |
skip: 56 | |
another: 56 | |
=: 56 | |
down: 55 | |
since: 55 | |
load: 55 | |
people: 55 | |
post: 55 | |
chamber: 55 | |
someone: 55 | |
while: 54 | |
two: 54 | |
good: 54 | |
too: 54 | |
class="quotetext"><div: 54 | |
right: 54 | |
start: 53 | |
posted: 53 | |
it,: 53 | |
lot: 53 | |
also: 52 | |
co-op: 52 | |
hard: 52 | |
next: 52 | |
probably: 51 | |
may: 50 | |
there's: 50 | |
src="/file/Pz%2Fcj6ngNFMHn%2BnxTNlCutk1Ef8": 50 | |
isn't: 50 | |
why: 50 | |
different: 49 | |
stuff: 49 | |
I'll: 49 | |
haven't: 48 | |
flying: 48 | |
Users: | |
DemonStrate: 103 | |
adzicents: 76 | |
coolkid: 65 | |
Paraxade: 65 | |
Kingpin: 47 | |
00Svo: 43 | |
ExplodingCabbage: 43 | |
Scepheo: 33 | |
NabsterHax: 29 | |
z1mb0bw4y: 27 | |
ncla: 24 | |
bandit5k: 23 | |
znernicus: 16 | |
toten: 15 | |
forever: 14 | |
TTom: 14 | |
RøvRapunzel: 13 | |
DJell: 13 | |
S.: 13 | |
Pmk138: 12 | |
Negatratoron: 10 | |
Laufas: 10 | |
NoPegs: 10 | |
Rogo_Pro_Venia: 9 | |
Fed981: 9 | |
logitechSDAZ: 9 | |
Spyrunite: 9 | |
Blazier: 9 | |
romscout: 8 | |
Inexistence: 8 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment