Created
November 28, 2022 22:02
-
-
Save krusynth/124f28b1546f08e3da4ddad921867ede to your computer and use it in GitHub Desktop.
Mastodon scraper to get all of the folks followed by folks that you follow (your second-degree followers). Download your following_accounts.csv and put it in the same folder as this script. DO NOT RUN THIS if you follow lots of folks (1000+)!!! That would be rude.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs');
const readline = require('readline');
const https = require('https');
// Options passed to every https.request() call made by get().
// `timeout` is the socket idle timeout in milliseconds. Node only emits a
// 'timeout' event when it elapses; the request is not aborted automatically.
const reqOptions = {
  timeout: 1000,
};
/**
 * Requests `url` over HTTPS and resolves with the response body parsed as JSON.
 *
 * The HTTP status code is not inspected: any body that parses as JSON is
 * resolved, including JSON error payloads.
 *
 * @param {string|URL} url - Address to request.
 * @returns {Promise<*>} Resolves with the parsed JSON body. Rejects on a
 *   network error, on socket timeout, or when the body is not valid JSON.
 */
function get(url) {
  return new Promise((resolve, reject) => {
    const request = https.request(url, reqOptions, (response) => {
      // Collect raw buffers and decode once at the end, so a multi-byte UTF-8
      // character split across two chunks is not corrupted.
      const chunks = [];
      response.on('data', (chunk) => {
        chunks.push(chunk);
      });
      response.on('error', reject);
      response.on('end', () => {
        try {
          resolve(JSON.parse(Buffer.concat(chunks).toString('utf8')));
        } catch (error) {
          reject(error);
        }
      });
    });
    // The `timeout` option only makes Node emit this event; the request has to
    // be destroyed by hand, otherwise a stalled server leaves the promise
    // pending forever. destroy(err) surfaces through the 'error' handler below.
    request.on('timeout', () => {
      request.destroy(new Error(`Request timed out: ${url}`));
    });
    request.on('error', reject);
    request.end();
  });
}
/**
 * Renders one value as a CSV field.
 *
 * null/undefined become an empty field (matching Array.prototype.join).
 * Values containing a comma, double quote, CR or LF are wrapped in double
 * quotes with embedded quotes doubled, so they cannot break the row.
 *
 * @param {*} value - Cell value.
 * @returns {string} The field text, quoted when necessary.
 */
function csvField(value) {
  const text = value === null || value === undefined ? '' : String(value);
  if (/[",\r\n]/.test(text)) {
    return `"${text.replace(/"/g, '""')}"`;
  }
  return text;
}

/**
 * Writes one CSV row (fields joined by commas, terminated by "\n") to `stream`.
 *
 * @param {{write: function(string): boolean}} stream - Destination with a
 *   `write` method, e.g. an fs.WriteStream.
 * @param {Array<*>} output - Cell values for the row.
 * @returns {boolean} Whatever `stream.write` returns.
 */
function write(stream, output) {
  const line = `${output.map(csvField).join(',')}\n`;
  return stream.write(line);
}
/**
 * Reads the accounts listed in following_accounts.csv, asks each account's
 * server who that account follows, tallies how many times every followed
 * account appears, and writes the tally (highest count first) to
 * followfollows.csv with the columns Name, Account, Url, Count.
 *
 * Accounts whose lookup or following request fails are logged and skipped.
 *
 * NOTE(review): only the single response returned by the `following`
 * endpoint is used; if the server paginates that listing, later pages are
 * not fetched — confirm whether that is intended.
 *
 * @returns {Promise<void>} Resolves once the output file has been flushed.
 */
async function run() {
  const inputStream = fs.createReadStream('following_accounts.csv');
  const rl = readline.createInterface({
    input: inputStream,
    crlfDelay: Infinity,
  });

  // First CSV column of every row is the account address (user@server).
  const userList = [];
  for await (const line of rl) {
    userList.push(line.split(',')[0]);
  }
  // Remove headers.
  userList.shift();
  console.log(userList);

  // Keyed by the full user@server address. A Map is used instead of a plain
  // object so account names such as "constructor" cannot collide with
  // Object.prototype members.
  const follows = new Map();

  for (const username of userList) {
    const [user, service] = username.split('@');
    if (!user || !service) {
      // Blank or malformed row: there is no server to query.
      continue;
    }

    const lookupUrl = `https://${service}/api/v1/accounts/lookup?acct=@${user}`;
    let info;
    try {
      info = await get(lookupUrl);
    } catch (error) {
      console.log('error', error);
      continue;
    }
    if (!info || info.id === undefined || info.id === null) {
      // The server answered with JSON that carries no account id (e.g. an
      // error payload), so there is nothing to request next.
      console.log('error', info);
      continue;
    }

    console.log('Getting ', info.display_name);

    const followUrl = `https://${service}/api/v1/accounts/${info.id}/following`;
    let following;
    try {
      following = await get(followUrl);
    } catch (error) {
      console.log('error', error);
      continue;
    }
    if (!Array.isArray(following)) {
      continue;
    }

    for (const follow of following) {
      if (!follow || typeof follow.acct !== 'string') {
        continue;
      }
      // An acct without "@server" belongs to the server that was queried.
      // Normalising to the full address stops the same account from being
      // counted under two different keys, and stops same-named local
      // accounts on different servers from being merged.
      const [namePart, remotePart] = follow.acct.split('@');
      const serverPart = remotePart || service;
      const acct = `${namePart}@${serverPart}`;

      const seen = follows.get(acct);
      if (seen) {
        seen.count++;
        continue;
      }
      follows.set(acct, {
        // Strip every comma (not just the first) so the name stays in one
        // CSV column.
        display_name: String(follow.display_name || '').replace(/,/g, ' '),
        acct,
        url: `https://${serverPart}/@${namePart}`,
        count: 1,
      });
    }
  }

  // Flatten and order by how often each account was seen, most first.
  const ranked = [...follows.values()].sort((a, b) => b.count - a.count);

  const followfollowStream = fs.createWriteStream('followfollows.csv');
  write(followfollowStream, ['Name', 'Account', 'Url', 'Count']);
  for (const follow of ranked) {
    write(followfollowStream, [follow.display_name, follow.acct, follow.url, follow.count]);
  }

  // End the stream and wait for the data to be flushed before reporting
  // completion; surface write/open failures as a rejection.
  await new Promise((resolve, reject) => {
    followfollowStream.on('error', reject);
    followfollowStream.on('finish', resolve);
    followfollowStream.end();
  });
}
// Script entry point: run the scraper, report completion, and make sure a
// failure (e.g. a missing following_accounts.csv) is reported with a
// non-zero exit code instead of becoming an unhandled rejection.
run()
  .then(() => {
    console.log('done.');
  })
  .catch((error) => {
    console.error('error', error);
    process.exitCode = 1;
  });
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment