Created
May 27, 2022 13:15
-
-
Save elazar/0911faf8fb72de9fb4f19645f848726d to your computer and use it in GitHub Desktop.
Twitter Follower Crawler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Crawler configuration.
//
// How to obtain API access and a bearer token:
// https://developer.twitter.com/en/docs/twitter-api/getting-started/getting-access-to-the-twitter-api
//
// The token is taken from the TWITTER_BEARER_TOKEN environment variable when
// it is set, so the secret does not have to be committed with the script.
// Otherwise replace the '...' placeholder below with the token.
define('TWITTER_BEARER_TOKEN', getenv('TWITTER_BEARER_TOKEN') ?: '...');

// Controls the zone used by date() when the rate-limit wait time is printed.
date_default_timezone_set('America/Chicago');
/**
 * Sends an authenticated GET request to the Twitter API v2 and decodes the
 * JSON response.
 *
 * When the response reports that the rate limit is exhausted
 * (x-rate-limit-remaining is "0"), this call blocks until the reset time
 * given by x-rate-limit-reset before returning, so the next request can
 * proceed immediately.
 *
 * @param string $path Path and query string appended to https://api.twitter.com/2
 *
 * @return mixed Decoded JSON (objects as stdClass), or null when the request
 *               failed or the body was not valid JSON
 */
function sendRequest($path)
{
    $context = stream_context_create([
        'http' => [
            'header' => 'Authorization: Bearer ' . TWITTER_BEARER_TOKEN,
        ],
    ]);
    $url = 'https://api.twitter.com/2' . $path;

    // Warnings are suppressed on purpose: a failed request is reported to the
    // caller as a null return value instead.
    $response = @file_get_contents($url, false, $context);

    // $http_response_header is only populated when the HTTP wrapper actually
    // received a response, so fall back to an empty list on connection errors.
    // Header names are lower-cased because HTTP header names are
    // case-insensitive; lines without a "name: value" shape (e.g. the status
    // line of a redirect hop) are skipped.
    $headers = [];
    foreach (array_slice($http_response_header ?? [], 1) as $header) {
        if (preg_match('/^([^:]+):\s*(.*)$/', $header, $match) === 1) {
            $headers[strtolower($match[1])] = trim($match[2]);
        }
    }

    if (($headers['x-rate-limit-remaining'] ?? null) === '0') {
        $end = (int) ($headers['x-rate-limit-reset'] ?? 0);
        $wait = $end - time();
        // The reset time may already have passed (or be missing); sleep()
        // rejects negative durations, so only wait when there is time left.
        if ($wait > 0) {
            echo 'Waiting until ', date('h:i:s A', $end), PHP_EOL;
            sleep($wait);
        }
    }

    return $response === false ? null : json_decode($response);
}
/**
 * Reduces a user object returned by the API to the fields stored in the
 * `users` table.
 *
 * @param object $apiUser API user object with `id` and `username`, and
 *                        optionally `public_metrics->followers_count`
 *
 * @return object stdClass with `id`, `username` and `followers_count`
 *                (null when the API object carries no public metrics)
 */
function convertApiUserToDbUser(object $apiUser): object
{
    return (object) [
        'id' => $apiUser->id,
        'username' => $apiUser->username,
        // public_metrics is only present when requested via user.fields.
        'followers_count' => $apiUser->public_metrics->followers_count ?? null,
    ];
}
/**
 * Looks up a single user by handle, including public metrics.
 *
 * @param string $username Twitter handle without the leading "@"
 *
 * @return object User in the shape produced by convertApiUserToDbUser()
 *
 * @throws RuntimeException When the API response contains no user data
 *                          (unknown user, failed request, ...)
 */
function getUserByUsername($username)
{
    // Encode the handle so it cannot alter the request path or query string.
    $path = '/users/by/username/' . rawurlencode($username) . '?user.fields=public_metrics';
    $decoded = sendRequest($path);

    if (!isset($decoded->data)) {
        $detail = $decoded->errors[0]->detail ?? 'no user data in API response';
        throw new RuntimeException("Unable to fetch user \"$username\": $detail");
    }

    return convertApiUserToDbUser($decoded->data);
}
/**
 * Lazily yields every follower of a user, fetching pages of up to 1000
 * followers and following the pagination token until no next page is
 * reported.
 *
 * Progress ("Sending request N") and API errors are echoed to standard
 * output. Iteration stops at the first response that contains an `errors`
 * entry.
 *
 * @param string $userId ID of the user whose followers are listed
 *
 * @return Generator Yields objects in the shape produced by
 *                   convertApiUserToDbUser()
 */
function getUserFollowers($userId)
{
    $requests = 0;
    $nextToken = null;
    do {
        echo 'Sending request ', ++$requests, PHP_EOL;
        $path = "/users/$userId/followers?max_results=1000&user.fields=public_metrics";
        if ($nextToken !== null) {
            $path .= '&pagination_token=' . rawurlencode($nextToken);
        }

        $decoded = sendRequest($path);
        if (isset($decoded->errors)) {
            echo 'Error: ', $decoded->errors[0]->detail ?? 'unknown error', PHP_EOL;
            break;
        }

        // A page may carry no `data` at all (no followers, or a failed
        // request that decoded to null); treat that as an empty page.
        foreach ($decoded->data ?? [] as $follower) {
            yield convertApiUserToDbUser($follower);
        }

        $nextToken = $decoded->meta->next_token ?? null;
    } while ($nextToken);
}
// Open (or create) the SQLite database that lives next to this script.
$pdo = new PDO('sqlite:' . __DIR__ . '/TwitterCrawler.sq3');

// Fail loudly on SQL errors. This is the default from PHP 8.0 on, but older
// versions silently return false from prepare()/query()/exec().
$pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

// Schema: one row per known user, plus one row per (followee, follower) edge.
$pdo->exec('
    CREATE TABLE IF NOT EXISTS users (
        id TEXT PRIMARY KEY,
        username TEXT UNIQUE,
        followers_count INT
    );
    CREATE TABLE IF NOT EXISTS followers (
        followee_user_id TEXT,
        follower_user_id TEXT,
        PRIMARY KEY (followee_user_id, follower_user_id)
    );
');

// Upsert of a single user row (REPLACE overwrites an existing row that
// conflicts on id or username).
$userStmt = $pdo->prepare('
    REPLACE INTO users (
        id,
        username,
        followers_count
    )
    VALUES (
        :id,
        :username,
        :followers_count
    )
');

// Upsert of a single follower edge; re-inserting a known edge is harmless.
$followerStmt = $pdo->prepare('
    REPLACE INTO followers (
        followee_user_id,
        follower_user_id
    )
    VALUES (
        :followee_user_id,
        :follower_user_id
    )
');
// Selects the users whose stored follower edges are fewer than the follower
// count reported by the API, i.e. the users that still need to be crawled.
//
// The statement is only prepared here; it is executed later, once the seed
// user has been stored.
//
// COUNT(followers.follower_user_id) is used rather than COUNT(*): with a
// LEFT JOIN, a user without any stored follower still produces one joined
// row (with NULL follower columns), which COUNT(*) would count as 1 and
// thereby hide users that have exactly one follower.
$usersStmt = $pdo->prepare('
    SELECT
        id,
        username,
        followers_count
    FROM
        users
    WHERE
        id IN (
            SELECT
                id
            FROM (
                SELECT
                    users.id,
                    COUNT(followers.follower_user_id) AS fetched_followers_count
                FROM
                    users
                LEFT JOIN
                    followers ON (users.id = followers.followee_user_id)
                GROUP BY
                    users.id
                HAVING
                    users.followers_count > fetched_followers_count
            )
        )
');
// Seed the crawl. The handle may be passed as the first command-line
// argument; without one the original default account is used.
$seedUsername = $argv[1] ?? 'truthlafayette';
$user = getUserByUsername($seedUsername);
$userStmt->execute((array) $user);

// Load every user whose follower list is still incomplete (this includes the
// seed user on a first run) and fetch their followers.
$usersStmt->execute();
$users = $usersStmt->fetchAll(PDO::FETCH_OBJ);

foreach ($users as $user) {
    echo 'Processing user ', $user->username, PHP_EOL;

    // $user was just read from the users table, so it does not need to be
    // written back; only the followers and the edges to them are stored.
    foreach (getUserFollowers($user->id) as $follower) {
        $userStmt->execute([
            'id' => $follower->id,
            'username' => $follower->username,
            'followers_count' => $follower->followers_count,
        ]);
        $followerStmt->execute([
            'followee_user_id' => $user->id,
            'follower_user_id' => $follower->id,
        ]);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Exports the crawled follower graph as a Graphviz DOT file (TwitterDot.dot,
// written next to this script). One edge "follower" -> "followee" is emitted
// per stored follower relation whose follower is itself a followee of
// someone, i.e. appears in the followee column of the followers table.

$pdo = new PDO('sqlite:' . __DIR__ . '/TwitterCrawler.sq3');

// Fail loudly on SQL errors (the default only from PHP 8.0 on).
$pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

$results = $pdo->query('
    SELECT
        users_followees.username AS followee,
        users_followers.username AS follower
    FROM
        (SELECT
            *
        FROM
            followers
        WHERE
            follower_user_id IN (
                SELECT DISTINCT
                    followee_user_id
                FROM
                    followers
            )
        ) AS followers
    JOIN
        users AS users_followees ON (users_followees.id = followers.followee_user_id)
    JOIN
        users AS users_followers ON (users_followers.id = followers.follower_user_id)
');

$dotPath = __DIR__ . '/TwitterDot.dot';
$fp = fopen($dotPath, 'w');
if ($fp === false) {
    throw new RuntimeException("Unable to open $dotPath for writing");
}

fwrite($fp, "digraph {\n");
while ($result = $results->fetchObject()) {
    // Escape quotes and backslashes so a name can never terminate the
    // double-quoted DOT identifier early.
    $follower = addcslashes($result->follower, '"\\');
    $followee = addcslashes($result->followee, '"\\');
    fwrite($fp, "\"{$follower}\" -> \"{$followee}\"\n");
}
fwrite($fp, '}');
fclose($fp);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Renders TwitterDot.dot (expected in the current directory) to TwitterDot.png
# using Graphviz.
#   -v               verbose output
#   -Goverlap=scale  sets the graph-level "overlap" attribute to "scale"
#   -Tpng            selects PNG as the output format
dot -v -Goverlap=scale -Tpng TwitterDot.dot > TwitterDot.png
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment