Last active
December 14, 2022 16:49
-
-
Save codemasher/67ba24cee88029a3278c87ff9a0095ba to your computer and use it in GitHub Desktop.
Fetch your twitter timeline via the unofficial adaptive search API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* Twitter timeline backup | |
* | |
* Required: | |
* - PHP 8.1+ | |
* - cURL extension enabled | |
* - Windows: | |
* - download https://windows.php.net/downloads/releases/php-8.1.12-Win32-vs16-x64.zip (or a newer available version) | |
* - unzip to a folder of your choice | |
* - copy/rename the php.ini-development to php.ini | |
* - open the php.ini in an editor: | |
* - search for 'extension=curl', uncomment this line (remove the semicolon) | |
* - search for 'extension=openssl', uncomment this line | |
* - search for 'extension_dir', uncomment this line (under "on windows") | |
* - it might be necessary to provide a ca file for openSSL | |
* - download cacert.pem from https://curl.haxx.se/ca/cacert.pem | |
* - search for ';curl.cainfo', uncomment this line and add "c:\path\to\cacert.pem"
* - search for ';openssl.cafile', uncomment this line and add the same path to the cacert.pem as above | |
* - Linux: https://www.digitalocean.com/community/tutorials/how-to-install-php-8-1-and-set-up-a-local-development-environment-on-ubuntu-22-04 | |
* - apt-add-repository ppa:ondrej/php -y | |
* - apt-get update | |
* - apt-get install -y php8.1-cli php8.1-common php8.1-curl | |
* | |
* Run the script in the console: | |
* | |
* - Windows: C:\path\to\php\php.exe C:\path\to\script\timeline.php | |
* - Linux: php /path/to/script/timeline.php | |
* | |
* @see https://github.com/pauldotknopf/twitter-dump | |
* | |
* @created 17.11.2022 | |
* @author smiley <[email protected]> | |
* @copyright 2022 smiley | |
* @license MIT | |
*/ | |
/*
 * How to get the request token:
 *
 *   - open https://twitter.com/search in a web browser (chrome or firefox recommended)
 *   - open the developer console (press F12)
 *   - type anything in the twitter search box, hit enter
 *   - go to the "network" tab in the dev console and filter the requests for "adaptive.json"
 *   - click that line, a new tab for that request appears
 *   - there, in the "headers" tab, scroll to "request headers" and look for "Authorization: Bearer ..."
 *   - right click that line, select "copy value" and paste it below, should look like: 'Bearer AAAANRILgAAAAAAnNwI...'
 */
$token = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs=1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA';

/*
 * The search query
 *
 * @see https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/build-a-query
 * @see https://help.twitter.com/en/using-twitter/advanced-tweetdeck-features
 *
 * try:
 *   - "@username" timeline including replies
 *   - "@username include:nativeretweets filter:nativeretweets" for RTs (returns RTs of the past week only)
 *   - "to:username" for @mentions and replies
 */
$query = 'from:dril';

/*
 * continue/run from stored responses, useful if the run gets interrupted for whatever reason
 */
$fromFile = true;

/*
 * the storage path for the raw responses, a different directory per query is recommended
 */
$dir = __DIR__.'/from-dril';

/*
 * JSON output flags
 *
 * @see https://www.php.net/manual/en/json.constants.php
 */
$jsonFlags = JSON_THROW_ON_ERROR|JSON_UNESCAPED_SLASHES|JSON_UNESCAPED_UNICODE|JSON_PRETTY_PRINT;

/* ==================== stop editing here ===================== */

// Create the storage directory if necessary and verify that it exists afterwards.
// Without this check a failed mkdir() makes realpath() return false, and the
// sprintf() calls below would then produce paths in the filesystem root ("/<hash>.json").
if(!is_dir($dir) && !mkdir(directory: $dir, recursive: true) && !is_dir($dir)){
	fwrite(STDERR, sprintf("could not create the storage directory: %s\n", $dir));
	exit(1);
}

$dir = realpath($dir);

// the output files are named after the md5 hash of the query
$timelineJSON = sprintf('%s/%s.json', $dir, md5($query));
$userJSON     = sprintf('%s/%s-users.json', $dir, md5($query));

// note: getTimeline() reads the (resolved) $dir via "global"
[$timeline, $users] = getTimeline($query, $fromFile);

$tl = json_encode($timeline, $jsonFlags);
$ul = json_encode($users, $jsonFlags);

// abort with a message instead of continuing with missing/partial output files
if(file_put_contents($timelineJSON, $tl) === false || file_put_contents($userJSON, $ul) === false){
	fwrite(STDERR, sprintf("could not write the output files to: %s\n", $dir));
	exit(1);
}

echo sprintf("timeline data for '%s' saved in: %s\n", $query, realpath($timelineJSON));
echo sprintf("user data saved in: %s\n", realpath($userJSON));

// verify readability/decoding
$tl = json_decode(file_get_contents($timelineJSON), true, 512, JSON_THROW_ON_ERROR);
$ul = json_decode(file_get_contents($userJSON), true, 512, JSON_THROW_ON_ERROR);

echo sprintf("fetched %s tweets from %s users\n", count($tl), count($ul));

exit;
/* ===================== here be dragons ====================== */ | |
/**
 * Retrieves the timeline for the given query and parses the response data.
 *
 * The raw API responses are stored as "<md5(query)>-<page>.json" in the directory given
 * by the global $dir. With $fromFile enabled, pages that already exist on disk are read
 * from there instead of being requested again; once the stored pages are exhausted,
 * fetching continues from the API with the last cursor.
 *
 * The loop ends when a request fails (any status other than 200/429 or an empty body),
 * when a response cannot be parsed or contains no tweets, or when no further cursor is given.
 *
 * @param string $query    the search query
 * @param bool   $fromFile whether to reuse previously stored responses
 *
 * @return array a 2-element list: [timeline (tweet id => parsed tweet), users (user id => parsed user)]
 */
function getTimeline(string $query, bool $fromFile = false):array{
	global $dir;

	$tweets     = [];
	$users      = [];
	$timeline   = [];
	$lastCursor = '';
	$count      = 0;
	$hash       = md5($query);

	while(true){
		$filename = sprintf('%s/%s-%d.json', $dir, $hash, $count);
		// tracks whether this page came from the API (as opposed to a stored file)
		$fetched  = false;

		if($fromFile && file_exists($filename)){
			$response = file_get_contents($filename);
		}
		else{
			[$response, $status] = search($query, $lastCursor);

			// rate limit hit (doesn't seem to happen?)
			if($status === 429){
				// @todo: examine the response headers if x-rate-limit-reset is set
				// just sleep for a bit and retry the same page
				sleep(10);

				continue;
			}

			if($status !== 200 || empty($response)){
				break;
			}

			file_put_contents($filename, $response);

			$fetched = true;
		}

		if(!parseResponse((string)$response, $tweets, $users, $timeline, $lastCursor)){
			break;
		}

		echo sprintf("[%s] fetched data for '%s', cursor: %s\n", $count, $query, $lastCursor);

		$count++;

		if(empty($lastCursor)){
			break;
		}

		// throttle only after actual API requests - this also covers live requests that
		// follow once the stored pages are used up while $fromFile is enabled
		if($fetched){
			sleep(2);
		}
	}

	// fill the timeline (which so far only holds the tweet IDs as keys) with the tweet data
	foreach(array_keys($timeline) as $id){
		// a timeline entry may reference a tweet that is not present in the collected tweet objects
		$tweet = $tweets[$id] ?? null;

		if($tweet !== null && $tweet['quoted_status_id'] !== null && isset($tweets[$tweet['quoted_status_id']])){
			$tweet['quoted_status'] = $tweets[$tweet['quoted_status_id']];
		}

		$timeline[$id] = $tweet;
	}

	return [$timeline, $users];
}
/**
 * Parses an API response and fills the data arrays (passed by reference).
 *
 * - $tweets and $users receive the parsed objects from "globalObjects", keyed by their id_str
 * - $timeline receives one key per tweet entry (entryId prefix "sq-I-t"), the value is set to null
 * - $cursor receives the value of the "sq-cursor-bottom" entry, or an empty string if the
 *   response contains none (which tells the caller that there is no further page)
 *
 * The referenced arguments are left untouched when the function returns false.
 *
 * @return bool false if the response is not valid JSON, lacks the expected structure
 *              or contains no tweets, true otherwise
 */
function parseResponse(string $response, array &$tweets, array &$users, array &$timeline, string &$cursor):bool{

	try{
		$json = json_decode(json: $response, flags: JSON_THROW_ON_ERROR);
	}
	catch(JsonException){
		// @todo: handle json error
		return false;
	}

	if(!isset($json->globalObjects->tweets, $json->globalObjects->users, $json->timeline->instructions)){
		return false;
	}

	if(empty((array)$json->globalObjects->tweets)){
		return false;
	}

	foreach($json->globalObjects->tweets as $tweet){
		$tweets[$tweet->id_str] = parseTweet($tweet);
	}

	foreach($json->globalObjects->users as $user){
		$users[$user->id_str] = parseUser($user);
	}

	// Collect the next cursor in a local variable and assign it once at the end, so that the
	// result does not depend on the order of the instructions: an unrelated instruction (e.g.
	// a replaced top cursor) that follows the bottom cursor must not wipe it, and a response
	// without any bottom cursor must not leave the previous cursor in place.
	$nextCursor = '';

	foreach($json->timeline->instructions as $instruction){

		if(isset($instruction->addEntries->entries)){

			foreach($instruction->addEntries->entries as $entry){

				if(str_starts_with($entry->entryId, 'sq-I-t')){
					$timeline[$entry->content->item->content->tweet->id] = null;
				}
				elseif($entry->entryId === 'sq-cursor-bottom'){
					$nextCursor = $entry->content->operation->cursor->value;
				}

			}

		}
		elseif(isset($instruction->replaceEntry->entryIdToReplace) && $instruction->replaceEntry->entryIdToReplace === 'sq-cursor-bottom'){
			$nextCursor = $instruction->replaceEntry->entry->content->operation->cursor->value;
		}

	}

	$cursor = $nextCursor;

	return true;
}
/**
 * Fetches one page of results from the adaptive search API.
 *
 * @see https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/build-a-query
 * @see https://developer.twitter.com/en/docs/twitter-api/tweets/search/introduction
 *
 * @param string      $query  the search query
 * @param string|null $cursor the pagination cursor from the previous response, null/empty for the first page
 *
 * @return array the result of request(): [response body, HTTP status code, response headers]
 */
function search(string $query, ?string $cursor = null):array{

	// the query parameters from the call to https://twitter.com/i/api/2/search/adaptive.json in original order
	$params = [
		'include_profile_interstitial_type'    => '1',
		'include_blocking'                     => '1',
		'include_blocked_by'                   => '1',
		'include_followed_by'                  => '1',
		'include_want_retweets'                => '1',
		'include_mute_edge'                    => '1',
		'include_can_dm'                       => '1',
		'include_can_media_tag'                => '1',
		'include_ext_has_nft_avatar'           => '1',
		'include_ext_is_blue_verified'         => '1',
		'skip_status'                          => '1',
		'cards_platform'                       => 'Web-12',
		'include_cards'                        => '1',
		'include_ext_alt_text'                 => 'true',
		'include_ext_limited_action_results'   => 'false',
		'include_quote_count'                  => 'true',
		'include_reply_count'                  => '1',
		'tweet_mode'                           => 'extended',
		'include_ext_collab_control'           => 'true',
		'include_entities'                     => 'true',
		'include_user_entities'                => 'true',
		'include_ext_media_color'              => 'false',
		'include_ext_media_availability'       => 'true',
		'include_ext_sensitive_media_warning'  => 'true',
		'include_ext_trusted_friends_metadata' => 'true',
		'send_error_codes'                     => 'true',
		'simple_quoted_tweet'                  => 'true',
		'q'                                    => $query,
#		'social_filter'                        =>'searcher_follows', // @todo
		'tweet_search_mode'                    => 'live',
		'count'                                => '100',
		'query_source'                         => 'typed_query',
		'cursor'                               => $cursor,
		'pc'                                   => '1',
		'spelling_corrections'                 => '1',
		'include_ext_edit_control'             => 'true',
		'ext'                                  => 'mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe',
	];

	// remove the cursor parameter if it's empty (the first page is requested without one)
	if(empty($params['cursor'])){
		unset($params['cursor']);
	}

	return request('https://api.twitter.com/2/search/adaptive.json', $params);
}
/**
 * Sets up and fires a single HTTP GET request, authorized with the global $token.
 *
 * The status code and headers are taken from the response of the request that was actually
 * made (a separate get_headers() call would fire a second request against the API).
 *
 * @param string     $url    the request URL (without query string)
 * @param array|null $params optional query parameters
 *
 * @return array a 3-element list:
 *               - the response body (empty string if the request failed entirely)
 *               - the HTTP status code (0 if no response was received)
 *               - the response headers as name => value (a list of values for repeated headers)
 */
function request(string $url, ?array $params = null):array{
	global $token;

	// add the query string if we have parameters given
	if(!empty($params)){
		$url .= '?'.http_build_query(data: $params, encoding_type: PHP_QUERY_RFC3986);
	}

	// set up the stream context to add a header and user agent
	$contextOptions = [
		'http' => [
			'method'           => 'GET',
			'header'           => ['Authorization: '.$token],
			'protocol_version' => '1.1', // 1.1 is default from PHP 8.0
			'user_agent'       => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)',
			'max_redirects'    => 0,
			'timeout'          => 5,
			// return the body for 4xx/5xx responses too, instead of emitting a warning and returning false
			'ignore_errors'    => true,
		],
	];

	// fire the request
	$context  = stream_context_create($contextOptions);
	$response = file_get_contents(filename: $url, context: $context);

	// raw header lines of the request above: PHP 8.4+ offers a function for it,
	// older versions populate the local variable $http_response_header
	$headerLines = function_exists('http_get_last_response_headers')
		? (http_get_last_response_headers() ?? [])
		: ($http_response_header ?? []);

	$status          = 0;
	$responseHeaders = [];

	foreach($headerLines as $line){

		// status line, e.g. "HTTP/1.1 200 OK" - the last one wins
		if(preg_match('#^HTTP/\S+\s+(\d{3})#', $line, $match) === 1){
			$status = intval($match[1]);

			continue;
		}

		if(!str_contains($line, ':')){
			continue;
		}

		[$name, $value] = explode(':', $line, 2);

		$name  = trim($name);
		$value = trim($value);

		// repeated headers are collected in a list
		if(isset($responseHeaders[$name])){
			$responseHeaders[$name] = [...(array)$responseHeaders[$name], $value];
		}
		else{
			$responseHeaders[$name] = $value;
		}

	}

	return [($response === false ? '' : $response), $status, $responseHeaders];
}
/**
 * Flattens a raw tweet object into a plain array.
 *
 * Shortened URLs in the tweet text are swapped for their expanded form, shortened media
 * URLs are stripped from the text and the media entities are collected via parseMedia().
 * The "user", "quoted_status" and "retweeted_status" fields are always returned as null.
 *
 * @see https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/tweet
 */
function parseTweet(object $tweet):array{
	$needles      = [];
	$replacements = [];
	$attachments  = [];

	// shortened URL => expanded URL
	foreach($tweet->entities->urls ?? [] as $urlEntity){
		$needles[]      = $urlEntity->url;
		$replacements[] = $urlEntity->expanded_url ?? $urlEntity->url ?? '';
	}

	// the shortened media URL is of no use, so it is replaced with nothing
	foreach($tweet->entities->media ?? [] as $mediaEntity){
		$needles[]      = $mediaEntity->url;
		$replacements[] = '';
		$attachments[]  = parseMedia($mediaEntity);
	}

	// the replacements are applied one after another, in the order they were collected
	$body = str_replace($needles, $replacements, $tweet->full_text ?? $tweet->text ?? '');

	$userID    = $tweet->user_id ?? $tweet->author_id ?? $tweet->user->id ?? 0;
	$retweets  = $tweet->retweet_count ?? $tweet->public_metrics->retweet_count ?? 0;
	$favorites = $tweet->favorite_count ?? $tweet->public_metrics->like_count ?? 0;
	$replies   = $tweet->reply_count ?? $tweet->public_metrics->reply_count ?? 0;
	$quotes    = $tweet->quote_count ?? $tweet->public_metrics->quote_count ?? 0;

	return [
		'id'                      => (int)$tweet->id,
		'user_id'                 => (int)$userID,
		'user'                    => null,
		'created_at'              => strtotime($tweet->created_at),
		'text'                    => $body,
		'source'                  => $tweet->source,
		'retweet_count'           => (int)$retweets,
		'favorite_count'          => (int)$favorites,
		'reply_count'             => (int)$replies,
		'quote_count'             => (int)$quotes,
		'favorited'               => $tweet->favorited ?? false,
		'retweeted'               => $tweet->retweeted ?? false,
		'possibly_sensitive'      => $tweet->possibly_sensitive ?? false,
		'in_reply_to_status_id'   => $tweet->in_reply_to_status_id ?? null,
		'in_reply_to_user_id'     => $tweet->in_reply_to_user_id ?? null,
		'in_reply_to_screen_name' => $tweet->in_reply_to_screen_name ?? null,
		'is_quote_status'         => $tweet->is_quote_status ?? false,
		'quoted_status_id'        => $tweet->quoted_status_id ?? null,
		'quoted_status'           => null,
		'retweeted_status_id'     => $tweet->retweeted_status_id ?? null,
		'retweeted_status'        => null,
		'self_thread'             => $tweet->self_thread->id ?? null,
		'conversation_id'         => $tweet->conversation_id ?? null,
		'place'                   => $tweet->place ?? null,
		'coordinates'             => $tweet->coordinates ?? null,
		'geo'                     => $tweet->geo ?? null,
		'media'                   => $attachments,
	];
}
/**
 * Flattens a raw user object into a plain array.
 *
 * Runs of whitespace in name, description, location and URL are collapsed into a single
 * space; shortened URLs in the description and the profile URL are swapped for their
 * expanded form. The "_normal" size suffix is removed from the profile image URL.
 *
 * @see https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/user
 */
function parseUser(object $user):array{
	// collapses any run of whitespace into a single space, a missing value becomes an empty string
	$collapse = static fn($value) => preg_replace('/\s+/', ' ', $value ?? '');

	$displayName = $collapse($user->name ?? null);
	$bio         = $collapse($user->description ?? null);
	$place       = $collapse($user->location ?? null);
	$link        = $collapse($user->url ?? null);

	foreach($user->entities->description->urls ?? [] as $urlEntity){
		$bio = str_replace($urlEntity->url, $urlEntity->expanded_url ?? $urlEntity->url ?? '', $bio);
	}

	foreach($user->entities->url->urls ?? [] as $urlEntity){
		$link = str_replace($urlEntity->url, $urlEntity->expanded_url ?? $urlEntity->url ?? '', $link);
	}

	$avatar = $user->profile_image_url_https ?? $user->profile_image_url ?? '';

	return [
		'id'               => $user->id,
		'screen_name'      => $user->screen_name ?? $user->username,
		'name'             => $displayName,
		'description'      => $bio,
		'location'         => $place,
		'url'              => $link,
		'followers_count'  => $user->followers_count ?? $user->public_metrics->followers_count ?? 0,
		'friends_count'    => $user->friends_count ?? $user->public_metrics->following_count ?? 0,
		'statuses_count'   => $user->statuses_count ?? $user->public_metrics->tweet_count ?? 0,
		'favourites_count' => $user->favourites_count ?? 0,
		'created_at'       => strtotime($user->created_at),
		'protected'        => (bool)($user->protected ?? false),
		'verified'         => (bool)($user->verified ?? false),
		'muting'           => (bool)($user->muting ?? false),
		'blocking'         => (bool)($user->blocking ?? false),
		'blocked_by'       => (bool)($user->blocked_by ?? false),
		'is_cryptobro'     => $user->ext_has_nft_avatar ?? false,
		'clown_emoji'      => $user->ext_is_blue_verified ?? false,
		'profile_image'    => str_replace('_normal.', '.', $avatar),
		'profile_banner'   => $user->profile_banner_url ?? '',
	];
}
/**
 * Flattens a raw media entity into a plain array.
 *
 * Optional fields that are missing from the entity are returned as null
 * (an empty string for the alt text).
 *
 * @see https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/media
 */
function parseMedia(object $media):array{
	return [
		'id'                 => $media->id,
		'media_key'          => $media->media_key ?? null,
		'source_user_id'     => $media->source_user_id ?? null,
		'type'               => $media->type,
		'url'                => $media->media_url_https ?? $media->media_url,
		'alt_text'           => $media->ext_alt_text ?? '',
		// read from the media entity itself - there is no $tweet in this scope,
		// so the previous lookup on it always produced null
		'possibly_sensitive' => $media->ext_sensitive_media_warning ?? null,
		'width'              => $media->original_info->width ?? null,
		'height'             => $media->original_info->height ?? null,
		'variants'           => $media->video_info->variants ?? null,
	];
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment