Skip to content

Instantly share code, notes, and snippets.

@codemasher
Last active December 14, 2022 16:49
Show Gist options
  • Save codemasher/67ba24cee88029a3278c87ff9a0095ba to your computer and use it in GitHub Desktop.
Save codemasher/67ba24cee88029a3278c87ff9a0095ba to your computer and use it in GitHub Desktop.
Fetch your twitter timeline via the unofficial adaptive search API
<?php
/**
* Twitter timeline backup
*
* Required:
* - PHP 8.1+
* - cURL extension enabled
* - Windows:
* - download https://windows.php.net/downloads/releases/php-8.1.12-Win32-vs16-x64.zip (or a newer available version)
* - unzip to a folder of your choice
* - copy/rename the php.ini-development to php.ini
* - open the php.ini in an editor:
* - search for 'extension=curl', uncomment this line (remove the semicolon)
* - search for 'extension=openssl', uncomment this line
* - search for 'extension_dir', uncomment this line (under "on windows")
* - it might be necessary to provide a ca file for openSSL
* - download cacert.pem from https://curl.haxx.se/ca/cacert.pem
* - search for ';curl.cainfo', uncomment this line and add "c:\path\to\cacert.pem"
* - search for ';openssl.cafile', uncomment this line and add the same path to the cacert.pem as above
* - Linux: https://www.digitalocean.com/community/tutorials/how-to-install-php-8-1-and-set-up-a-local-development-environment-on-ubuntu-22-04
* - apt-add-repository ppa:ondrej/php -y
* - apt-get update
* - apt-get install -y php8.1-cli php8.1-common php8.1-curl
*
* Run the script in the console:
*
* - Windows: C:\path\to\php\php.exe C:\path\to\script\timeline.php
* - Linux: php /path/to/script/timeline.php
*
* @see https://github.com/pauldotknopf/twitter-dump
*
* @created 17.11.2022
* @author smiley <[email protected]>
* @copyright 2022 smiley
* @license MIT
*/
/*
 * The request token, sent verbatim as the value of the "Authorization" header by request().
 *
 * How to get the request token:
 *
 * - open https://twitter.com/search in a web browser (chrome or firefox recommended)
 * - open the developer console (press F12)
 * - type anything in the twitter search box, hit enter
 * - go to the "network" tab in the dev console and filter the requests for "adaptive.json"
 * - click that line, a new tab for that request appears
 * - there, in the "headers" tab, scroll to "request headers" and look for "Authorization: Bearer ..."
 * - right click that line, select "copy value" and paste it below, should look like: 'Bearer AAAANRILgAAAAAAnNwI...'
 */
$token = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs=1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA';
/*
 * The search query, passed as the "q" parameter to the search API.
 * Its md5 hash is also used as the base name of every file written to the storage directory.
 *
 * @see https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/build-a-query
 * @see https://help.twitter.com/en/using-twitter/advanced-tweetdeck-features
 *
 * try:
 * - "@username" timeline including replies
 * - "@username include:nativeretweets filter:nativeretweets" for RTs (returns RTs of the past week only)
 * - "to:username" for @mentions and replies
 */
$query = 'from:dril';
/*
 * Continue/run from stored responses, useful if the run gets interrupted for whatever reason.
 * When true, getTimeline() reads a page from its stored response file if that file exists
 * and only calls the API for pages that have not been stored yet.
 */
$fromFile = true;
/*
 * The storage path for the raw responses and the final output files;
 * a different directory per query is recommended. The directory is created if it does not exist.
 */
$dir = __DIR__.'/from-dril';
/*
 * JSON output flags, used when encoding the timeline and user output files.
 *
 * @see https://www.php.net/manual/en/json.constants.php
 */
$jsonFlags = JSON_THROW_ON_ERROR|JSON_UNESCAPED_SLASHES|JSON_UNESCAPED_UNICODE|JSON_PRETTY_PRINT;
/* ==================== stop editing here ===================== */
// Make sure the storage directory exists. The second is_dir() check covers the case where
// another process created the directory between the first check and the mkdir() call.
if(!is_dir($dir) && !mkdir(directory: $dir, recursive: true) && !is_dir($dir)){
	throw new RuntimeException(sprintf('unable to create the storage directory: %s', $dir));
}

// realpath() returns false for an unresolvable path; continuing with it would turn
// the output paths below into "/<hash>.json" in the filesystem root.
$dir = realpath($dir);

if($dir === false){
	throw new RuntimeException('unable to resolve the storage directory');
}

// output files are named after the md5 hash of the query
$timelineJSON = sprintf('%s/%s.json', $dir, md5($query));
$userJSON     = sprintf('%s/%s-users.json', $dir, md5($query));

[$timeline, $users] = getTimeline($query, $fromFile);

$tl = json_encode($timeline, $jsonFlags);
$ul = json_encode($users, $jsonFlags);

if(file_put_contents($timelineJSON, $tl) === false){
	throw new RuntimeException(sprintf('unable to write the timeline data to: %s', $timelineJSON));
}

if(file_put_contents($userJSON, $ul) === false){
	throw new RuntimeException(sprintf('unable to write the user data to: %s', $userJSON));
}

printf("timeline data for '%s' saved in: %s\n", $query, realpath($timelineJSON));
printf("user data saved in: %s\n", realpath($userJSON));

// verify readability/decoding: read both files back and decode them (throws on invalid JSON)
$tl = json_decode(file_get_contents($timelineJSON), true, 512, JSON_THROW_ON_ERROR);
$ul = json_decode(file_get_contents($userJSON), true, 512, JSON_THROW_ON_ERROR);

printf("fetched %s tweets from %s users\n", count($tl), count($ul));

exit;
/* ===================== here be dragons ====================== */
/**
 * Retrieves the timeline for the given query and parses the response data.
 *
 * Pages are processed one after another until a request fails, a response cannot be parsed
 * or no follow-up cursor is returned. Every response fetched from the API is stored as
 * "<md5(query)>-<page>.json" in the global $dir. With $fromFile enabled, a page whose file
 * already exists is read from that file instead of being requested again.
 *
 * Timeline entries for which no tweet object was received are left out of the result.
 *
 * @param string $query    the search query
 * @param bool   $fromFile whether to reuse previously stored responses
 *
 * @return array a 2-element list: [tweets in timeline order, keyed by tweet ID; users keyed by user ID]
 */
function getTimeline(string $query, bool $fromFile = false):array{
	global $dir;

	$tweets     = [];
	$users      = [];
	$timeline   = [];
	$lastCursor = '';
	$count      = 0;
	$hash       = md5($query);

	while(true){
		$filename = sprintf('%s/%s-%d.json', $dir, $hash, $count);
		$fetched  = false;

		if($fromFile && file_exists($filename)){
			$response = file_get_contents($filename);
		}
		else{
			[$response, $status, $headers] = search($query, $lastCursor);

			// rate limit hit (doesn't seem to happen?)
			if($status === 429){
				// @todo: examine $headers if x-rate-limit-reset is set; for now just sleep for a bit
				sleep(10);

				continue;
			}

			if($status !== 200 || empty($response)){
				break;
			}

			file_put_contents($filename, $response);

			$fetched = true;
		}

		// an unreadable file or unparseable response ends the run
		if(!is_string($response) || !parseResponse($response, $tweets, $users, $timeline, $lastCursor)){
			break;
		}

		printf("[%s] fetched data for '%s', cursor: %s\n", $count, $query, $lastCursor);

		$count++;

		if(empty($lastCursor)){
			break;
		}

		// throttle only when this page was actually requested from the API;
		// pages read from disk need no delay, freshly fetched ones always do
		if($fetched){
			sleep(2);
		}
	}

	// replace the timeline placeholders with the parsed tweets, preserving the timeline order
	$result = [];

	foreach(array_keys($timeline) as $id){

		// the timeline may reference a tweet that was not part of the received tweet objects
		if(!isset($tweets[$id])){
			continue;
		}

		$tweet    = $tweets[$id];
		$quotedID = $tweet['quoted_status_id'] ?? null;

		// embed the quoted tweet if we have received it
		if($quotedID !== null && isset($tweets[$quotedID])){
			$tweet['quoted_status'] = $tweets[$quotedID];
		}

		$result[$id] = $tweet;
	}

	return [$result, $users];
}
/**
 * Parses the API response and fills the data arrays (passed by reference).
 *
 * Tweets and users from "globalObjects" are parsed and added to $tweets and $users, keyed by
 * their "id_str". For every timeline entry whose ID starts with "sq-I-t", the referenced
 * tweet ID is added as a key to $timeline (with a null placeholder value). $cursor is set to
 * the value of the "sq-cursor-bottom" entry, or to an empty string if the response contains none.
 *
 * The arrays and the cursor are left untouched when false is returned.
 *
 * @param string $response the raw JSON response body
 * @param array  $tweets   parsed tweets, keyed by tweet ID
 * @param array  $users    parsed users, keyed by user ID
 * @param array  $timeline tweet IDs in timeline order (as array keys)
 * @param string $cursor   the cursor for the next page
 *
 * @return bool false if the response is not valid JSON, lacks the expected structure or contains no tweets
 */
function parseResponse(string $response, array &$tweets, array &$users, array &$timeline, string &$cursor):bool{

	try{
		$json = json_decode(json: $response, flags: JSON_THROW_ON_ERROR);
	}
	catch(JsonException){
		// @todo: handle json error
		return false;
	}

	if(!isset($json->globalObjects->tweets, $json->globalObjects->users, $json->timeline->instructions)){
		return false;
	}

	if(empty((array)$json->globalObjects->tweets)){
		return false;
	}

	foreach($json->globalObjects->tweets as $tweet){
		$tweets[$tweet->id_str] = parseTweet($tweet);
	}

	foreach($json->globalObjects->users as $user){
		$users[$user->id_str] = parseUser($user);
	}

	// Collect the next cursor independently of the order of the instructions: an unrelated
	// instruction (e.g. a replacement of another entry) must not wipe a cursor that was
	// already found, and a response without a bottom cursor must not keep the previous one.
	$nextCursor = '';

	foreach($json->timeline->instructions as $instruction){

		foreach($instruction->addEntries->entries ?? [] as $entry){
			$entryID = $entry->entryId ?? '';

			if(str_starts_with($entryID, 'sq-I-t')){
				$tweetID = $entry->content->item->content->tweet->id ?? null;

				if($tweetID !== null){
					$timeline[$tweetID] = null;
				}
			}
			elseif($entryID === 'sq-cursor-bottom'){
				$nextCursor = $entry->content->operation->cursor->value ?? '';
			}
		}

		if(($instruction->replaceEntry->entryIdToReplace ?? null) === 'sq-cursor-bottom'){
			$nextCursor = $instruction->replaceEntry->entry->content->operation->cursor->value ?? '';
		}
	}

	$cursor = $nextCursor;

	return true;
}
/**
 * Fetches one page of results from the adaptive search API.
 *
 * @see https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/build-a-query
 * @see https://developer.twitter.com/en/docs/twitter-api/tweets/search/introduction
 *
 * @param string      $query  the search query (sent as the "q" parameter)
 * @param string|null $cursor the cursor of the page to fetch; the parameter is omitted from the request when empty
 *
 * @return array the result of request(): [response body, HTTP status code, response headers]
 */
function search(string $query, ?string $cursor = null):array{

	// the query parameters from the call to https://twitter.com/i/api/2/search/adaptive.json in original order
	$params = [
		'include_profile_interstitial_type'    => '1',
		'include_blocking'                     => '1',
		'include_blocked_by'                   => '1',
		'include_followed_by'                  => '1',
		'include_want_retweets'                => '1',
		'include_mute_edge'                    => '1',
		'include_can_dm'                       => '1',
		'include_can_media_tag'                => '1',
		'include_ext_has_nft_avatar'           => '1',
		'include_ext_is_blue_verified'         => '1',
		'skip_status'                          => '1',
		'cards_platform'                       => 'Web-12',
		'include_cards'                        => '1',
		'include_ext_alt_text'                 => 'true',
		'include_ext_limited_action_results'   => 'false',
		'include_quote_count'                  => 'true',
		'include_reply_count'                  => '1',
		'tweet_mode'                           => 'extended',
		'include_ext_collab_control'           => 'true',
		'include_entities'                     => 'true',
		'include_user_entities'                => 'true',
		'include_ext_media_color'              => 'false',
		'include_ext_media_availability'       => 'true',
		'include_ext_sensitive_media_warning'  => 'true',
		'include_ext_trusted_friends_metadata' => 'true',
		'send_error_codes'                     => 'true',
		'simple_quoted_tweet'                  => 'true',
		'q'                                    => $query,
#		'social_filter'                        => 'searcher_follows', // @todo
		'tweet_search_mode'                    => 'live',
		'count'                                => '100',
		'query_source'                         => 'typed_query',
		'cursor'                               => $cursor,
		'pc'                                   => '1',
		'spelling_corrections'                 => '1',
		'include_ext_edit_control'             => 'true',
		'ext'                                  => 'mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe',
	];

	// remove the cursor parameter if it's empty (first page)
	if(empty($params['cursor'])){
		unset($params['cursor']);
	}

	return request('https://api.twitter.com/2/search/adaptive.json', $params);
}
/**
 * Sets up and fires a single HTTP GET request, authorized with the global $token.
 *
 * The status code and the headers are taken from the same response as the body,
 * so exactly one request is sent per call.
 *
 * @param string     $url    the request URL
 * @param array|null $params optional query parameters, appended to the URL as an RFC 3986 encoded query string
 *
 * @return array a 3-element list:
 *               - the response body (false if file_get_contents() failed),
 *               - the HTTP status code (0 if no response was received),
 *               - the response headers as [name => value], repeated headers as [name => [values]]
 */
function request(string $url, ?array $params = null):array{
	global $token;

	// add the query string if we have parameters given
	if(!empty($params)){
		$url .= '?'.http_build_query(data: $params, encoding_type: PHP_QUERY_RFC3986);
	}

	// set up the stream context to add a header and user agent
	$contextOptions = [
		'http' => [
			'method'           => 'GET',
			'header'           => ['Authorization: '.$token],
			'protocol_version' => '1.1', // 1.1 is default from PHP 8.0
			'user_agent'       => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)',
			'max_redirects'    => 0,
			'timeout'          => 5,
		],
	];

	// fire the request
	$context  = stream_context_create($contextOptions);
	$response = file_get_contents(filename: $url, context: $context);

	// The HTTP stream wrapper exposes the headers of the response it just received:
	// via http_get_last_response_headers() on PHP 8.4+, via the local variable
	// $http_response_header on older versions. Reading them here avoids a second request.
	$rawHeaders = function_exists('http_get_last_response_headers')
		? (http_get_last_response_headers() ?? [])
		: ($http_response_header ?? []);

	$status  = 0;
	$headers = [];

	foreach($rawHeaders as $line){

		// status line, e.g. "HTTP/1.1 200 OK" - the last one wins
		if(preg_match('#^HTTP/\S+\s+(\d{3})#', $line, $match) === 1){
			$status = (int)$match[1];

			continue;
		}

		$parts = explode(':', $line, 2);

		if(count($parts) !== 2){
			continue;
		}

		$name  = trim($parts[0]);
		$value = trim($parts[1]);

		// collect repeated headers in a list
		if(array_key_exists($name, $headers)){
			$headers[$name] = array_merge((array)$headers[$name], [$value]);
		}
		else{
			$headers[$name] = $value;
		}
	}

	return [$response, $status, $headers];
}
/**
 * Parses/cleans/flattens a tweet object.
 *
 * Shortened URLs in the text are replaced with their expanded form, shortened media URLs are
 * removed from the text and the media entities are parsed via parseMedia(). Several fields
 * fall back to alternative property names (e.g. "author_id", "public_metrics"); properties
 * that are absent from the given object result in a default value instead of a warning.
 *
 * @see https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/tweet
 *
 * @param object $tweet the decoded tweet object
 *
 * @return array the flattened tweet; "created_at" is a unix timestamp, or false if the date is absent or unparseable
 */
function parseTweet(object $tweet):array{
	$text       = $tweet->full_text ?? $tweet->text ?? '';
	$mediaItems = [];

	foreach($tweet->entities->urls ?? [] as $entity){

		if(!isset($entity->url)){
			continue;
		}

		$text = str_replace($entity->url, $entity->expanded_url ?? $entity->url, $text);
	}

	foreach($tweet->entities->media ?? [] as $media){

		// we'll just remove the shortened media URL as it is of no use
		if(isset($media->url)){
			$text = str_replace($media->url, '', $text);
		}

		$mediaItems[] = parseMedia($media);
	}

	return [
		'id'                      => (int)($tweet->id ?? $tweet->id_str ?? 0),
		'user_id'                 => (int)($tweet->user_id ?? $tweet->author_id ?? $tweet->user->id ?? 0),
		'user'                    => null, // isset($tweet->user) ? parseUser($tweet->user) : null,
		'created_at'              => strtotime($tweet->created_at ?? ''), // strtotime('') yields false
		'text'                    => $text,
		'source'                  => $tweet->source ?? null,
		'retweet_count'           => (int)($tweet->retweet_count ?? $tweet->public_metrics->retweet_count ?? 0),
		'favorite_count'          => (int)($tweet->favorite_count ?? $tweet->public_metrics->like_count ?? 0),
		'reply_count'             => (int)($tweet->reply_count ?? $tweet->public_metrics->reply_count ?? 0),
		'quote_count'             => (int)($tweet->quote_count ?? $tweet->public_metrics->quote_count ?? 0),
		'favorited'               => $tweet->favorited ?? false,
		'retweeted'               => $tweet->retweeted ?? false,
		'possibly_sensitive'      => $tweet->possibly_sensitive ?? false,
		'in_reply_to_status_id'   => $tweet->in_reply_to_status_id ?? null,
		'in_reply_to_user_id'     => $tweet->in_reply_to_user_id ?? null,
		'in_reply_to_screen_name' => $tweet->in_reply_to_screen_name ?? null,
		'is_quote_status'         => $tweet->is_quote_status ?? false,
		'quoted_status_id'        => $tweet->quoted_status_id ?? null,
		'quoted_status'           => null, // placeholder, the caller may fill it in
		'retweeted_status_id'     => $tweet->retweeted_status_id ?? null,
		'retweeted_status'        => null, // placeholder
		'self_thread'             => $tweet->self_thread->id ?? null,
		'conversation_id'         => $tweet->conversation_id ?? null,
		'place'                   => $tweet->place ?? null,
		'coordinates'             => $tweet->coordinates ?? null,
		'geo'                     => $tweet->geo ?? null,
		'media'                   => $mediaItems,
	];
}
/**
 * Parses/cleans/flattens a user object.
 *
 * Runs of whitespace in name, description, location and URL are collapsed into a single space,
 * shortened URLs in description and URL are replaced with their expanded form and the "_normal"
 * size suffix is removed from the profile image URL. Several fields fall back to alternative
 * property names (e.g. "username", "public_metrics"); properties that are absent from the given
 * object result in a default value instead of a warning.
 *
 * @see https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/user
 *
 * @param object $user the decoded user object
 *
 * @return array the flattened user; "created_at" is a unix timestamp, or false if the date is absent or unparseable
 */
function parseUser(object $user):array{
	// collapses any run of whitespace (including line breaks) into a single space
	$collapse = static fn($value) => preg_replace('/\s+/', ' ', $value ?? '');

	// replaces the shortened URLs of the given entities with their expanded form
	$expand = static function($text, iterable $entities){

		foreach($entities as $entity){

			if(!isset($entity->url)){
				continue;
			}

			$text = str_replace($entity->url, $entity->expanded_url ?? $entity->url, $text);
		}

		return $text;
	};

	$name        = $collapse($user->name ?? '');
	$location    = $collapse($user->location ?? '');
	$description = $expand($collapse($user->description ?? ''), $user->entities->description->urls ?? []);
	$url         = $expand($collapse($user->url ?? ''), $user->entities->url->urls ?? []);

	// drop the size suffix from the profile image URL
	$profileImage = str_replace('_normal.', '.', $user->profile_image_url_https ?? $user->profile_image_url ?? '');

	return [
		'id'               => $user->id ?? null,
		'screen_name'      => $user->screen_name ?? $user->username ?? null,
		'name'             => $name,
		'description'      => $description,
		'location'         => $location,
		'url'              => $url,
		'followers_count'  => $user->followers_count ?? $user->public_metrics->followers_count ?? 0,
		'friends_count'    => $user->friends_count ?? $user->public_metrics->following_count ?? 0,
		'statuses_count'   => $user->statuses_count ?? $user->public_metrics->tweet_count ?? 0,
		'favourites_count' => $user->favourites_count ?? 0,
		'created_at'       => strtotime($user->created_at ?? ''), // strtotime('') yields false
		'protected'        => (bool)($user->protected ?? false),
		'verified'         => (bool)($user->verified ?? false),
		'muting'           => (bool)($user->muting ?? false),
		'blocking'         => (bool)($user->blocking ?? false),
		'blocked_by'       => (bool)($user->blocked_by ?? false),
		'is_cryptobro'     => $user->ext_has_nft_avatar ?? false,
		'clown_emoji'      => $user->ext_is_blue_verified ?? false,
		'profile_image'    => $profileImage,
		'profile_banner'   => $user->profile_banner_url ?? '',
	];
}
/**
 * Parses/flattens a media entity object.
 *
 * Properties that are absent from the given object result in null
 * (an empty string for "alt_text") instead of a warning.
 *
 * @see https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/media
 *
 * @param object $media the decoded media entity
 *
 * @return array the flattened media item
 */
function parseMedia(object $media):array{
	return [
		'id'                 => $media->id ?? null,
		'media_key'          => $media->media_key ?? null,
		'source_user_id'     => $media->source_user_id ?? null,
		'type'               => $media->type ?? null,
		'url'                => $media->media_url_https ?? $media->media_url ?? null,
		'alt_text'           => $media->ext_alt_text ?? '',
		// read from the media entity itself - there is no tweet object in this scope
		'possibly_sensitive' => $media->ext_sensitive_media_warning ?? null,
		'width'              => $media->original_info->width ?? null,
		'height'             => $media->original_info->height ?? null,
		'variants'           => $media->video_info->variants ?? null,
	];
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment