Created
April 22, 2014 06:01
-
-
Save ozh/11166845 to your computer and use it in GitHub Desktop.
Import a CSV tweet list using Ozh's Tweet Archiver plugin
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Import a CSV tweet list
 *
 * Import tweets from an archive as generated by Twitter.
 *
 * Usage :
 * - Import your tweets with the plugin. The plugin will import only the 3200 most recent tweets.
 * - Download your archive from Twitter, open the tweets.csv file (delete first 3200 most recent tweets to speed up things)
 * - put this script and tweets.csv in WordPress' root directory (where wp-load.php is)
 * - run this file in your browser (http://yoursite/blog/import.php)
 * - when it's finished, delete this file
 */
require( './wp-load.php' );
ozh_ta_require( 'import.php' );
ozh_ta_require( 'option-page.php' );
wp_enqueue_script( 'jquery' );
print_head_scripts();

// 'start' comes straight from the query string: cast to int so it is safe
// to use in arithmetic below and in the self-reload URL (no string math,
// no arbitrary text echoed back into the redirect target).
$start = isset( $_GET['start'] ) ? (int) $_GET['start'] : 0;

// Import one batch, resuming at CSV line $start
ozh_ta_schedule_next( 0 );
$import = oti_process_file( oti_get_file(), $start );
$start += $import['count'];
$delay  = ( $import['ok'] ? OZH_TA_NEXT_SUCCESS : OZH_TA_NEXT_FAIL );

// Continue or mark import as finished
if( $import['finished'] ) {
    // Declare both globals used here ($wpdb was previously used without
    // being listed; a no-op at top level, but kept consistent).
    global $ozh_ta, $wpdb;

    // Schedule next operation
    ozh_ta_schedule_next( $ozh_ta['refresh_interval'] );

    // Update real last_tweet_id_inserted, stats, & reset API paging
    $ozh_ta['twitter_stats']['link_count']     = $wpdb->get_var( "SELECT COUNT(ID) FROM `$wpdb->posts` WHERE `post_type` = 'post' AND `post_status` = 'publish' AND `post_content` LIKE '%class=\"link%'" );
    $ozh_ta['twitter_stats']['replies']        = $wpdb->get_row( "SELECT COUNT( DISTINCT `meta_value`) as unique_names, COUNT( `meta_value`) as total FROM `$wpdb->postmeta` WHERE `meta_key` = 'ozh_ta_reply_to_name'", ARRAY_A );
    $ozh_ta['twitter_stats']['total_archived'] = $wpdb->get_var( "SELECT COUNT(`meta_key`) FROM `$wpdb->postmeta` WHERE `meta_key` = 'ozh_ta_id'" );
    update_option( 'ozh_ta', $ozh_ta );

    // (typo fix: was "tweets!.")
    echo '<p>Finished importing tweets!</p>';
} else {
    echo '<p>Imported ' . $import['count'] . ' tweets</p>';
    $reload = add_query_arg( array(
        'start' => $start,
    ), get_bloginfo('url') . '/' . basename( __FILE__ ) );
    ozh_ta_reload( $reload, $delay );
}
// done.
/*****************************************************************************/ | |
/**
 * Get CSV file path, die if not found
 *
 * Looks for $file in the same directory as this script (WordPress root,
 * where wp-load.php lives) and returns its full slash-normalized path.
 *
 * @param string $file CSV file name (defaults to 'tweets.csv')
 * @return string Full path to file (or die if not found)
 */
function oti_get_file( $file = 'tweets.csv' ) {
    // Fix: the original ignored the $file parameter and always hardcoded
    // 'tweets.csv', making the documented default useless.
    $path = str_replace( '\\', '/', dirname( __FILE__ ) . '/' . $file );
    if( !file_exists( $path ) ) {
        wp_die( 'Tweet file not found. Put it in the same directory as wp-load.php' );
    }
    return $path;
}
/**
 * Read a CSV file, process 1st field of each row, by batch of defined size
 *
 * Reads at most OZH_TA_BATCH tweet IDs from $file — skipping the $start data
 * lines already imported by previous batches — and feeds each one to
 * ozh_ta_import_single_tweet().
 *
 * @param string $file  path to file
 * @param int    $start first data line to process, ignoring all those before
 * @return array 'ok'       => bool: false if at least one import failed
 *               'count'    => int:  tweets processed during this batch
 *               'finished' => bool: true when end of file reached without error
 */
function oti_process_file( $file, $start ) {
    $handle = fopen( $file, "r" );
    if( $handle === false ) {
        // Could not open the file at all: report failure, nothing processed
        return array( 'ok' => false, 'count' => 0, 'finished' => false );
    }

    // Always ignore the CSV header line
    fgetcsv( $handle, 30, "," );

    $line     = 0;     // index of the current valid data line
    $count    = 0;     // tweets processed during this batch
    $finished = false;
    $ok       = true;

    // NOTE: the short max line length (30) deliberately splits long rows into
    // chunks; the numeric-ID check below filters out the continuation chunks.
    while ( $count < OZH_TA_BATCH && $data = fgetcsv( $handle, 30, "," ) ) {
        // Make sure we're reading an actual comma separated line starting with
        // a tweet ID, and not the sequel of a multiline cell
        $id = $data[0];
        if( preg_match( '/^\d+$/', $id ) ) {
            // Resume at the wished position: skip the $start lines already
            // imported. (Fix: was `< $start - 1`, which re-imported the last
            // line of the previous batch on every reload.)
            if( $line++ < $start ) {
                continue;
            }
            // Not much error checking here. Assuming false will be either
            // rate limit exceeded, or Twitter down
            if( ozh_ta_import_single_tweet( $id ) === false ) {
                $ok = false;
            }
            $count++;
        }
    }

    // Fix: close the handle (was leaked on every batch/reload)
    fclose( $handle );

    // Fewer rows than a full batch and no errors: we reached the end of file
    if( $count < OZH_TA_BATCH && $ok ) {
        $finished = true;
    }
    return array( 'ok' => $ok, 'count' => $count, 'finished' => $finished );
}
I have this CSV file from 2014 — hope it will help:
"tweet_id","in_reply_to_status_id","in_reply_to_user_id","timestamp","source","text","retweeted_status_id","retweeted_status_user_id","retweeted_status_timestamp","expanded_urls"
"455836070069157888","","","2014-04-14 22:32:42 +0000","<a href=""http://github.com"" rel=""nofollow"">GitHub Service Hooks</a>","https://t.co/pK7Pykj6zs - 4 commits","","","","https://github.com/YOURLS/YOURLS/commits/oop"
"455748110174945280","","","2014-04-14 16:43:11 +0000","<a href=""http://github.com"" rel=""nofollow"">GitHub Service Hooks</a>","https://t.co/pK7Pykj6zs - 1 commits","","","","https://github.com/YOURLS/YOURLS/commits/oop"
"455734659146874880","","","2014-04-14 15:49:44 +0000","<a href=""http://github.com"" rel=""nofollow"">GitHub Service Hooks</a>","https://t.co/pK7Pykj6zs - 3 commits","","","","https://github.com/YOURLS/YOURLS/commits/oop"
[...]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I've been trying to run this, but the Twitter archive no longer includes a CSV in the zip file. I found a way to convert the new JS format to CSV using an online tool (http://tweetjstocsv.glitch.me/), did some cleaning of the data, then ran the importer. It says it finished processing, but no posts were created/imported. I'm guessing there are issues with the column headers and the data (date) formats, etc. Is there any way to get a sample CSV showing what it should look like?