Last active
July 7, 2018 19:24
-
-
Save adrian-enspired/5906840 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// you can't just get this as JSON in the first place, can you ...? | |
$xml = simplexml_load_file( "http://readability.com/christopherburton/latest/feed" ); | |
$json = json_encode( $xml ); | |
$array = json_decode( $json,TRUE ); | |
$items = $array['channel']['item']; | |
// we're doing this now so we can sanitize the data without requiring a second loop | |
// (substitute your actual DB credentials) | |
$DB = new mysqli( DB_HOST,DB_USER,DB_PASS,DB_NAME ); | |
// make sure you're connected | |
if( $DB->connect_errno ){ | |
print "failed to connect to DB: {$DB->connect_error}"; | |
exit( 1 ); | |
} | |
$DB->set_charset( 'utf8' ); | |
/* here's what the regex does: | |
# start of pattern | |
^ start of string | |
(?: this subpattern matches; does not capture | |
[^\?]*\?url= anything "not a ?", followed by "?url=" | |
(basically, the "readability" part of the url) | |
) | |
(https?://) this subpattern captures (referenced by $1) | |
http:// -or- https:// | |
(?: this subpattern matches; does not capture | |
m "m" | |
(?:obile)? possibly followed by "obile" | |
\. followed by a dot | |
)? (and possibly not there at all) | |
(.*)$ this subpattern captures (referenced by $2) | |
everything else until the end of the string | |
(meaning, the actual URL you want) | |
#ui end of pattern | |
"u" means "unicode" | |
"i" means "case insensitive" | |
*/ | |
$match = "#^(?:[^\?]*\?url=)(https?://)(?:m(?:obile)?\.)?(.*)$#ui"; | |
// we'll replace the original string with backreferences $1 and $2 | |
// (note these ARE NOT php variables). | |
$replace = '$1$2'; | |
// parse title and url from raw data | |
foreach( $items as $item ){ | |
// TITLE | |
$title = $item['title']; | |
// URL | |
$url = preg_replace( $match,$replace,$item['link'] ); | |
// save for later | |
$title_url[] = array( $title,$url ); | |
// we'll use these in the SQL statement | |
$sql_values[] = "('{$DB->real_escape_string( $title )}','{$DB->real_escape_string( $url )}')"; | |
} | |
// it's later already! | |
// add array of values to INSERT statement | |
// reversing array to put entries in chronological order | |
$SQL = "INSERT INTO `read`(`title`,`url`) VALUES\n ".implode( "\n,",array_reverse( $sql_values ) ); | |
// look at it (if you care to) | |
print "our INSERT statement:<pre>$SQL</pre>"; | |
// try the query | |
if( $DB->query( $SQL ) ){ | |
print "added {$DB->affected_rows} records"; | |
} | |
// query failed | |
else{ | |
print "failed to INSERT: [{$DB->errno}] {$DB->error}"; | |
} | |
# database table schema # | |
/* | |
CREATE TABLE `read`( | |
`order` INT UNIQUE NOT NULL AUTO_INCREMENT COMMENT 'for sorting only' | |
,`title` VARCHAR( 50 ) NOT NULL | |
,`url` VARCHAR( 50 ) NOT NULL | |
,PRIMARY KEY( `title`,`url` ) | |
)DEFAULT CHARSET=UTF8; | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment