Last active
December 7, 2019 18:00
-
-
Save innocenat/9c4fd987d243881510201b2a34db68fc to your computer and use it in GitHub Desktop.
Google Docs to WordPress
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/*
 * Load WordPress externally (no theme, no cron) and make sure the visitor is
 * a logged-in administrator or editor before anything else happens.
 */
define('WP_USE_THEMES', false);
define('COOKIE_DOMAIN', false);
define('DISABLE_WP_CRON', true);
require('../wordpress/wp-load.php');
require('../wordpress/wp-admin/includes/image.php');

$user_id = -1;
if (is_user_logged_in()) {
    $user = wp_get_current_user();
    $user_id = $user->ID;
} else {
    // Send anonymous visitors to the login form and bring them back here afterwards.
    // The return path is URL-encoded so characters such as "&" or "#" in it
    // cannot break out of the redirect_to parameter.
    header('HTTP/1.1 302 Found');
    header('Location: /wordpress/wp-login.php?redirect_to=' . urlencode($_SERVER['PHP_SELF']));
    die();
}

// Allow the page for administrators and editors only. The previous condition
// (`!administrator || editor`) rejected every editor and even administrators
// that also carried the editor role.
$is_administrator = in_array('administrator', $user->roles, true);
$is_editor = in_array('editor', $user->roles, true);
if (!$is_administrator && !$is_editor) {
    die('<h1>Sorry, you are not allow to access this page</h1>');
}
if ($_SERVER['REQUEST_METHOD'] != 'POST'):
    // Any non-POST request renders the picker page below; the import code
    // after the matching endif only runs for POST submissions.
?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
    <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
    <title>Google Docs Autoimporter</title>
</head>
<body>
<h2>Google Docs Autoimporter</h2>
<?php /* The display name is profile data, so escape it before echoing it into HTML. */ ?>
<p>You are logged in as : <em><?php echo htmlspecialchars($user->display_name, ENT_QUOTES, 'UTF-8'); ?></em></p>
<!-- Button that opens the Google file picker; revealed once the API client is ready -->
<button id="pick_file_button" style="display: none;">Pick file</button>
<!-- Import form; stays hidden until a picked document has been exported into it -->
<form method="post" id="import_form" style="display:none">
    <p>
        Title:<br><input type="text" style="width:100%" id="dname" name="dname">
    </p>
    <p>
        Content:<br><textarea id="dcontent" name="dcontents" style="width:100%;height:300px"></textarea>
    </p>
    <input type="submit" value="Import">
</form>
<script> | |
// Credentials obtained from the Google API Console.
// Replace each placeholder with the value from your own project.
var API_KEY = "<API-KEY>";      // API (developer) key
var CLIENT_ID = "<CLIENT-ID>";  // OAuth client ID
var APP_ID = "<APP-ID>";        // App ID; keep it in sync with API_KEY and CLIENT_ID

// Discovery document URL(s) for the APIs used on this page.
var DISCOVERY_DOCS = ["https://www.googleapis.com/discovery/v1/apis/drive/v3/rest"];

// Authorization scopes required by the API; separate multiple scopes with spaces.
var SCOPES = 'https://www.googleapis.com/auth/drive.file';

// Filled in by initClient() once the client library has been initialised.
var GoogleAuth, GoogleUser;

// Page elements the script works with.
var form = document.getElementById('import_form');
var pickFileButton = document.getElementById('pick_file_button');
pickFileButton.onclick = handlePickFileButtonClick;
/**
 * Entry point invoked once the Google API loader script is available.
 * Requests the client, auth2 and picker libraries and continues in initClient.
 */
function handleClientLoad() {
    var requiredLibraries = 'client:auth2:picker';
    gapi.load(requiredLibraries, initClient);
}
/**
 * Initialises the API client, then stores the auth instance and the current
 * user and reveals the "Pick file" button. Initialisation errors are logged.
 */
function initClient() {
    var initOptions = {
        client_id: CLIENT_ID,
        discoveryDocs: DISCOVERY_DOCS,
        scope: SCOPES
    };

    function onClientReady() {
        GoogleAuth = gapi.auth2.getAuthInstance();
        GoogleUser = GoogleAuth.currentUser.get();
        // The button starts hidden; show it only now that picking can work.
        pickFileButton.style.display = 'block';
    }

    function onClientError(error) {
        console.log(error);
    }

    gapi.client.init(initOptions).then(onClientReady, onClientError);
}
/**
 * Click handler for the "Pick file" button.
 * Shows the picker straight away when the user has already granted SCOPES;
 * otherwise asks for the grant first and shows the picker once it succeeds.
 */
function handlePickFileButtonClick() {
    if (GoogleUser.hasGrantedScopes(SCOPES)) {
        displayPicker();
        return;
    }

    GoogleUser.grant({
        scope: SCOPES
    }).then(displayPicker, function (err) {
        // Declined or failed authorization: log it (as the other handlers on
        // this page do) instead of silently dropping the error.
        console.log(err);
    });
}
/**
 * Builds and shows a Google Picker using the DOCS view.
 * The SUPPORT_DRIVES feature is enabled so that files in shared drives are
 * included as well. The chosen file is reported to pickerCallback.
 */
function displayPicker() {
    // Current OAuth token of the signed-in user.
    var currentAuth = GoogleUser.getAuthResponse(true);
    var docsView = new google.picker.View(google.picker.ViewId.DOCS);

    var builder = new google.picker.PickerBuilder()
        .enableFeature(google.picker.Feature.SUPPORT_DRIVES)
        .setAppId(APP_ID)
        .setOAuthToken(currentAuth.access_token)
        .addView(docsView)
        .setDeveloperKey(API_KEY)
        .setCallback(pickerCallback);

    builder.build().setVisible(true);
}
/**
 * Picker callback. When a document has been picked, exports it from Drive as
 * HTML and fills the import form (title + content) before revealing it.
 *
 * @param {Object} data Response object handed over by the Google Picker.
 */
function pickerCallback(data) {
    console.log(data);

    // The picker calls back for every action (loading, cancelling, picking).
    // Only a completed pick carries documents, so ignore everything else
    // instead of failing on an undefined document list.
    if (data[google.picker.Response.ACTION] !== google.picker.Action.PICKED) {
        return;
    }
    var pickedDocuments = data[google.picker.Response.DOCUMENTS];
    if (!pickedDocuments || pickedDocuments.length === 0) {
        return;
    }

    // Name and ID come with the picker response; the content has to be
    // fetched separately through the Drive export call below.
    var pickedDocument = pickedDocuments[0];
    var fileName = pickedDocument[google.picker.Document.NAME];
    var fileId = pickedDocument[google.picker.Document.ID];

    gapi.client.drive.files.export({
        'fileId': fileId,
        'mimeType': 'text/html',
        'fields': '*'
    }).then(function (response) {
        window.document.getElementById('dname').value = fileName;
        // Use .value: it is the textarea's actual submitted content, and unlike
        // innerText it is not rewritten into <br> child nodes.
        window.document.getElementById('dcontent').value = response.body;
        form.style.display = "block";
    }, function (err) {
        console.log(err);
    });
}
</script> | |
<!-- Google API loader; calls handleClientLoad() once the script has loaded. -->
<script async defer src="https://apis.google.com/js/api.js"
        onload="this.onload=function(){};handleClientLoad()"
        onreadystatechange="if (this.readyState === 'complete') this.onload()">
</script>
</body>
</html>
<?php
    // The picker page is complete; stop here so the import code below only
    // runs for POST requests.
    exit;
endif;

// From here on a POST is being processed: lift the execution time limit.
set_time_limit(0);
/**
 * Parse an inline CSS declaration list ("a:b;c:d") into a property => value map.
 *
 * Declarations are split on ";" and each one on its FIRST ":" only, so values
 * that themselves contain a colon (e.g. "url(https://...)") stay intact.
 * Blank declarations and declarations without a colon are skipped.
 *
 * @param string $style Content of a style attribute.
 * @return array Map of trimmed property name to trimmed value.
 */
function parse_style($style)
{
    $ret = [];
    foreach (explode(';', (string) $style) as $declaration) {
        if (trim($declaration) === '') {
            continue;
        }
        // Limit 2: everything after the first colon belongs to the value.
        $parts = explode(':', $declaration, 2);
        if (count($parts) < 2) {
            // No colon at all: not a usable declaration.
            continue;
        }
        $ret[trim($parts[0])] = trim($parts[1]);
    }
    return $ret;
}
/**
 * Serialise a property => value map back into an inline style string.
 * Every entry is written as "property:value;" with no separating whitespace.
 */
function unparse_style($s)
{
    $declarations = [];
    foreach ($s as $property => $value) {
        $declarations[] = "$property:$value;";
    }
    return implode('', $declarations);
}
/**
 * Tell whether a node has no visible text.
 *
 * Leading and trailing Unicode separators (\pZ) and control/other characters
 * (\pC) are stripped from the node's text content; the node counts as empty
 * when nothing remains. The result is compared against '' explicitly, so a
 * node whose whole text is "0" is NOT reported as empty.
 *
 * @param DOMNode $html Node to inspect.
 * @return bool True when the node has no text besides separators/controls.
 */
function element_empty(DOMNode $html)
{
    $text = (string) $html->textContent;
    $stripped = preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $text);
    if ($stripped === null) {
        // preg_replace() failed (e.g. malformed UTF-8); fall back to a plain
        // trim rather than declaring real content empty.
        $stripped = trim($text);
    }
    return $stripped === '';
}
/**
 * Cleaning pass 1: strip attributes and inline styles, and turn bold/italic
 * styling into <strong>/<em> wrappers. Works recursively on the whole subtree.
 *
 * For every element it
 *  - removes the <head> node entirely,
 *  - drops class and id attributes,
 *  - unwraps links that start with the Google redirect prefix back to their "q" target,
 *  - removes style/title/alt from images,
 *  - deletes layout-only and default-valued style properties, and
 *  - wraps the element in <strong>/<em> when it was styled bold/italic.
 *
 * @param DOMDocument $doc  Document used to create the wrapper elements.
 * @param DOMNode     $html Node to clean; its children are cleaned afterwards.
 * @return void
 */
function parse_and_clean_1(DOMDocument $doc, DOMNode $html)
{
    // The head is not wanted at all.
    if ($html->nodeName == 'head') {
        $html->parentNode->removeChild($html);
        return;
    }

    if ($html instanceof DOMElement) {
        $headings = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'];

        $html->removeAttribute('class');
        $html->removeAttribute('id');

        // TODO: filter out Google Docs comments.

        // Replace a Google redirect link with the target carried in its "q" parameter.
        if ($html->tagName == 'a' && $html->hasAttribute('href')) {
            $href = $html->getAttribute('href');
            $G_URL = 'https://www.google.com/url?q=';
            if (strncmp($href, $G_URL, strlen($G_URL)) === 0) {
                $query_string = parse_url($href, PHP_URL_QUERY);
                $g_qs = [];
                // parse_url() yields null/false for a missing or malformed query.
                if (is_string($query_string)) {
                    parse_str($query_string, $g_qs);
                }
                if (isset($g_qs['q']) && is_string($g_qs['q'])) {
                    $html->setAttribute('href', $g_qs['q']);
                }
            }
        }

        // Images keep only their source-related attributes.
        if ($html->tagName == 'img') {
            $html->removeAttribute('style');
            $html->removeAttribute('title');
            $html->removeAttribute('alt');
        }

        if ($html->hasAttribute('style')) {
            $style = parse_style($html->getAttribute('style'));
            $is_bold = false;
            $is_italic = false;

            // Properties that are always dropped.
            $to_removed = [
                'font-family', 'line-height', 'orphans', 'widows', 'height', 'padding', 'margin',
                'vertical-align', 'margin-left', 'margin-right', 'margin-top', 'margin-bottom',
                'padding-left', 'padding-right', 'padding-top', 'padding-bottom',
                'text-decoration-skip-ink', '-webkit-text-decoration-skip', 'page-break-after',
            ];

            // Property values that count as "default" and are therefore dropped.
            $default_values = [
                'color'            => ['#000000'],
                'font-weight'      => ['400'],
                'font-style'       => ['normal'],
                'font-size'        => ['11pt', '11.5pt', '12pt'],
                'text-align'       => ['left'],
                'text-decoration'  => ['none'],
                'background-color' => ['#ffffff'],
            ];

            foreach ($style as $k => $v) {
                if ($v === 'inherit') {
                    $to_removed[] = $k;
                } elseif (isset($default_values[$k]) && in_array($v, $default_values[$k], true)) {
                    $to_removed[] = $k;
                }
                // Bold and italic are expressed through wrapper elements instead.
                if ($k === 'font-weight' && $v === '700') {
                    $to_removed[] = $k;
                    $is_bold = true;
                }
                if ($k === 'font-style' && $v === 'italic') {
                    $to_removed[] = $k;
                    $is_italic = true;
                }
            }

            // The remaining rules depend on the element only, not on a single
            // property, so they are evaluated once rather than per property.

            // Headings carry no colour/weight/size of their own and never get <strong>.
            if (in_array($html->tagName, $headings, true)) {
                $to_removed[] = 'color';
                $to_removed[] = 'font-weight';
                $to_removed[] = 'font-size';
                $is_bold = false;
            }

            if ($html->tagName == 'span') {
                // Span that only wraps a link.
                if ($html->childNodes->length == 1 && $html->childNodes->item(0)->nodeName == 'a') {
                    $to_removed[] = 'color';
                    $to_removed[] = 'text-decoration';
                }
                // Span directly inside a heading. nodeName is used because the
                // parent is not guaranteed to be an element with a tagName.
                $parent = $html->parentNode;
                if ($parent !== null && in_array($parent->nodeName, $headings, true)) {
                    $to_removed[] = 'color';
                    $to_removed[] = 'text-decoration';
                    $to_removed[] = 'font-size';
                }
            }

            // Text styling is pointless on an empty p or span.
            if (in_array($html->tagName, ['p', 'span'], true) && element_empty($html)) {
                $to_removed = array_merge($to_removed, ['color', 'text-decoration', 'text-align']);
            }

            foreach ($to_removed as $k) {
                unset($style[$k]);
            }

            $s = unparse_style($style);
            if (empty($s)) {
                $html->removeAttribute('style');
            } else {
                $html->setAttribute('style', $s);
            }

            // Wrap the element: <strong> first, then <em> directly around the element.
            if ($is_bold) {
                $strong = $doc->createElement('strong');
                $html->parentNode->replaceChild($strong, $html);
                $strong->appendChild($html);
            }
            if ($is_italic) {
                $em = $doc->createElement('em');
                $html->parentNode->replaceChild($em, $html);
                $em->appendChild($html);
            }
        }
    }

    // Recurse over a snapshot of the children: the recursion restructures the
    // tree, which would disturb iteration over the live node list.
    if ($html->childNodes && $html->childNodes->length > 0) {
        $arr = [];
        foreach ($html->childNodes as $n) {
            $arr[] = $n;
        }
        foreach ($arr as $n) {
            parse_and_clean_1($doc, $n);
        }
    }
}
/**
 * Cleaning pass 2: fix up line breaks and paragraphs.
 *
 * Among the direct children of $html, empty <p> elements are removed and runs
 * of consecutive attribute-less <p> elements are folded into the first one of
 * the run, separated by <br>. A <p> with attributes, or any other kind of
 * node, ends the current run; non-<p> children are processed recursively.
 */
function parse_and_clean_2(DOMDocument $doc, DOMNode $html)
{
    if (!$html->childNodes || $html->childNodes->length == 0) {
        return;
    }

    // Work on a snapshot, since paragraphs are removed from the live list below.
    $snapshot = [];
    foreach ($html->childNodes as $node) {
        $snapshot[] = $node;
    }

    // Paragraph that following plain paragraphs get merged into.
    $merge_target = null;

    foreach ($snapshot as $node) {
        $is_paragraph = $node instanceof DOMElement && $node->tagName == 'p';

        if (!$is_paragraph) {
            // Anything else is handled recursively and interrupts a run.
            parse_and_clean_2($doc, $node);
            $merge_target = null;
            continue;
        }

        if (element_empty($node)) {
            $html->removeChild($node);
            $merge_target = null;
            continue;
        }

        if ($node->attributes->length != 0) {
            // Paragraphs that still carry attributes (i.e. styling) are never merged.
            $merge_target = null;
            continue;
        }

        if ($merge_target === null) {
            // First plain paragraph of a run.
            $merge_target = $node;
            continue;
        }

        // Fold this paragraph into the run's first paragraph, after a <br>.
        $html->removeChild($node);
        $merge_target->appendChild($doc->createElement('br'));
        while ($node->firstChild) {
            $merge_target->appendChild($node->firstChild);
        }
    }
}
/**
 * Cleaning pass 3: remove unnecessary tags (children are processed first).
 *
 *  - a <br> that is the first child of a node is dropped,
 *  - span/p/strong/em/h1-h6 elements without any child are removed,
 *  - span/strong/em whose text is only whitespace are unwrapped, and
 *  - span elements without attributes are unwrapped.
 *
 * "Unwrapped" means the element is removed while its children stay in place.
 * Text is compared against '' explicitly, so an element containing just "0"
 * keeps its <strong>/<em> wrapper.
 *
 * @param DOMDocument $doc  Owning document (passed along in the recursion).
 * @param DOMNode     $html Node to clean up.
 * @return void
 */
function parse_and_clean_3(DOMDocument $doc, DOMNode $html)
{
    // <br> as first child
    if ($html->childNodes && $html->childNodes->length > 0 && $html->childNodes->item(0)->nodeName == 'br') {
        $html->removeChild($html->childNodes->item(0));
    }

    // Children first, over a snapshot because they may remove themselves.
    if ($html->childNodes && $html->childNodes->length > 0) {
        $arr = [];
        foreach ($html->childNodes as $n) {
            $arr[] = $n;
        }
        foreach ($arr as $n) {
            parse_and_clean_3($doc, $n);
        }
    }

    // Elements left without any child are removed.
    $removable_when_childless = ['span', 'p', 'strong', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'];
    if (in_array($html->nodeName, $removable_when_childless, true)) {
        if (!$html->childNodes || $html->childNodes->length == 0) {
            $html->parentNode->removeChild($html);
            return;
        }
    }

    // Inline wrappers with whitespace-only text: keep the children, drop the wrapper.
    if (in_array($html->nodeName, ['span', 'strong', 'em'], true)) {
        if (trim((string) $html->textContent) === '') {
            parse_and_clean_3_unwrap($html);
            return;
        }
    }

    // A span without attributes adds nothing: keep the children, drop the span.
    if ($html instanceof DOMElement && $html->tagName == 'span' && $html->attributes->length == 0) {
        parse_and_clean_3_unwrap($html);
    }
}

/**
 * Replace a node by its own children, preserving their order and position.
 */
function parse_and_clean_3_unwrap(DOMNode $node)
{
    $parent = $node->parentNode;
    while ($node->firstChild) {
        // insertBefore() moves the child out of $node and in front of it.
        $parent->insertBefore($node->firstChild, $node);
    }
    $parent->removeChild($node);
}
/**
 * Lift every <img> out of enclosing p/span/h1-h6 elements.
 *
 * Descendants are handled first. An image is then moved, one level at a time,
 * to just before its parent for as long as that parent is a paragraph, span
 * or heading. This only exists to repair bad formatting.
 */
function parse_and_clean_img(DOMDocument $doc, DOMNode $html)
{
    // Descend first, iterating over a copy of the child list.
    if ($html->childNodes && $html->childNodes->length > 0) {
        $children = [];
        foreach ($html->childNodes as $node) {
            $children[] = $node;
        }
        foreach ($children as $node) {
            parse_and_clean_img($doc, $node);
        }
    }

    if ($html->nodeName != 'img') {
        return;
    }

    $containers = ['p', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'];
    while (in_array($html->parentNode->nodeName, $containers)) {
        $container = $html->parentNode;
        $outer = $container->parentNode;
        $container->removeChild($html);
        $outer->insertBefore($html, $container);
    }
}
// Read the submitted title and HTML. stripslashes() undoes the slashes that
// are present on $_POST values here (presumably added by WordPress while
// loading - confirm against wp_magic_quotes()).
$title = (isset($_POST['dname']) && is_string($_POST['dname'])) ? stripslashes($_POST['dname']) : '';
$content = (isset($_POST['dcontents']) && is_string($_POST['dcontents'])) ? stripslashes($_POST['dcontents']) : '';

// Nothing to parse: stop before DOMDocument::loadHTML() is handed an empty string.
if (trim($content) === '') {
    die('<h1>Nothing to import</h1>');
}

$dom = new DOMDocument();
// Collect libxml parse warnings internally instead of printing them into the
// response, then restore the previous setting.
$previous_libxml_setting = libxml_use_internal_errors(true);
$dom->loadHTML($content);
libxml_clear_errors();
libxml_use_internal_errors($previous_libxml_setting);

// Cleaning passes, in this order: attributes/styles, images, paragraphs, leftover tags.
parse_and_clean_1($dom, $dom);
parse_and_clean_img($dom, $dom);
parse_and_clean_2($dom, $dom);
parse_and_clean_3($dom, $dom);
// Initial WordPress import: create an empty draft first so that the images
// imported afterwards can be attached to it as their parent post.
$wp_post_arr = [
    'post_author' => $user_id,
    'post_content' => '',
    'post_title' => $title,
    'post_status' => 'draft',
    'post_type' => 'post',
];
$post_id = wp_insert_post($wp_post_arr, true);
// Test for WP_Error first: comparing an error object with 0 would itself
// raise an "object could not be converted to int" warning.
if ($post_id instanceof WP_Error || $post_id == 0) {
    echo '<h1>Wordpress Error!</h1>';
    var_dump($post_id);
    die;
}
// Handle image attachments: download every <img>, store it as a WordPress
// attachment of the new post and replace the tag with WordPress' own markup.

// Snapshot the images: the live node list changes while tags are replaced.
$imgs = [];
foreach ($dom->getElementsByTagName('img') as $d) {
    $imgs[] = $d;
}

// File extension per reported content type; anything else is stored as .jpg.
$extension_by_type = [
    'image/png' => '.png',
    'image/gif' => '.gif',
    'image/bmp' => '.bmp',
    'image/vnd.microsoft.icon' => '.ico',
    'image/svg+xml' => '.svg',
    'image/tiff' => '.tif',
    'image/webp' => '.webp',
];

foreach ($imgs as $img) {
    $href = $img->getAttribute('src');

    // Random file name
    $hash = sha1($href . microtime());

    // Download image. Reset the header list first so that a source that sets
    // no HTTP headers cannot pick up the headers of the previous image.
    $http_response_header = [];
    $file_contents = file_get_contents($href);
    if ($file_contents === false) {
        die('<h1>Image Download Error</h1>' . htmlspecialchars($href, ENT_QUOTES, 'UTF-8'));
    }

    // Determine the content type. The last Content-Type header wins because,
    // after redirects, the headers of the final response come last; any
    // parameters after ";" (e.g. a charset) are ignored.
    $content_type = '';
    foreach ($http_response_header as $header_line) {
        if (preg_match('/^content-type\s*:\s*([^;]*)/i', $header_line, $match)) {
            $content_type = strtolower(trim($match[1]));
        }
    }

    // Always give the file an extension, also when no content type was sent.
    $new_name = $hash . (isset($extension_by_type[$content_type]) ? $extension_by_type[$content_type] : '.jpg');

    // Upload to wordpress
    $uploaded = wp_upload_bits($new_name, null, $file_contents);
    if (!empty($uploaded['error'])) {
        die('<h1>Image Upload Error</h1>' . $uploaded['error']);
    }

    // Create attachment with previously created post as parent
    $attachment = [
        'guid' => $uploaded['url'],
        'post_mime_type' => $uploaded['type'],
        'post_parent' => $post_id,
        'post_title' => preg_replace('/\.[^.]+$/', '', $new_name),
        'post_content' => '',
        'post_status' => 'inherit',
    ];
    $image_id = wp_insert_attachment($attachment, $uploaded['file'], $post_id, true);
    if ($image_id instanceof WP_Error || $image_id == 0) {
        echo '<h1>Wordpress Error!</h1>';
        // Dump the failed attachment result (not the id of the parent post).
        var_dump($image_id);
        die;
    }

    // Generate thumbnail & metadata
    $attachment_data = wp_generate_attachment_metadata($image_id, $uploaded['file']);
    wp_update_attachment_metadata($image_id, $attachment_data);

    // Get the image tag
    $img_tag = get_image_tag($image_id, '', '', 'center', 'medium');

    // Replace the original tag with the WordPress one
    $template = $dom->createDocumentFragment();
    $template->appendXML($img_tag);
    $img->parentNode->replaceChild($template, $img);
}
// Update the post with the cleaned content (now containing the imported images).
// Only the children of <body> are serialised, i.e. the body tag itself is left out.
$output = '';
$body = $dom->getElementsByTagName('body')->item(0);
if ($body !== null) {
    // A document without <body> simply yields empty content.
    foreach ($body->childNodes as $item) {
        $output .= $dom->saveHTML($item);
    }
}

// Passing the ID of the draft created earlier makes wp_insert_post() update it.
$wp_post_arr['post_content'] = $output;
$wp_post_arr['ID'] = $post_id;
$post_id = wp_insert_post($wp_post_arr, true);
// Test for WP_Error first: comparing an error object with 0 would itself
// raise an "object could not be converted to int" warning.
if ($post_id instanceof WP_Error || $post_id == 0) {
    echo '<h1>Wordpress Error!</h1>';
    var_dump($post_id);
    die;
}

echo '<h1>Post imported</h1>';
echo $output;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment