Last active December 7, 2019 18:00
Google Docs to WordPress
/* Load up wordpress externally */
// These constants must be defined before WordPress itself is loaded.
define('WP_USE_THEMES', false);
define('COOKIE_DOMAIN', false);
define('DISABLE_WP_CRON', true);
// NOTE(review): the statement that actually loads WordPress (wp-load.php) is
// not visible in this extract; it has to run after the defines above and
// before the WordPress calls below.

// Resolve the current user; anonymous visitors are sent to the login form.
$user_id = -1;
if (is_user_logged_in()) {
    $user = wp_get_current_user();
    $user_id = $user->ID;
} else {
    header('HTTP/1.1 302 Found');
    // redirect_to is a query-string value, so it has to be URL-encoded.
    header('Location: /wordpress/wp-login.php?redirect_to=' . urlencode($_SERVER['PHP_SELF']));
    // Stop here: without this the script keeps running with $user undefined.
    exit;
}

// Only administrators and editors may use the importer.
// NOTE(review): the previous test was `!administrator || editor`, which
// rejected anyone holding the editor role (even administrators). It is read
// here as a botched "neither administrator nor editor"; if editors are meant
// to be excluded, drop the editor clause instead.
if (!in_array('administrator', $user->roles) && !in_array('editor', $user->roles)) {
    die('<h1>Sorry, you are not allow to access this page</h1>');
}
<html xmlns="">
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<title>Google Docs Autoimporter</title>
<h2>Google Docs Autoimporter</h2>
<!-- display_name is user-editable profile data, so it is HTML-escaped before output. -->
<p>You are logged in as : <em><?php echo esc_html($user->display_name); ?></em></p>
<!--Button allowing user to display the file picker-->
<button id="pick_file_button" style="display: none;">Pick file</button>
<!-- Initially hidden. Has no action attribute, so it posts the "dname" and "dcontents" fields back to this same URL. -->
<form method="post" id="import_form" style="display:none">
Title:<br><input type="text" style="width:100%" id="dname" name="dname">
Content:<br><textarea id="dcontent" name="dcontents" style="width:100%;height:300px"></textarea>
<input type="submit" value="Import">
</form>
// The API key obtained from the Google API Console. Replace with your own API key.
var API_KEY = "<API-KEY>";
// The Client ID obtained from the Google API Console. Replace with your own Client ID.
// NOTE(review): initClient reads CLIENT_ID but no declaration was present here;
// this placeholder follows the same convention as API_KEY and APP_ID.
var CLIENT_ID = "<CLIENT-ID>";
// The APP ID obtained from the Google API Console. Replace with your own APP ID and
// keep in sync with API_KEY and CLIENT_ID.
var APP_ID = "<APP-ID>";
// API discovery doc URL for APIs used by this example.
// NOTE(review): the URL is blank in this copy and must be filled in.
var DISCOVERY_DOCS = [""];
// Authorization scopes required by the API; multiple scopes can be
// included, separated by spaces.
// NOTE(review): the scope string is blank in this copy and must be filled in.
var SCOPES = '';
// Set by initClient once the auth2 library is ready.
var GoogleAuth;
var GoogleUser;
// The button starts hidden (display: none in the markup).
var pickFileButton = document.getElementById('pick_file_button');
pickFileButton.onclick = handlePickFileButtonClick;
// The import form also starts hidden.
var form = document.getElementById('import_form');
/**
 * On load, called to load the API client library, the auth2 library, and the picker library.
 * initClient runs once all three libraries are available.
 */
function handleClientLoad() {
  gapi.load('client:auth2:picker', initClient);
}
/**
 * Initializes the API client library and sets the authorization and current user.
 * On success the "Pick file" button, hidden in the markup, is made visible.
 */
function initClient() {
  gapi.client.init({
    client_id: CLIENT_ID,
    discoveryDocs: DISCOVERY_DOCS,
    scope: SCOPES
  }).then(function () {
    GoogleAuth = gapi.auth2.getAuthInstance();
    GoogleUser = GoogleAuth.currentUser.get();
    pickFileButton.style.display = 'block';
  }, function (error) {
    // Surface initialisation failures instead of leaving the page silently inert.
    console.error('Google API client initialisation failed', error);
  });
}
/**
 * Ensures the user has the correct scopes prior to displaying the picker.
 * If the scopes are already granted the picker opens immediately; otherwise
 * the user is asked to grant them first.
 */
function handlePickFileButtonClick() {
  if (GoogleUser.hasGrantedScopes(SCOPES)) {
    displayPicker();
    return;
  }
  GoogleUser.grant({
    scope: SCOPES
  }).then(displayPicker, function (err) {
    // Handle error/declined auth...
    console.error('Authorization was declined or failed', err);
  });
}
/**
 * Create and render a Picker for selecting any file on My Drive.
 * We use .enableFeature(google.picker.Feature.SUPPORT_DRIVES)
 * to ensure files in shared drives are included.
 *
 * NOTE(review): the builder chain was missing from this copy and has been
 * rebuilt from the values the file declares for it (APP_ID, API_KEY, the
 * view, the auth token and pickerCallback); confirm against the original.
 * NOTE(review): ViewId.DOCS lists every Drive file, while pickerCallback
 * exports the pick as text/html — presumably only Google Docs should be
 * selectable; verify.
 */
function displayPicker() {
  // Fetch current token
  var authResponse = GoogleUser.getAuthResponse(true);
  var view = new google.picker.View(google.picker.ViewId.DOCS);
  var picker = new google.picker.PickerBuilder()
    .enableFeature(google.picker.Feature.SUPPORT_DRIVES)
    .setAppId(APP_ID)
    .setOAuthToken(authResponse.access_token)
    .setDeveloperKey(API_KEY)
    .addView(view)
    .setCallback(pickerCallback)
    .build();
  picker.setVisible(true);
}
/**
 * Called when a file is picked. This function gathers metadata from
 * the picked documents and exports the first one as HTML, then fills in
 * and reveals the import form.
 *
 * @param {Object} data - Picker response object.
 */
function pickerCallback(data) {
  // The picker also reports non-pick events (e.g. cancel); those carry no documents.
  if (data[google.picker.Response.ACTION] !== google.picker.Action.PICKED) {
    return;
  }
  var pickedDocs = data[google.picker.Response.DOCUMENTS];
  if (!pickedDocs || pickedDocs.length === 0) {
    return;
  }
  // Named pickedDoc rather than "document" so the global document stays reachable.
  var pickedDoc = pickedDocs[0];
  // The following information is passed to the callback, while other has to be
  // retrieved from the file itself (below)
  var fileName = pickedDoc[google.picker.Document.NAME];
  var fileId = pickedDoc[google.picker.Document.ID];
  gapi.client.drive.files.export({
    'fileId': fileId,
    'mimeType': 'text/html',
    'fields': '*'
  }).then(function (response) {
    document.getElementById('dname').value = fileName;
    // Use .value: assigning text content to a textarea stops updating what is
    // shown once the user has edited the field.
    document.getElementById('dcontent').value = response.body;
    form.style.display = "block";
  }, function (err) {
    console.error('Exporting the picked document failed', err);
  });
}
<!-- The Google API Loader script. -->
<!-- onload starts handleClientLoad; it first replaces itself with a no-op so the
     onreadystatechange fallback below cannot trigger initialisation twice. -->
<script async defer src="https://apis.google.com/js/api.js"
        onload="this.onload=function(){};handleClientLoad()"
        onreadystatechange="if (this.readyState === 'complete') this.onload()">
</script>
/**
 * Parse an inline CSS style string into a property => value map.
 *
 * Declarations are separated by ';'. Each one is split on its FIRST ':' only,
 * so values that contain a colon themselves (e.g. url(http://...)) stay intact.
 * Blank declarations and declarations without a ':' are skipped.
 *
 * @param string $style Contents of a style attribute.
 * @return array Map of trimmed property name to trimmed value.
 */
function parse_style($style)
{
    $ret = [];
    foreach (explode(';', $style) as $declaration) {
        // Compare against '' rather than empty(): empty('0') is true in PHP.
        if (trim($declaration) === '') {
            continue;
        }
        $parts = explode(':', $declaration, 2);
        if (count($parts) < 2) {
            // No ':' at all: nothing usable to record.
            continue;
        }
        $ret[trim($parts[0])] = trim($parts[1]);
    }
    return $ret;
}
/**
 * Serialise a property => value map back into an inline style string.
 *
 * Every entry is written as "property:value;" with no separating whitespace;
 * an empty map yields an empty string.
 *
 * @param array $s Map of property name to value.
 * @return string Style attribute contents.
 */
function unparse_style($s)
{
    $declarations = array_map(
        function ($property, $value) {
            return "$property:$value;";
        },
        array_keys($s),
        array_values($s)
    );
    return implode('', $declarations);
}
/**
 * Tell whether a node has no visible text.
 *
 * Leading and trailing Unicode separator (\pZ) and control/format (\pC)
 * characters are stripped from the node's text content before the check.
 *
 * @param DOMNode $html Node to inspect.
 * @return bool True when nothing remains after stripping.
 */
function element_empty(DOMNode $html)
{
    $text = preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $html->textContent);
    // Compare with '' instead of empty(): empty('0') is true, which made an
    // element whose only text is "0" count as empty. A null result (regex
    // failure) is still reported as empty, as before.
    return $text === null || $text === '';
}
// Clean style, attributes, replace style with specific tag, etc.
// First cleaning pass: handles the node $html and then recurses into a
// snapshot of its children (see "Parse children" at the end).
//   $doc  - document used to create the replacement <strong>/<em> elements.
//   $html - node being cleaned.
// NOTE(review): this copy has no block delimiters and several branches below
// have no visible body, so statements appear to be missing. Confirm against
// the complete file before relying on the control flow.
function parse_and_clean_1(DOMDocument $doc, DOMNode $html)
// Remove node;
// NOTE(review): only the test for <head> is visible; the removal itself is not.
if ($html->nodeName == 'head') {
// Check attribute
if ($html instanceof DOMElement) {
// Remove all class
// Filter out Google Doc comment
// NOTE(review): no statements are visible for the two steps named above.
// Filter out google redirect from link
if ($html->tagName == 'a' && $html->hasAttribute('href')) {
$href = $html->getAttribute('href');
// NOTE(review): $G_URL is an empty string here, so the prefix test below is
// always true; presumably the redirect URL prefix was lost — confirm.
$G_URL = '';
if (substr($href, 0, strlen($G_URL)) == $G_URL) {
// The link target is replaced by the value of the "q" query parameter.
$query_string = parse_url($href, PHP_URL_QUERY);
parse_str($query_string, $g_qs);
$html->setAttribute('href', $g_qs['q']);
// Clean img tag
// NOTE(review): no body is visible for this branch.
if ($html->tagName == 'img') {
// Filter style
// Set while scanning the style; consumed by the "Bold"/"Italic" steps below.
$is_bold = false;
$is_italic = false;
if ($html->hasAttribute('style')) {
$style = parse_style($html->getAttribute('style'));
// Property names collected for removal; starts with a fixed list.
$to_removed = [
'font-family', 'line-height', 'orphans', 'widows', 'height', 'padding', 'margin',
'vertical-align', 'margin-left', 'margin-right', 'margin-top', 'margin-bottom',
'vertical-align', 'padding-left', 'padding-right', 'padding-top', 'padding-bottom',
'text-decoration-skip-ink', '-webkit-text-decoration-skip', 'page-break-after'];
// NOTE(review): assigned but not read in the visible lines.
$remove_all = false;
// Declarations whose value matches one of the specific values tested below
// are added to the removal list as well.
foreach ($style as $k => $v) {
if ($v == 'inherit') $to_removed[] = $k;
if ($k == 'color' && $v == '#000000') $to_removed[] = $k;
if ($k == 'font-weight' && $v == '400') $to_removed[] = $k;
if ($k == 'font-style' && $v == 'normal') $to_removed[] = $k;
if ($k == 'font-size' && $v == '11pt') $to_removed[] = $k;
if ($k == 'font-size' && $v == '11.5pt') $to_removed[] = $k;
if ($k == 'font-size' && $v == '12pt') $to_removed[] = $k;
if ($k == 'text-align' && $v == 'left') $to_removed[] = $k;
if ($k == 'text-decoration' && $v == 'none') $to_removed[] = $k;
if ($k == 'background-color' && $v == '#ffffff') $to_removed[] = $k;
// Weight 700 and italic are dropped from the style and remembered as flags.
if ($k == 'font-weight' && $v == '700') {
$to_removed[] = $k;
$is_bold = true;
if ($k == 'font-style' && $v == 'italic') {
$to_removed[] = $k;
$is_italic = true;
// Headings lose colour, weight and size, and the bold flag is cleared.
if (in_array($html->tagName, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])) {
$to_removed[] = 'color';
$to_removed[] = 'font-weight';
$to_removed[] = 'font-size';
$is_bold = false;
// For span with a inside/inside header
if ($html->tagName == 'span') {
// a inside
if ($html->childNodes->length == 1 && $html->childNodes->item(0)->nodeName == 'a') {
$to_removed[] = 'color';
$to_removed[] = 'text-decoration';
// inside header
if (in_array($html->parentNode->tagName, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])) {
$to_removed[] = 'color';
$to_removed[] = 'text-decoration';
$to_removed[] = 'font-size';
// For empty p or span
if (in_array($html->tagName, ['p', 'span']) && element_empty($html)) {
$to_removed = array_merge($to_removed, ['color', 'text-decoration', 'text-align']);
// NOTE(review): the body of this loop is not visible; presumably it deletes
// each listed property from $style — confirm.
foreach ($to_removed as $k) {
// The remaining declarations are serialised back into the style attribute.
$s = unparse_style($style);
// NOTE(review): the branch taken for an empty result is not visible.
if (empty($s))
$html->setAttribute('style', $s);
// Bold
// NOTE(review): the new element replaces $html in its parent; no line moving
// $html (or its children) into the new element is visible — confirm.
if ($is_bold) {
$strong = $doc->createElement('strong');
$html->parentNode->replaceChild($strong, $html);
// Italic
if ($is_italic) {
$em = $doc->createElement('em');
$html->parentNode->replaceChild($em, $html);
// Parse children
// Children are copied into $arr first and the copy is iterated; presumably
// because the recursive calls can change the child list.
if ($html->childNodes && $html->childNodes->length > 0) {
$arr = [];
foreach ($html->childNodes as $n) {
$arr[] = $n;
foreach ($arr as $n) {
parse_and_clean_1($doc, $n);
// Parse and fix newline and new paragraph
// Second cleaning pass: walks the children of $html and tracks a "base"
// paragraph ($base_child) that following attribute-less paragraphs relate to.
//   $doc  - document used to create <br> elements.
//   $html - node whose children are examined.
// NOTE(review): block delimiters and several statements are missing from this
// copy (for example the <br> created below is never inserted in the visible
// lines); confirm the merge behaviour against the complete file.
function parse_and_clean_2(DOMDocument $doc, DOMNode $html)
if ($html->childNodes && $html->childNodes->length > 0) {
// Work on a copy of the child list rather than the list itself.
$arr = [];
foreach ($html->childNodes as $child) {
$arr[] = $child;
// Merge paragraph
$base_child = null;
for ($i = 0; $i < count($arr); $i++) {
$child = $arr[$i];
if ($child instanceof DOMElement && $child->tagName == 'p') {
// An empty paragraph resets the base.
if (element_empty($child)) {
$base_child = null;
// The first paragraph without attributes becomes the base.
} else if ($base_child == null && $child->attributes->length == 0) {
$base_child = $child;
// A later paragraph without attributes while a base is set.
} else if ($child->attributes->length == 0) {
// Don't merge if style is different
// Append current child to base child
$br = $doc->createElement('br');
$children = [];
foreach ($child->childNodes as $c) {
$children[] = $c;
// NOTE(review): the body of this loop is not visible; presumably it moves
// each node into $base_child — confirm.
foreach ($children as $c) {
} else {
// A paragraph that carries attributes resets the base.
$base_child = null;
} else {
// Recursive parsing
parse_and_clean_2($doc, $child);
$base_child = null;
// Remove unnecessary tags
// Third cleaning pass: children are processed first (recursively), then the
// node itself is checked against the cases below.
//   $doc  - owning document (passed through to the recursive calls).
//   $html - node being examined.
// NOTE(review): block delimiters are missing and several branches have no
// visible body (in particular no removal of $html is visible after its
// children are moved out); confirm against the complete file.
function parse_and_clean_3(DOMDocument $doc, DOMNode $html)
// <br> as first child
// NOTE(review): only the test is visible; the action taken is not.
if ($html->childNodes && $html->childNodes->length > 0 && $html->childNodes->item(0)->nodeName == 'br') {
// Parse children
// The child list is copied before recursing.
if ($html->childNodes && $html->childNodes->length > 0) {
$arr = [];
foreach ($html->childNodes as $n) {
$arr[] = $n;
foreach ($arr as $n) {
parse_and_clean_3($doc, $n);
// empty span and p
if (in_array($html->nodeName, ['span', 'p', 'strong', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'])) {
// NOTE(review): the action for an element with no children is not visible.
if (!$html->childNodes || $html->childNodes->length == 0) {
// Contentually empty span, strong, em, etc, then we can remove the element, keeping the children
if (in_array($html->nodeName, ['span', 'strong', 'em'])) {
if (empty(trim($html->textContent))) {
// Each child is re-inserted into the parent, immediately before $html.
$parent = $html->parentNode;
$child = [];
foreach ($html->childNodes as $n) {
$child[] = $n;
foreach ($child as $n) {
$parent->insertBefore($n, $html);
// Check if is empty span without any attribute
// A span with no attributes gets the same treatment: its children are
// re-inserted into the parent, immediately before the span.
if ($html instanceof DOMElement && $html->tagName == 'span' && $html->attributes->length == 0) {
$parent = $html->parentNode;
$child = [];
foreach ($html->childNodes as $n) {
$child[] = $n;
foreach ($child as $n) {
$parent->insertBefore($n, $html);
/**
 * Hoist every <img> out of paragraphs, spans and headings.
 *
 * Children are processed first. An <img> is then moved in front of its parent
 * repeatedly for as long as that parent is a p, span or h1-h6 element, so the
 * image ends up as a sibling preceding the outermost such wrapper.
 *
 * @param DOMDocument $doc  Owning document (passed through to recursive calls).
 * @param DOMNode     $html Node to process.
 */
function parse_and_clean_img(DOMDocument $doc, DOMNode $html)
{
    $wrappers = ['p', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'];

    // Parse children
    if ($html->childNodes && $html->childNodes->length > 0) {
        // Snapshot first: hoisting changes the live child list.
        foreach (iterator_to_array($html->childNodes) as $n) {
            parse_and_clean_img($doc, $n);
        }
    }

    // Burst image out of everything
    // This is just to fix bad formatting
    if ($html->nodeName != 'img') {
        return;
    }
    while ($html->parentNode !== null && in_array($html->parentNode->nodeName, $wrappers)) {
        $parent = $html->parentNode;
        $grandparent = $parent->parentNode;
        if ($grandparent === null) {
            // The wrapper is detached from any tree: nowhere to move the image to.
            break;
        }
        $grandparent->insertBefore($html, $parent);
    }
}
// I hate wordpress....
// Read the two fields posted by the import form ("dname" and "dcontents"),
// with backslashes stripped.
// NOTE(review): no check that the request is a POST, or that these keys exist,
// is visible here — confirm one exists in the complete file.
$title = stripslashes($_POST['dname']);
$content = stripslashes($_POST['dcontents']);
// NOTE(review): $content is not used in the visible lines and nothing loads
// markup into $dom before the cleaning passes; presumably a statement that
// loads $content into $dom was lost — confirm.
$dom = new DOMDocument();
// Cleaning passes, run over the whole document in this order.
parse_and_clean_1($dom, $dom);
parse_and_clean_img($dom, $dom);
parse_and_clean_2($dom, $dom);
parse_and_clean_3($dom, $dom);
// Initial WordPress Import
// The post is first created as a draft with empty content.
// NOTE(review): the closing of this array literal is not visible in this copy.
$wp_post_arr = [
'post_author' => $user_id,
'post_content' => '',
'post_title' => $title,
'post_status' => 'draft',
'post_type' => 'post',
$post_id = wp_insert_post($wp_post_arr, true);
// A result of 0 or a WP_Error object is reported as a failure.
// NOTE(review): nothing that stops the script after this message is visible.
if ($post_id == 0 || $post_id instanceof WP_Error) {
echo '<h1>Wordpress Error!</h1>';
// Handle image attachment
// Every <img> left in the cleaned document is downloaded, stored through
// wp_upload_bits, registered as an attachment of $post_id, and replaced in the
// document by the markup get_image_tag returns for it.

// wp_generate_attachment_metadata lives in an admin include, which is not
// loaded automatically outside wp-admin.
require_once ABSPATH . 'wp-admin/includes/image.php';

// File extension per image MIME type; anything else falls back to ".jpg".
// NOTE(review): the icon case label was truncated to 'image/' in this copy;
// both common icon types are mapped here — confirm.
$img_extensions = [
    'image/png' => '.png',
    'image/gif' => '.gif',
    'image/bmp' => '.bmp',
    'image/x-icon' => '.ico',
    'image/vnd.microsoft.icon' => '.ico',
    'image/svg+xml' => '.svg',
    'image/tiff' => '.tif',
    'image/webp' => '.webp',
];

// Snapshot the node list: the loop replaces <img> nodes while it runs.
$imgs = iterator_to_array($dom->getElementsByTagName('img'));

foreach ($imgs as $img) {
    $href = $img->getAttribute('src');

    // The markup comes from the request body, so only fetch real web URLs;
    // file_get_contents would otherwise happily read local paths too.
    // Images with any other kind of src are left untouched.
    $scheme = strtolower((string) parse_url($href, PHP_URL_SCHEME));
    if (!in_array($scheme, ['http', 'https'], true)) {
        continue;
    }

    // Random file name
    $new_name = sha1($href . microtime());

    // Download image
    $file_contents = file_get_contents($href);
    if ($file_contents === false) {
        die('<h1>Image Download Error</h1>' . esc_html($href));
    }

    // Get content type and file extension
    // After redirects the header list holds every response; the last
    // Content-Type seen belongs to the response that delivered the body.
    $content_type = '';
    $response_headers = isset($http_response_header) ? $http_response_header : [];
    foreach ($response_headers as $header_line) {
        if (preg_match('/^content-type\s*:\s*([^;\s]+)/i', $header_line, $match)) {
            $content_type = strtolower($match[1]);
        }
    }
    $new_name .= isset($img_extensions[$content_type]) ? $img_extensions[$content_type] : '.jpg';

    // Upload to wordpress
    $uploaded = wp_upload_bits($new_name, null, $file_contents);
    if (!empty($uploaded['error'])) {
        die('<h1>Image Upload Error</h1>' . $uploaded['error']);
    }

    // Create attachment with previously created post as parent
    $attachment = array(
        'guid' => $uploaded['url'],
        'post_mime_type' => $uploaded['type'],
        'post_parent' => $post_id,
        'post_title' => preg_replace('/\.[^.]+$/', '', $new_name),
        'post_content' => '',
        'post_status' => 'inherit'
    );
    $image_id = wp_insert_attachment($attachment, $uploaded['file'], $post_id, true);
    if ($image_id == 0 || $image_id instanceof WP_Error) {
        echo '<h1>Wordpress Error!</h1>';
        // Do not carry on with an invalid attachment id.
        die();
    }

    // Generate thumbnail & metadata
    $attachment_data = wp_generate_attachment_metadata($image_id, $uploaded['file']);
    wp_update_attachment_metadata($image_id, $attachment_data);

    // Get the image tag
    $img_tag = get_image_tag($image_id, '', '', 'center', 'medium');

    // Replace image tag
    $template = $dom->createDocumentFragment();
    if (!$template->appendXML($img_tag)) {
        die('<h1>Image Tag Error</h1>');
    }
    $img->parentNode->replaceChild($template, $img);
}
// Update the post with proper image
// The children of the first <body> element are serialised one by one and
// concatenated, so the result has no surrounding <html>/<body> wrapper.
// NOTE(review): item(0) is dereferenced without a check; presumably a <body>
// always exists after the document is loaded — confirm.
$output = '';
foreach ($dom->getElementsByTagName('body')->item(0)->childNodes as $item) {
$output .= $dom->saveHTML($item);
// Reuse the array from the initial insert, now carrying the final content and
// the ID returned by that insert.
// NOTE(review): presumably the 'ID' entry makes wp_insert_post update the
// existing draft rather than create a second post — confirm.
$wp_post_arr['post_content'] = $output;
$wp_post_arr['ID'] = $post_id;
$post_id = wp_insert_post($wp_post_arr, true);
// NOTE(review): nothing that stops the script after the error message is
// visible, so "Post imported" below may be printed after a failure as well.
if ($post_id == 0 || $post_id instanceof WP_Error){
echo '<h1>Wordpress Error!</h1>';
echo '<h1>Post imported</h1>';
// The generated markup is echoed back to the page as-is.
echo $output;
