A PHP script for importing a directory of images into WordPress, using a CSV file with one row of data about each image.
Quite specific to my own needs, but might be useful to others.
See the comments in wp_import_images.php for more info.
| images/* | |
| *.csv |
| <?php | |
/**
 * CSV file parser.
 *
 * Parses CSV text into a 2D array (rows of fields) stored in $data.
 * Fields are separated by $delimiter. A field wrapped in $enclosure may
 * contain delimiters and newlines, and a doubled enclosure character inside
 * a field is collapsed to a single one.
 *
 * Currently the string matching doesn't work
 * if the output encoding is not ASCII or UTF-8
 *
 * From http://minghong.blogspot.co.uk/2006/07/csv-parser-for-php.html
 */
class CsvFileParser
{
    public $delimiter;      // Field delimiter
    public $enclosure;      // Field enclosure character
    public $inputEncoding;  // Input character encoding
    public $outputEncoding; // Output character encoding
    public $data;           // CSV data as 2D array

    /**
     * Constructor.
     *
     * Declared as __construct() because a method named after the class is
     * no longer treated as a constructor from PHP 8.0 onwards, which would
     * leave every property unset.
     */
    public function __construct()
    {
        $this->delimiter = ",";
        $this->enclosure = '"';
        $this->inputEncoding = "ISO-8859-1";
        $this->outputEncoding = "ISO-8859-1";
        $this->data = array();
    }

    /**
     * Parse CSV from file
     * @param filename The CSV filename
     * @param hasBOM Using BOM or not
     * @return Success or not (false if the file can't be read or converted)
     */
    public function ParseFromFile($filename, $hasBOM = false)
    {
        if (!is_readable($filename)) {
            return false;
        }

        $content = file_get_contents($filename);
        if ($content === false) {
            return false;
        }

        return $this->ParseFromString($content, $hasBOM);
    }

    /**
     * Parse CSV from string. Parsed rows are appended to $data.
     * @param content The CSV string
     * @param hasBOM Using BOM or not
     * @return Success or not (false if the encoding conversion fails)
     */
    public function ParseFromString($content, $hasBOM = false)
    {
        $content = iconv($this->inputEncoding, $this->outputEncoding, $content);
        if ($content === false) {
            // Conversion failed; don't pretend we parsed an empty file.
            return false;
        }

        // Normalise all line endings to "\n".
        $content = str_replace("\r\n", "\n", $content);
        $content = str_replace("\r", "\n", $content);

        if ($hasBOM) { // Remove the BOM (first 3 bytes)
            $content = substr($content, 3);
        }

        // Make sure it always ends with a newline. substr() is used rather
        // than indexing so that empty content doesn't read offset -1.
        if (substr((string) $content, -1) !== "\n") {
            $content .= "\n";
        }

        // Parse the content character by character
        $row = array("");
        $idx = 0;
        $quoted = false;
        $length = strlen($content);
        $doubledEnclosure = str_repeat($this->enclosure, 2);

        for ($i = 0; $i < $length; $i++) {
            $ch = $content[$i];

            // Every enclosure character flips the quoted state; a doubled
            // enclosure flips it twice and so leaves it unchanged.
            if ($ch == $this->enclosure) {
                $quoted = !$quoted;
            }

            if ($ch == "\n" && !$quoted) {
                // End of line: remove enclosure delimiters from each field
                $fieldCount = count($row);
                for ($k = 0; $k < $fieldCount; $k++) {
                    if ($row[$k] != "" && $row[$k][0] == $this->enclosure) {
                        $row[$k] = substr($row[$k], 1, strlen($row[$k]) - 2);
                    }
                    $row[$k] = str_replace($doubledEnclosure, $this->enclosure, $row[$k]);
                }

                // Append row into table
                $this->data[] = $row;
                $row = array("");
                $idx = 0;
            } elseif ($ch == $this->delimiter && !$quoted) {
                // End of field
                $row[++$idx] = "";
            } else {
                // Inside the field
                $row[$idx] .= $ch;
            }
        }

        return true;
    }
}
| ?> |
| <?php | |
| /** | |
| * A script for importing a load of images, and data from a CSV, into WordPress. | |
| * It creates a new 'attachment' Post for each image. | |
| * It can also add custom metadata about the image. | |
| * | |
| * Run it like: | |
| * | |
| * $ php wp_import_images.php | |
| * | |
| * | |
| * Assumptions: | |
| * * The CSV file has one row per image. | |
| * * One of the CSV columns refers to a particular image to be uploaded. | |
| * | |
| * To re-use this you'd need to make a few changes based on your needs: | |
| * | |
| * 1. Change the config variables below as needed. | |
| * | |
| * 2. The $COLUMNS should be changed to make sense depending on your CSV's | |
| * structure. | |
| * | |
| * 3. Change how we determine whether a post already exists in the database, | |
| * in create_posts(). | |
| * | |
| * 4. In create_posts() alter the bit that creates the $post_data array. You | |
| * might want to change, add or delete items from this. The keys of the | |
| * $data array there correspond to the names in $COLUMNS. | |
| * | |
| * 5. In get_original_file() alter how you find the correct file for a | |
| * particular row of data. In our case it's based on a column we've called | |
| * 'ref'. You'll need to fiddle with how the image is identified. e.g. | |
| * if you had a column in your CSV that you've called 'file' you could | |
| * replace the line starting `if (strpos(...` with: | |
| * if ($filename == $data['file']) { | |
| * | |
| * 6. You might need to alter stuff in create_attachment() if you have | |
| * different requirements for what gets saved with the Post. | |
| * | |
| * 7. In add_post_metadata() we add our custom metadata fields. You'll want to | |
| * either delete all the custom ones (leave the _wp_attached_file line) or | |
| * add your own if you have different ones. | |
| */ | |
//////////////////////////////////////////////////////////////////////
// CONFIGURATION
// Every path below is relative to this script.

// While this is true no files are copied and nothing is written to the
// database; the script only prints what it would do, so you can check
// everything is in the right place before running it for real.
$DRY_RUN = true;

// ID of the WordPress user who will be the author of every new Post.
$AUTHOR_ID = 1;

// Where the WordPress installation lives:
$WP_DIR = '../public/wp/';

// Where the CSV file to import is:
$CSV_FILE = './import.csv';

// The directory holding the images that will be uploaded:
$IMAGE_SOURCE_DIR = './images/';

// Number of rows at the top of the CSV to skip (e.g. column headings):
$HEADER_ROWS = 1;

// A name for each CSV column, in the order the columns appear.
// The rest of the script refers to each piece of data by these names.
$COLUMNS = array(
    'ref',
    'title',
    'photographer',
    'publisher',
    'date_taken',
    'description',
    'source',
    'restrictions',
    'no_pic'
);

// The new Posts are added to these terms (categories/tags) of this taxonomy.
// Make $TAXONOMY an empty string to skip this step.
$TAXONOMY = 'media_category';
$TERM_IDS = array(991);

// WordPress's wp-load.php script:
$WP_LOAD_PATH = "{$WP_DIR}wp-load.php";

// WordPress's admin image.php file:
$WP_IMAGE_PATH = "{$WP_DIR}wp-admin/includes/image.php";

// The WordPress uploads directory.
// The year and month are appended to this further down.
$WP_UPLOADS_DIR = "{$WP_DIR}wp-content/uploads/";
//////////////////////////////////////////////////////////////////////
// SETUP...
// Checks that everything the import needs is present, loads WordPress, and
// makes sure this month's uploads directory exists.

require_once('csv_file_parser.php');

if (! file_exists($CSV_FILE)) {
    die(sprintf("Can't find the CSV file at '%s'.\n", $CSV_FILE));
}

if (! file_exists($IMAGE_SOURCE_DIR)) {
    die(sprintf(
        "Can't find image source directory at '%s'.\n", $IMAGE_SOURCE_DIR));
} else if (! is_dir($IMAGE_SOURCE_DIR)) {
    die(sprintf(
        "\$IMAGE_SOURCE_DIR isn't a directory: %s\n", $IMAGE_SOURCE_DIR));
}

// require_once() is a fatal error when the file is missing, so check first
// in order to fail with a readable message instead.
if (! file_exists($WP_LOAD_PATH)) {
    die(sprintf("Can't find WordPress's wp-load.php at '%s'.\n", $WP_LOAD_PATH));
}
require_once($WP_LOAD_PATH);

if (! function_exists('wp')) {
    die("Sorry, looks like WordPress isn't loaded.\n");
}

if (! file_exists($WP_IMAGE_PATH)) {
    die(sprintf("Can't find WordPress's image.php at '%s'.\n", $WP_IMAGE_PATH));
}
require_once($WP_IMAGE_PATH);

if (! function_exists('wp_generate_attachment_metadata')) {
    die("Sorry, looks like the WordPress image.php file isn't loaded.\n");
}

// eg 'public/wp-content/uploads/2017/09/'
$IMAGE_DESTINATION_DIR = sprintf('%s%s/', $WP_UPLOADS_DIR, date('Y/m'));

if (! file_exists($IMAGE_DESTINATION_DIR)) {
    if ($DRY_RUN !== false) {
        // A dry run must not change anything on disk.
        printf("Uploads directory '%s' doesn't exist; it would be created.\n",
            $IMAGE_DESTINATION_DIR);
    } else {
        printf("Creating uploads directory '%s'.\n", $IMAGE_DESTINATION_DIR);
        // Recursive, because at the start of a new year the year directory
        // won't exist yet either.
        if (! mkdir($IMAGE_DESTINATION_DIR, 0777, true) && ! is_dir($IMAGE_DESTINATION_DIR)) {
            die(sprintf(
                "Couldn't create uploads directory '%s'.\n", $IMAGE_DESTINATION_DIR));
        }
    }
}
//////////////////////////////////////////////////////////////////////
// START!
// Announces the mode, lists the source images, reads the CSV and runs the
// import.

if ($DRY_RUN) {
    print("\nThis is a DRY RUN.\n\n");
} else {
    print("\nTHIS IS NOT A DRY RUN. THIS IS REALLY HAPPENING.\n\n");
}

// Every filename in the source directory; get_original_file() searches this.
$IMAGES = scandir($IMAGE_SOURCE_DIR);
if ($IMAGES === false) {
    // e.g. the directory exists but isn't readable. Stop here rather than
    // reporting "No image found" for every single row.
    die(sprintf(
        "Can't read the image source directory at '%s'.\n", $IMAGE_SOURCE_DIR));
}

$rows = load_csv($CSV_FILE, $HEADER_ROWS);

create_posts($rows, $AUTHOR_ID);
| ////////////////////////////////////////////////////////////////////// | |
| // THE FUNCTIONS | |
/**
 * Loads a CSV file at $filepath and returns an array of arrays of data.
 *
 * $filepath is the path to a UTF-8 encoded CSV file.
 * $header_rows is how many rows to drop from the top of the file.
 *
 * Stops the script with a message if the file can't be read or parsed, so
 * that a broken CSV isn't mistaken for an empty one.
 */
function load_csv($filepath, $header_rows=0) {
    $parser = new CsvFileParser();
    $parser->inputEncoding = 'UTF-8';
    $parser->outputEncoding = 'UTF-8';

    if ($parser->ParseFromFile($filepath) === false) {
        die(sprintf("Couldn't read or parse the CSV file at '%s'.\n", $filepath));
    }

    $rows = is_array($parser->data) ? $parser->data : array();

    return array_slice($rows, $header_rows);
}
/**
 * Does it all with the provided data.
 * Creates the new posts, uploads the images, and associates them together.
 *
 * For each row: finds the matching image, skips the row if a Post with the
 * same ref already exists, and otherwise (unless this is a dry run) copies
 * the image into the uploads directory, creates the attachment Post, and
 * adds its metadata and taxonomy terms.
 *
 * $rows is an array of rows, each one an array of column data.
 * $author_id is the numeric ID of the WordPress author to use as the creator
 * of the posts.
 */
function create_posts($rows, $author_id) {
    global $DRY_RUN, $wpdb;

    $uploads = wp_upload_dir();

    if (! empty($uploads['error'])) {
        die(sprintf(
            "Can't use the WordPress uploads directory: %s\n", $uploads['error']));
    }

    foreach ($rows as $row) {
        // Get an associative array of data from the column data in $row:
        $data = row_to_data($row);

        // Blank lines in the CSV come through as rows with no ref. Skip
        // them, otherwise they'd be matched against an image named 'm0000…'.
        if (! isset($data['ref']) || $data['ref'] === '') {
            print("Row with no ref, skipping.\n");
            continue;
        }

        // Some of the titles have periods on the end; get rid of them:
        $data['title'] = isset($data['title']) ? rtrim($data['title'], '.') : '';

        $filedata = get_original_file($data);

        printf("Ref: %s. ", $data['ref']);

        if ($filedata === FALSE) {
            print("No image found, skipping.\n");
            continue;
        }

        // See if this image exists already. We look for the ref
        // in the postmeta table. Placeholders must not be quoted;
        // prepare() adds the quotes itself.
        $row_count = $wpdb->get_var(
            $wpdb->prepare(
                "SELECT COUNT(*) FROM {$wpdb->postmeta} WHERE meta_key = %s AND meta_value = %s",
                '_janetgyford_ref',
                $data['ref']
            )
        );

        if ($row_count > 0) {
            printf("Post exists, skipping: %s\n", $data['title']);
            continue;
        }

        if ($DRY_RUN !== FALSE) {
            print("This image and data would be imported.\n");
            continue;
        }

        // DO IT!

        // Pick a name that doesn't clash with anything already in the
        // uploads directory, so we never overwrite an existing upload.
        $new_filename = wp_unique_filename(
            $uploads['path'], sanitize_file_name($filedata['filename']));

        // $uploads['path'] is like
        // '/var/www/public/wp/wp-content/uploads/2017/10'
        $new_filepath = $uploads['path'] . '/' . $new_filename;

        // Put our file in the uploads directory:
        if (! copy($filedata['filepath'], $new_filepath)) {
            printf("Couldn't copy image to '%s', skipping: %s\n",
                $new_filepath, $data['title']);
            continue;
        }

        // What we use to create our new attachment post:
        $post_data = array(
            'author' => $author_id,
            'title' => $data['title'],
            'content' => isset($data['description']) ? $data['description'] : '',
            'guid' => $uploads['url'] . '/' . $new_filename,
            'mime_type' => $filedata['filetype']['type']
        );

        // Creates a new Post of type attachment:
        $post_id = create_attachment($post_data, $new_filepath);

        if (! $post_id) {
            printf("Couldn't create Post for: %s\n", $data['title']);
            // Don't leave the copied file behind with no Post pointing at it.
            unlink($new_filepath);
            continue;
        }

        printf("CREATED Post ID %s.\n", $post_id);

        if ($attach_data = wp_generate_attachment_metadata($post_id, $new_filepath)) {
            wp_update_attachment_metadata($post_id, $attach_data);
        }

        // Will be like '2017/10/my-new-filename.jpg'
        $attached_file = substr($uploads['subdir'], 1) . '/' . $new_filename;

        add_post_metadata($post_id, $data, $attached_file);

        add_taxonomy($post_id);

        // We want to leave at least one second between adding each
        // Post so that there are no duplicate post_dates, which
        // might cause problems if ordering by that.
        sleep(1);
    }
}
/**
 * Finds the source image that belongs to a row of CSV data.
 *
 * The expected start of the filename is built from $data['ref']: the first
 * character is dropped, the rest is zero-padded to four digits, and 'm' is
 * put in front. So a ref of 'M10' looks for a file starting with 'm0010'.
 *
 * Returns an array with the keys 'filename', 'filepath' and 'filetype' for
 * the first entry of the global $IMAGES that starts that way, or FALSE if
 * none does.
 */
function get_original_file($data) {
    global $IMAGES, $IMAGE_SOURCE_DIR;

    // 'M10' -> '10' -> '0010' -> 'm0010'
    $prefix = 'm' . sprintf("%04d", substr($data['ref'], 1));
    $prefix_length = strlen($prefix);

    // $IMAGES holds the names of the files in our source directory.
    foreach ($IMAGES as $candidate) {
        if (strncmp($candidate, $prefix, $prefix_length) !== 0) {
            // Doesn't begin with our prefix; keep looking.
            continue;
        }

        // The first match is the one we use.
        $path = $IMAGE_SOURCE_DIR . $candidate;

        return array(
            'filename' => $candidate,
            'filepath' => $path,
            'filetype' => wp_check_filetype($path, null)
        );
    }

    return FALSE;
}
/**
 * Inserts a new attachment Post and returns whatever
 * wp_insert_attachment() returns for it (the new Post's ID).
 *
 * $post_data needs the keys 'author', 'title', 'content', 'mime_type' and
 * 'guid'. A 'name' (the slug) is optional; when it's absent one is made
 * from the title, with underscores turned into dashes.
 * $uploaded_path is accepted but not used here; the file is linked to the
 * Post separately, via the '_wp_attached_file' metadata.
 */
function create_attachment($post_data, $uploaded_path) {
    // Work out the slug, generating one from the title if none was supplied.
    if (array_key_exists('name', $post_data)) {
        $slug = $post_data['name'];
    } else {
        $dashed_title = str_replace("_", "-", $post_data['title']);
        $slug = sanitize_title_with_dashes($dashed_title);
    }

    $attachment = array(
        'comment_status' => 'closed',
        'ping_status' => 'closed',
        'post_author' => $post_data['author'],
        'post_name' => $slug,
        'post_title' => $post_data['title'],
        'post_content' => $post_data['content'],
        'post_status' => 'publish',
        'post_type' => 'attachment',
        'post_mime_type' => $post_data['mime_type'],
        'guid' => $post_data['guid']
    );

    return wp_insert_attachment($attachment);
}
/**
 * Links the attached file to the post, and adds our custom metadata.
 *
 * $post_id is the ID of the attachment post we created.
 * $data is our associative array of data from a row of the CSV. Any field
 * that's missing from it is stored as an empty string.
 * $attached_file is the path to the file WITHIN the uploads directory (it has
 * no leading slash). e.g. '2017/10/my-new-filename.jpg'.
 */
function add_post_metadata($post_id, $data, $attached_file) {
    global $wpdb;

    // Meta key => the $data field that supplies its value.
    $custom_fields = array(
        '_janetgyford_ref' => 'ref',
        // NOTE: this key used to be misspelled '_jantetgyford_credit'. Rows
        // imported before the fix still carry the old key.
        '_janetgyford_credit' => 'photographer',
        '_janetgyford_publisher' => 'publisher',
        '_janetgyford_date_taken' => 'date_taken',
        '_janetgyford_source' => 'source',
        '_janetgyford_restrictions' => 'restrictions',
    );

    // '_wp_attached_file' associates the uploaded image with the post so
    // that it shows up in the admin when viewing it in Media.
    $meta = array('_wp_attached_file' => $attached_file);

    // Now add all our custom metadata:
    foreach ($custom_fields as $meta_key => $field) {
        $meta[$meta_key] = isset($data[$field]) ? $data[$field] : '';
    }

    // Anything at all in the 'no_pic' column means the image is hidden.
    $no_pic = isset($data['no_pic']) ? $data['no_pic'] : '';
    $meta['_janetgyford_visibility'] = ($no_pic === '') ? 'show' : 'hide';

    foreach ($meta as $meta_key => $meta_value) {
        $wpdb->insert($wpdb->postmeta, array(
            'post_id' => $post_id,
            'meta_key' => $meta_key,
            'meta_value' => $meta_value
        ));
    }
}
/**
 * Assigns the Post with ID $post_id to the terms in the global $TERM_IDS,
 * within the global $TAXONOMY, replacing any terms it already has there.
 * Does nothing when $TAXONOMY is an empty string.
 */
function add_taxonomy($post_id) {
    global $TAXONOMY, $TERM_IDS;

    if ($TAXONOMY === '') {
        // Taxonomy assignment has been switched off in the configuration.
        return;
    }

    // The final argument is false so the terms are replaced, not appended to.
    wp_set_object_terms($post_id, $TERM_IDS, $TAXONOMY, false);
}
/**
 * Takes an array of column data and returns an associative array with the
 * same data, keyed by the names in the global $COLUMNS.
 *
 * Every name in $COLUMNS is always present in the result: a row with fewer
 * columns than $COLUMNS gets empty strings for the ones it lacks, and any
 * columns beyond those named in $COLUMNS are ignored. Values are trimmed.
 */
function row_to_data($row) {
    global $COLUMNS;

    $data = array();

    foreach ($COLUMNS as $n => $name) {
        $data[$name] = isset($row[$n]) ? trim($row[$n]) : '';
    }

    return $data;
}