Skip to content

Instantly share code, notes, and snippets.

@JPry
Last active December 13, 2015 19:38
Show Gist options
  • Save JPry/4964255 to your computer and use it in GitHub Desktop.
Save JPry/4964255 to your computer and use it in GitHub Desktop.
Convert CSV of HTML paths to PHP redirects
<?php
/**
 * Generate PHP redirect stubs from a delimited list of "old path, new URL" pairs.
 *
 * Each line of the input file holds an old path containing $extension, followed by the
 * URL it should redirect to. For every usable line a .php file mirroring the old path is
 * written below $base_path; that file answers with a 301 redirect to the new URL.
 *
 * Usage: php <this-script> [live]
 * Without the "live" argument the script performs a dry run: the directory structure is
 * still created, but no PHP files are written.
 */
error_reporting( E_ALL );
// NOTE: this INI setting is deprecated as of PHP 8.1. It is kept so that input files with
// CR-only line endings are still split into lines correctly on older PHP versions.
ini_set( "auto_detect_line_endings", true );

//---------- SETUP OPTIONS FOR SCRIPT ---------------//

// Primary folder containing the redirects
$base_path = dirname( __FILE__ ) . '/_redirects';

// The file name to grab redirects from
$csv = dirname( __FILE__ ) . '/module_1.csv';

// Delimiter used in processing redirects. Use a comma for CSV files. Spaces are most common for txt files
$delimiter = ',';

// The file extension we're looking to replace
$extension = '.html';

// Whether this is a dry run, or a live run. A dry run allows you to see what folders are created, but no PHP files are generated.
$dry_run = ! ( isset( $argv ) && in_array( 'live', $argv, true ) );

//---------- END SETUP OPTIONS FOR SCRIPT -----------//

// Read the whole input file, one array element per line
$file = file( $csv, FILE_IGNORE_NEW_LINES );
if ( false === $file ) {
    // Without input there is nothing to do; stop instead of looping over "false".
    echo "Unable to read input file $csv\n";
    exit( 1 );
}

echo ( $dry_run ? 'Dry' : 'Live' ) . " Run\n";

// Dry Run informational output
if ( $dry_run ) {
    echo "<pre>\n";
    print_r( $file );
    echo "</pre>\n";
}

// Initiate variables used in looping through the file
$data      = array(); // $old => $new
$count     = 0;       // Lines skipped because the old path carries query args
$malformed = 0;       // Lines skipped because they have no second field

// Now process the file
foreach ( $file as $index => $string ) {
    // Blank lines (e.g. a trailing empty line) carry no rule
    if ( '' === trim( $string ) ) {
        continue;
    }

    // Separate based on the delimiter for each line of the file
    $pieces = explode( $delimiter, $string );

    // A rule needs both an old path and a target; skip anything else instead of
    // reading a non-existent array offset.
    if ( ! isset( $pieces[1] ) || '' === trim( $pieces[1] ) ) {
        $malformed++;
        echo 'Skipping malformed line ' . ( $index + 1 ) . ": $string\n";
        continue;
    }

    // Make sure we're not matching string with query args (extension directly followed by "?").
    // A literal substring check avoids having to regex-escape $extension.
    if ( false !== strpos( $pieces[0], $extension . '?' ) ) {
        $count++;
        continue;
    }

    // Assumes the first string ends with ".html", and replaces it with .php
    $pieces[0] = str_replace( $extension, '.php', $pieces[0] );

    // Stored as an array of $old => $new keys/values.
    // The target is trimmed so stray whitespace never ends up in the Location header.
    $data[ $pieces[0] ] = trim( $pieces[1] );
}

// Dry Run informational output
if ( $dry_run ) {
    echo "<pre>\n";
    print_r( $data );
    echo "</pre>\n";
}

// Take the processed version of the file, and create the redirect files and directory structure
foreach ( $data as $old => $new ) {
    $path      = $base_path . '/' . ltrim( $old, ' /' );
    $directory = dirname( $path );
    $created   = true;

    // Attempt to create new directories regardless of whether this is a dry run or not
    if ( ! is_dir( $directory ) ) {
        $created = mkdir( $directory, 0775, true );
    }

    // If not a dry run, then attempt to actually create the file.
    // Skipped when the directory could not be created, since the write cannot succeed.
    if ( ! $dry_run && $created ) {
        // var_export() emits a correctly escaped single-quoted PHP string, so a quote or
        // backslash in the target URL cannot break (or inject code into) the generated file.
        $contents = "<?php\n\nheader( 'HTTP/1.1 301 Moved Permanently' );\nheader( "
            . var_export( 'Location: ' . $new, true )
            . " );";

        $created = ( false !== file_put_contents( $path, $contents ) );
    }

    echo ( $dry_run ? 'Dry Run: ' : '' ) . ( $created ? '' : 'Error ' ) . "Creating $path\n";
}

// Let us know if we need to go back over any skipped rules manually
echo "\n\nSkipped $count rule(s) due to query args found.\n\n";
if ( $malformed > 0 ) {
    echo "Skipped $malformed malformed line(s) without a redirect target.\n\n";
}
<?php
/**
 * Convert a delimited list of redirects into a JSON file of rewrite rules.
 *
 * Each input line holds an old URL or path and its replacement, separated by $delimiter,
 * optionally prefixed with "Redirect 301 ". Every usable line becomes one array with the
 * keys name, pattern, replacement, type, domain and argmatch; the collected rules are
 * JSON-encoded and written to $json_output_file.
 *
 * Usage: php <this-script> [live]
 * Without the "live" argument the script prints verbose diagnostic output.
 */
error_reporting( E_ALL );
// NOTE: this INI setting is deprecated as of PHP 8.1. It is kept so that input files with
// CR-only line endings are still split into lines correctly on older PHP versions.
ini_set( "auto_detect_line_endings", true );

//---------- SETUP OPTIONS FOR SCRIPT ---------------//

// The file name to grab redirects from
$import_file = dirname( __FILE__ ) . '/redirects.txt';

// New file for JSON output
$json_output_file = dirname( __FILE__ ) . '/redirects.json';

// Delimiter used in processing redirects. Use a comma for CSV files. Spaces are most common for txt files
$delimiter = "\t";

// Whether this is a dry run, or a live run. A dry run prints diagnostic output while
// processing. Note that the JSON file is written in both modes.
$dry_run = ! ( isset( $argv ) && in_array( 'live', $argv, true ) );

//---------- END SETUP OPTIONS FOR SCRIPT -----------//

// Read the whole input file, one array element per line.
// The output file is deliberately NOT opened yet: opening it with "w" up front would
// truncate an existing result even when the input turns out to be unreadable.
$import_pointer = file( $import_file, FILE_IGNORE_NEW_LINES );
if ( false === $import_pointer ) {
    echo "Unable to read input file $import_file\n";
    exit( 1 );
}

echo ( $dry_run ? 'Dry' : 'Live' ) . " Run\n";

// Dry Run informational output
if ( $dry_run ) {
    echo "<pre>\n";
    print_r( $import_pointer );
    echo "</pre>\n";
}

// Initiate variables used in looping through the file
$data    = array(); // Collected rewrite rules
$skipped = 0;       // Lines that could not be turned into a rule

// Now process the file
foreach ( $import_pointer as $index => $string ) {
    // Possibly remove "Redirect 301" from the beginning of the string, if it's there
    $string = preg_replace( '#^Redirect\s301\s#i', '', $string );

    // Blank lines (e.g. a trailing empty line) carry no rule
    if ( '' === trim( $string ) ) {
        continue;
    }

    if ( $dry_run ) {
        echo "$string\n";
    }

    // Separate based on the delimiter for each line of the file
    $pieces = explode( $delimiter, $string );

    if ( $dry_run ) {
        print_r( $pieces );
        echo "\n";
    }

    // A rule needs both a source and a replacement; skip anything else instead of
    // reading a non-existent array offset.
    if ( ! isset( $pieces[1] ) ) {
        $skipped++;
        echo 'Skipping line ' . ( $index + 1 ) . " (no replacement found): $string\n";
        continue;
    }

    // Break up the source into optional scheme (1), optional domain (2) and path (3).
    // preg_match() already takes $matches by reference; call-time "&" is a fatal error
    // on PHP 5.4 and later.
    // TODO: Include regex to grab query args
    $matches = array();
    if ( ! preg_match( '#^(https?://)?([^/]+)?(/.*)#', $pieces[0], $matches ) ) {
        $skipped++;
        echo 'Skipping line ' . ( $index + 1 ) . " (no path found in source): $string\n";
        continue;
    }

    if ( $dry_run ) {
        print_r( $matches );
    }

    // Stored as an array of data for rewrites
    $data[] = array(
        'name'        => '',
        'pattern'     => '^' . $matches[3],
        'replacement' => $pieces[1],
        'type'        => 'permanent',
        'domain'      => $matches[2],
        'argmatch'    => '',
    );
}

// Dry Run informational output
if ( $dry_run ) {
    echo "<pre>\n";
    print_r( $data );
    echo "</pre>\n";
}

// Encode first, then write in one step, so a failed encode never leaves an empty file behind
$json    = json_encode( $data );
$success = ( false === $json ) ? false : file_put_contents( $json_output_file, $json );

if ( false !== $success ) {
    echo "$success bytes written to file.\n";
} else {
    echo "Writing to file failed!\n";
}

if ( $skipped > 0 ) {
    echo "Skipped $skipped line(s) that could not be parsed.\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment