Created
February 3, 2014 02:31
-
-
Save msouth/8778091 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use DBI;
use strict;
use warnings;

# One-off migration script: converts the BBCode markup stored in
# forum_posts.content to Markdown and writes each row back in place.
#
# Usage: <script> database_name [mysql user] [mysql password]
# The user defaults to 'root' and the password to the empty string.

my $db_name = shift or die "call as $0 [database_name] [mysql user] [mysql password]";
my $mysql_user     = shift || 'root';
my $mysql_password = shift || '';

# RaiseError makes every DBI call below (connect, do, prepare, execute, fetch)
# die with the driver's message on failure, so no call goes unchecked.
my $dbh = DBI->connect(
    "dbi:mysql:database=$db_name",
    $mysql_user,
    $mysql_password,
    { RaiseError => 1, PrintError => 0 },
) or die $DBI::errstr;

# first time we run, make a copy of the original forum_posts table so people
# can revert if they need to. On later runs the table already exists and is
# left alone, so it keeps the pre-migration content.
$dbh->do('create table if not exists forum_posts_original_do_not_touch as (select * from forum_posts)');

my $select  = $dbh->prepare('select id, content from forum_posts');
my $replace = $dbh->prepare('update forum_posts set content =? where id = ?');
$select->execute;

# Tally of bracketed words still present after conversion, keyed by word.
my %tags;

while ( my $row = $select->fetchrow_hashref ) {

    # A NULL content has nothing to convert; skipping it avoids a stream of
    # "uninitialized value" warnings and a pointless UPDATE back to NULL.
    next unless defined $row->{content};

    my $content = _bbcode_to_markdown( $row->{content} );
    $replace->execute( $content, $row->{id} );

    $tags{$_}++ for $content =~ /\[(\w+)\]/g;
}

# this shows what bb tags--or things that look kind of like tags--are left,
# and how many
#print Dumper(\%tags); use Data::Dumper;

$dbh->disconnect;

# Applies the BBCode -> Markdown rewrites to one post body and returns the
# converted text. The substitutions are order-dependent (for example, plain
# [url] and [img] are handled before [url=...]), so keep the sequence intact
# when editing.
sub _bbcode_to_markdown {
    my ($content) = @_;

    # markdown doesn't do color
    $content =~ s{\[/?color.*?\]}{}gms;

    # there were a lot of 24's
    $content =~ s{\[size=24\](.*?)\[/size\]}{\r\n# $1\r\n}gms;

    # there were a lot of 18's
    $content =~ s{\[size=18\](.*?)\[/size\]}{\r\n## $1\r\n}gms;

    # there were a few other 8's, 7's, 10's, 12's...just ignoring those
    $content =~ s{\[size=\d+\](.*?)\[/size\]}{$1\r\n}gms;

    # plain urls
    $content =~ s{\[url\](http[^\[]+)\[/url\]}{$1}gims;

    # images (some inside urls...those are later)
    # NOTE(review): the replacement is empty, so [img] blocks are removed
    # rather than converted -- confirm that is intended.
    $content =~ s{\[img\](.*?)\[/img\]}{}gims;

    # [url=http://www.jagtoplist.com/in.php?site=16661][/url]
    # NOTE(review): unlike the plain [url] rule this one is case-sensitive.
    $content =~ s{\[url=(.*?)\](.*?)\[/url\]}{[$2]($1)}gms;

    # Bold becomes **; underline and italic both become a single *.
    $content =~ s{\[/?b\]}{**}gims;
    $content =~ s{\[/?u\]}{*}gims;
    $content =~ s{\[/?i\]}{*}gims;

    # [hN]...[/hN]: the closing tag becomes a line break, the opening tag a
    # line break followed by N '#' characters.
    $content =~ s{\[/h(\d)\]}{\r\n}gims;
    $content =~ s{\[h(\d)\]}{ "\r\n" . ( "#" x $1 ) }igems;

    # take out the ul tags...
    $content =~ s{\[/?ul\]}{}gims;

    # ... but make their li's *'s
    $content =~ s{\[li\]}{\r\n* }gims;
    $content =~ s{\[/li\]}{\r\n}gims;

    # size inside image/link text doesn't work
    $content =~ s{\[#+}{[}gms;

    # Each line inside [list]...[/list] becomes a bullet.
    $content =~ s{\[list\](.*?)\[/list\]}{ _format_list_body($1) }gems;

    # size hack inside list doesn't work
    $content =~ s/^\* ##/##/gms;

    return reformat_quotes($content);
}

# Turns the text between [list] and [/list] into Markdown bullets, one per
# CRLF-separated line, and returns the result.
sub _format_list_body {
    my ($inner) = @_;

    # trim space
    # NOTE(review): this only fires when the text has whitespace at BOTH
    # ends, and because (.*) is greedy it drops just the last trailing
    # whitespace character. Left as-is; the later steps were tuned against it.
    $inner =~ s/^\s+(.*)\s+$/$1/s;

    # remove blank lines
    $inner =~ s/(\r\n)(\s*\r\n)+/$1/gms;

    # first item
    $inner = "* $inner";

    # internal items
    $inner =~ s/(\r\n)/$1* /gms;

    # mechanism sometimes puts an extra li, remove it
    $inner =~ s/\*\s+$//s;

    return $inner;
}
# Converts BBCode quote blocks in a string to Markdown-style block quotes.
#
# Takes the text as its only argument and returns the converted copy:
#   * "[quote]" / "[quote=name]" opens a quote level. It is replaced by an
#     optional "@name:" attribution, a line break at the current level and a
#     line break at the new, deeper level.
#   * "[/quote]" closes a level and is replaced by two line breaks at the
#     resulting level.
#   * Every CRLF inside a quote is followed by one '>' per open level and
#     preceded by a single space; CRLFs outside any quote are left unchanged.
#
# The text is processed in a single left-to-right pass, so no placeholder
# words are inserted and post text can never be mistaken for a marker. A stray
# "[/quote]" with no matching opener does not push the depth below zero.
#
# NOTE(review): a Markdown hard line break normally needs two trailing spaces;
# this emits one, as the previous implementation did -- confirm intent.
sub reformat_quotes {
    my $text = shift;
    return $text unless defined $text;

    my $depth = 0;

    # Renders one CRLF for the current quote depth.
    my $line_break = sub {
        return $depth > 0 ? " \r\n" . ( '>' x $depth ) : "\r\n";
    };

    # Produces the replacement for one matched token and updates the depth.
    my $rewrite = sub {
        my ( $is_open, $author, $is_close ) = @_;

        if ($is_open) {
            my $attribution = defined $author ? "\@$author:" : '';
            my $outer       = $line_break->();    # rendered before going deeper
            ++$depth;
            return $attribution . $outer . $line_break->();
        }

        if ($is_close) {
            --$depth if $depth > 0;               # ignore unbalanced closers
            my $break = $line_break->();
            return $break . $break;
        }

        return $line_break->();                   # plain CRLF
    };

    # Group 1: whole opening tag, group 2: optional name, group 3: closing tag.
    # When neither group 1 nor group 3 matched, the token is a bare CRLF.
    $text =~ s{ (\[quote (?: = (\w+) )? \]) | (\[/quote\]) | \r\n }
              { $rewrite->( defined $1, $2, defined $3 ) }gex;

    return $text;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment