Put this file in: discourse_dir/script/import_scripts/ipb4.rb
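Import scripts like this one are typically run from the Discourse root directory, for example with something like RAILS_ENV=production bundle exec ruby script/import_scripts/ipb4.rb (adjust the path, environment and the database credentials at the top of the script to your setup).
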
# migration script from IPB4 to Discourse

require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'htmlentities'

class ImportScripts::Ipb4 < ImportScripts::Base

  IPB_DB_USER = "user"
  IPB_DB_PASSWORD = "password"
  IPB_4_DB = "mmo_forum"
  TABLE_PREFIX = "ibf_"
  ATTACHMENTS_BASE_DIR = nil # "/absolute/path/to/attachments" set the absolute path if you have attachments
  BATCH_SIZE = 1000
  CONVERT_HTML = true

  def initialize
    super

    @htmlentities = HTMLEntities.new

    @client = Mysql2::Client.new(
      host: "localhost",
      username: IPB_DB_USER,
      password: IPB_DB_PASSWORD,
      database: IPB_4_DB
    )

    @import_tags = false
    begin
      r = @client.query("select count(*) count from #{TABLE_PREFIX}core_tags")
      @import_tags = true if r.first["count"].to_i > 0
    rescue => e
      puts "Tags won't be imported. #{e.message}"
    end
  end
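
  # Import pipeline: users, categories, category assignment for existing
  # topics, topics (built from each topic's first post), remaining posts,
  # quote/emoticon cleanup, trust levels, and permalinks.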
  def execute
    if @import_tags
      SiteSetting.tagging_enabled = true
      SiteSetting.max_tags_per_topic = 10
    end

    import_users
    import_categories
    import_topics_categories
    import_topics
    import_posts
    fix_quotes_emoticons
    update_tl0
    create_permalinks
  end
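
  # Creates Discourse users from core_members in batches. Only members with
  # member_id > 52204 are imported (hard-coded resume point); member_id 1
  # becomes an admin.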
  def import_users
    puts '', "creating users"

    @user_is_deleted = false
    @last_deleted_username = nil
    username = nil
    # hard-coded resume point: only members with member_id > 52204 are imported
    @last_user_id = 52204

    total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}core_members WHERE member_id > 52204;").first['count']

    batches(BATCH_SIZE) do |offset|
      # advance past the last imported member_id on every batch, otherwise the
      # same BATCH_SIZE rows would be fetched over and over
      results = mysql_query("SELECT member_id, name, member_title, email, joined, last_activity, ip_address FROM #{TABLE_PREFIX}core_members WHERE member_id > #{@last_user_id} ORDER BY member_id ASC LIMIT #{BATCH_SIZE};")

      break if results.size < 1

      @last_user_id = results.to_a.last['member_id']

      next if all_records_exist? :users, results.map { |u| u['member_id'].to_i }

      create_users(results, total: total_count, offset: offset) do |user|
        next if user['email'].blank?
        next if user['name'].blank?
        next if @lookup.user_id_from_imported_user_id(user['member_id'])

        @user_is_deleted = false
        username = user['name']

        { id: user['member_id'],
          email: user['email'],
          username: username,
          name: user['name'],
          created_at: user['joined'] == nil ? 0 : Time.zone.at(user['joined'].to_i),
          registration_ip_address: user['ip_address'],
          last_seen_at: user['last_activity'] == nil ? 0 : Time.zone.at(user['last_activity'].to_i),
          admin: user['member_id'] == 1,
        }
      end
    end
  end
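
  # Imports profile photos from ATTACHMENTS_BASE_DIR and assigns them as
  # avatars. Note: this method is not called from execute, and the
  # User/UserID query below looks carried over from the Vanilla-style
  # importer this script is based on, so adapt it to your IPB4 schema
  # before using it.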
  def import_avatars
    if ATTACHMENTS_BASE_DIR && File.exists?(ATTACHMENTS_BASE_DIR)
      puts "", "importing user avatars"

      User.find_each do |u|
        next unless u.custom_fields["import_id"]

        r = mysql_query("SELECT photo FROM #{TABLE_PREFIX}User WHERE UserID = #{u.custom_fields['import_id']};").first
        next if r.nil?
        photo = r["photo"]
        next unless photo.present?

        # Possible encoded values:
        # 1. cf://uploads/userpics/820/Y0AFUQYYM6QN.jpg
        # 2. ~cf/userpics2/cf566487133f1f538e02da96f9a16b18.jpg
        # 3. ~cf/userpics/txkt8kw1wozn.jpg
        photo_real_filename = nil
        parts = photo.squeeze("/").split("/")
        if parts[0] == "cf:"
          photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[2..-2].join('/')}".squeeze("/")
        elsif parts[0] == "~cf"
          photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[1..-2].join('/')}".squeeze("/")
        else
          puts "UNKNOWN FORMAT: #{photo}"
          next
        end

        if !File.exists?(photo_path)
          puts "Path to avatar file not found! Skipping. #{photo_path}"
          next
        end

        photo_real_filename = find_photo_file(photo_path, parts.last)
        if photo_real_filename.nil?
          puts "Couldn't find file for #{photo}. Skipping."
          next
        end

        print "."

        upload = create_upload(u.id, photo_real_filename, File.basename(photo_real_filename))
        if upload.persisted?
          u.import_mode = false
          u.create_user_avatar
          u.import_mode = true
          u.user_avatar.update(custom_upload_id: upload.id)
          u.update(uploaded_avatar_id: upload.id)
        else
          puts "Error: Upload did not persist for #{u.username} #{photo_real_filename}!"
        end
      end
    end
  end

  def find_photo_file(path, base_filename)
    base_guess = base_filename.dup
    full_guess = File.join(path, base_guess) # often an exact match exists

    return full_guess if File.exists?(full_guess)

    # Otherwise, the file exists but with a prefix:
    # The p prefix seems to be the full file, so try to find that one first.
    ['p', 't', 'n'].each do |prefix|
      full_guess = File.join(path, "#{prefix}#{base_guess}")
      return full_guess if File.exists?(full_guess)
    end

    # Didn't find it.
    nil
  end
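
  # Creates one Discourse category per row in forums_forums, using the SEO
  # name (dashes replaced with spaces) as the category name.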
  def import_categories
    puts "", "importing categories..."

    categories = mysql_query("
      SELECT id, name_seo
      FROM #{TABLE_PREFIX}forums_forums
      ORDER BY id ASC
    ").to_a

    create_categories(categories) do |category|
      {
        id: category['id'],
        name: CGI.unescapeHTML(category['name_seo']).gsub('-', ' ').capitalize,
        description: ''
      }
    end
  end
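
  # Creates Discourse topics from forums_topics (only tid > 102420, another
  # hard-coded resume point). The earliest post of each topic becomes the
  # topic body and is registered so import_posts won't import it again.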
  def import_topics
    puts "", "importing topics..."

    total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}forums_topics WHERE tid > 102420;").first['count']
    # hard-coded resume point: only topics with tid > 102420 are imported
    @last_topic_id = 102420

    batches(BATCH_SIZE) do |offset|
      # advance past the last imported tid on every batch so the query
      # doesn't keep returning the same rows
      discussions = mysql_query(
        "SELECT tid, forum_id, title, topic_firstpost,
                start_date, starter_id
         FROM #{TABLE_PREFIX}forums_topics
         WHERE tid > #{@last_topic_id}
         ORDER BY tid ASC
         LIMIT #{BATCH_SIZE};")

      break if discussions.size < 1

      @last_topic_id = discussions.to_a.last['tid']

      next if all_records_exist? :posts, discussions.map { |t| "discussion#" + t['tid'].to_s }

      create_posts(discussions, total: total_count, offset: offset) do |discussion|
        firstPost = mysql_query(
          "SELECT pid, post
           FROM #{TABLE_PREFIX}forums_posts
           WHERE topic_id = #{discussion['tid']}
           ORDER BY pid ASC
           LIMIT 1;").to_a

        next if firstPost[0].nil?

        # store first post id, so it won't be imported again in import_posts
        add_post("comment#" + firstPost[0]['pid'].to_s, firstPost[0]['pid'])

        {
          id: "discussion#" + discussion['tid'].to_s,
          user_id: user_id_from_imported_user_id(discussion['starter_id']) || Discourse::SYSTEM_USER_ID,
          title: discussion['title'],
          category: category_id_from_imported_category_id(discussion['forum_id']),
          raw: clean_up(firstPost[0]['post']),
          created_at: Time.zone.at(discussion['start_date'].to_i),
        }
      end
    end
  end
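
  # Second pass over forums_topics that assigns categories to topics already
  # present in Discourse, looked up directly by tid (i.e. it assumes the
  # Discourse topic id matches the original IPB tid).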
  def import_topics_categories
    puts "", "updating topic categories..."

    total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}forums_topics;").first['count']
    @last_topic_id = -1

    batches(BATCH_SIZE) do |offset|
      discussions = mysql_query(
        "SELECT tid, forum_id, title, topic_firstpost,
                start_date, starter_id
         FROM #{TABLE_PREFIX}forums_topics
         WHERE tid > #{@last_topic_id}
         ORDER BY tid ASC
         LIMIT #{BATCH_SIZE};")

      break if discussions.size < 1

      @last_topic_id = discussions.to_a.last['tid']

      discussions.each do |discussion|
        topic = Topic.where(id: discussion['tid']).first
        if topic
          topic.category_id = category_id_from_imported_category_id(discussion['forum_id'])
          topic.save
          puts "topic #{topic.id} category updated..."
        end
      end
    end
  end
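
  # Imports the remaining forums_posts (pid > 729297) as replies, attaching
  # each one to the topic created from its topic_id.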
  def import_posts
    puts "", "importing posts..."

    total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}forums_posts WHERE pid > 729297;").first['count']
    # hard-coded resume point: only posts with pid > 729297 are imported
    @last_post_id = 729297

    batches(BATCH_SIZE) do |offset|
      # advance past the last imported pid on every batch so the query
      # doesn't keep returning the same rows
      comments = mysql_query(
        "SELECT pid, topic_id, post,
                post_date, author_id
         FROM #{TABLE_PREFIX}forums_posts
         WHERE pid > #{@last_post_id}
         ORDER BY pid ASC
         LIMIT #{BATCH_SIZE};")

      break if comments.size < 1

      @last_post_id = comments.to_a.last['pid']

      next if all_records_exist? :posts, comments.map { |comment| "comment#" + comment['pid'].to_s }

      create_posts(comments, total: total_count, offset: offset) do |comment|
        next unless t = topic_lookup_from_imported_post_id("discussion#" + comment['topic_id'].to_s)
        next if comment['post'].blank?

        {
          id: "comment#" + comment['pid'].to_s,
          user_id: user_id_from_imported_user_id(comment['author_id']) || Discourse::SYSTEM_USER_ID,
          topic_id: t[:topic_id],
          raw: clean_up(comment['post']),
          created_at: Time.zone.at(comment['post_date'].to_i)
        }
      end
    end
  end
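
  # Converts bbcode markup in a raw post body to Markdown: code/php/html
  # blocks, URLs, images, quotes, YouTube embeds and color tags, plus
  # HTML entity decoding and attachment tag removal.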
  def clean_up(raw)
    return "" if raw.blank?

    # decode HTML entities
    raw = @htmlentities.decode(raw)

    # fix whitespaces
    raw = raw.gsub(/(\\r)?\\n/, "\n")
             .gsub("\\t", "\t")

    # [HTML]...[/HTML]
    raw = raw.gsub(/\[html\]/i, "\n```html\n")
             .gsub(/\[\/html\]/i, "\n```\n")

    # [PHP]...[/PHP]
    raw = raw.gsub(/\[php\]/i, "\n```php\n")
             .gsub(/\[\/php\]/i, "\n```\n")

    # [HIGHLIGHT="..."]
    raw = raw.gsub(/\[highlight="?(\w+)"?\]/i) { "\n```#{$1.downcase}\n" }

    # [CODE]...[/CODE]
    # [HIGHLIGHT]...[/HIGHLIGHT]
    raw = raw.gsub(/\[\/?code\]/i, "\n```\n")
             .gsub(/\[\/?highlight\]/i, "\n```\n")

    # [SAMP]...[/SAMP]
    raw.gsub!(/\[\/?samp\]/i, "`")

    unless CONVERT_HTML
      # replace all chevrons with HTML entities
      # NOTE: must be done
      #  - AFTER all the "code" processing
      #  - BEFORE the "quote" processing
      raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" }
               .gsub("<", "&lt;")
               .gsub("\u2603", "<")

      raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" }
               .gsub(">", "&gt;")
               .gsub("\u2603", ">")
    end

    # [URL=...]...[/URL]
    raw.gsub!(/\[url="?(.+?)"?\](.+)\[\/url\]/i) { "[#{$2}](#{$1})" }

    # [IMG]...[/IMG]
    raw.gsub!(/\[\/?img\]/i, "")

    # [URL]...[/URL]
    # [MP3]...[/MP3]
    raw = raw.gsub(/\[\/?url\]/i, "")
             .gsub(/\[\/?mp3\]/i, "")

    # [QUOTE]...[/QUOTE]
    raw.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" }

    # [YOUTUBE]<id>[/YOUTUBE]
    raw.gsub!(/\[youtube\](.+?)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }

    # [youtube=425,350]id[/youtube]
    raw.gsub!(/\[youtube="?(.+?)"?\](.+)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$2}\n" }

    # [MEDIA=youtube]id[/MEDIA]
    raw.gsub!(/\[MEDIA=youtube\](.+?)\[\/MEDIA\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }

    # [VIDEO=youtube;<id>]...[/VIDEO]
    raw.gsub!(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }

    # Convert image bbcode
    raw.gsub!(/\[img=(\d+),(\d+)\]([^\]]*)\[\/img\]/i, '<img width="\1" height="\2" src="\3">')

    # Remove the color tag
    raw.gsub!(/\[color=[#a-z0-9]+\]/i, "")
    raw.gsub!(/\[\/color\]/i, "")

    # remove attachments
    raw.gsub!(/\[attach[^\]]*\]\d+\[\/attach\]/i, "")

    # sanitize img tags
    # This regexp removes everything between the first and last img tag. The .* is too much.
    # If it's needed, it needs to be fixed.
    # raw.gsub!(/\<img.*src\="([^\"]+)\".*\>/i) {"\n<img src='#{$1}'>\n"}

    raw
  end

  def staff_guardian
    @_staff_guardian ||= Guardian.new(Discourse.system_user)
  end

  def mysql_query(sql)
    @client.query(sql)
    # @client.query(sql, cache_rows: false) #segfault: cache_rows: false causes segmentation fault
  end
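
  # Creates Permalink records mapping old forum URLs (profile/<id>/<username>,
  # discussion/<id>/<slug>, discussion/comment/<id>) to the imported users,
  # topics and posts. These URL patterns look carried over from the Vanilla
  # importer, so adjust them to the URL scheme of your old IPB site if needed.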
  def create_permalinks
    puts '', 'Creating redirects...', ''

    User.find_each do |u|
      ucf = u.custom_fields
      if ucf && ucf["import_id"] && ucf["import_username"]
        Permalink.create(url: "profile/#{ucf['import_id']}/#{ucf['import_username']}", external_url: "/users/#{u.username}") rescue nil
        print '.'
      end
    end

    Post.find_each do |post|
      pcf = post.custom_fields
      if pcf && pcf["import_id"]
        topic = post.topic
        id = pcf["import_id"].split('#').last
        if topic && post.post_number == 1
          slug = Slug.for(topic.title) # probably matches what vanilla would do...
          Permalink.create(url: "discussion/#{id}/#{slug}", topic_id: topic.id) rescue nil
        else
          Permalink.create(url: "discussion/comment/#{id}", post_id: post.id) rescue nil
        end
        print '.'
      end
    end
  end
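
  # Post-processing pass: strips IPB quote citation headers (.ipsQuote_citation)
  # from imported posts and replaces a couple of hard-coded smiley <img> tags
  # with ":)".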
  def fix_quotes_emoticons
    Post.order('id DESC').find_each do |post|
      raw = Nokogiri::HTML::fragment(post.raw)
      raw.search('.ipsQuote_citation').remove
      raw.search('blockquote').each do |quote|
        quote.content = quote.text
        quote.content = quote.content.gsub(/[\t]+/, "\t")
        quote.content = quote.content.gsub(/\t\n/, "")
      end
      post.raw = raw.to_s
      post.raw = post.raw.gsub("<img alt=\":)\" data-emoticon=\"\" src=\"<fileStore.core_Emoticons>/emoticons/263a.png\" title=\":)\" />", ":) ")
      post.raw = post.raw.gsub("<img alt=\":)\" height=\"20\" title=\":)\" width=\"20\">", ":)")
      post.save
      puts "Post #{post.id} fixed...."
    end
  end
end

ImportScripts::Ipb4.new.perform