Created
April 23, 2012 20:21
-
-
Save mozz100/2473564 to your computer and use it in GitHub Desktop.
Shakespeare data manipulation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ----------------------------------------------------------------------------------- | |
# Create the database | |
CREATE DATABASE `shkspr` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci; | |
# ----------------------------------------------------------------------------------- | |
# Create table structures | |
CREATE TABLE `shkspr`.`Chapters` ( | |
`WorkID` VARCHAR( 32 ) NOT NULL , | |
`ChapterID` INT NOT NULL , | |
`Act` INT NOT NULL , | |
`Scene` INT NOT NULL , | |
`Description` TEXT NOT NULL | |
#, INDEX (`WorkID`, `ChapterID`) | |
) ENGINE = INNODB; | |
CREATE TABLE `shkspr`.`Characters` ( | |
`CharID` VARCHAR( 32 ) NOT NULL , | |
`CharName` VARCHAR( 128 ) NOT NULL , | |
`Abbrev` VARCHAR( 32 ) NOT NULL , | |
`Works` VARCHAR( 128 ) NOT NULL , | |
`Description` TEXT NOT NULL | |
#, INDEX (`CharID`) | |
) ENGINE = INNODB; | |
CREATE TABLE `shkspr`.`Paragraphs` ( | |
`WorkID` VARCHAR( 32 ) NOT NULL , | |
`ParagraphID` INT NOT NULL , | |
`ParagraphNum` INT NOT NULL , | |
`CharID` VARCHAR( 32 ) NOT NULL , | |
`PlainText` TEXT NOT NULL , | |
`Act` INT NOT NULL , | |
`Scene` INT NOT NULL | |
#, INDEX (`WorkID`, `CharID`) | |
) ENGINE = INNODB; | |
CREATE TABLE `shkspr`.`Works` ( | |
`WorkID` VARCHAR( 32 ) NOT NULL , | |
`Title` VARCHAR( 128 ) NOT NULL , | |
`LongTitle` TEXT NOT NULL , | |
`Date` INT NOT NULL , | |
`GenreType` VARCHAR( 32 ) NOT NULL | |
#, INDEX (`WorkID`) | |
) ENGINE = INNODB; | |
# ----------------------------------------------------------------------------------- | |
# NOW DO THE DATA IMPORT! | |
# See https://github.com/edent/Open-Source-Shakespeare | |
# ----------------------------------------------------------------------------------- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ----------------------------------------------------------------------------------- | |
# Move the WorkID relationship to one based on integers | |
ALTER TABLE `Works` ADD `id` INT NOT NULL; | |
ALTER TABLE `Chapters` ADD `work_id` INT NOT NULL ; | |
ALTER TABLE `Paragraphs` ADD `work_id` INT NOT NULL ; | |
CREATE TEMPORARY TABLE `shkt` ( | |
id_old VARCHAR(32) NOT NULL, | |
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY | |
); | |
INSERT INTO shkt(id_old) SELECT DISTINCT WorkID FROM Works; | |
UPDATE Works, shkt SET Works.id = shkt.id WHERE Works.WorkID = shkt.id_old; | |
UPDATE Chapters, shkt SET Chapters.work_id = shkt.id WHERE Chapters.WorkID = shkt.id_old; | |
UPDATE Paragraphs, shkt SET Paragraphs.work_id = shkt.id WHERE Paragraphs.WorkID = shkt.id_old; | |
DROP TABLE `shkt`; | |
# ----------------------------------------------------------------------------------- | |
# Move the CharID relationship to one based on integers, and use character_id as the col name | |
# Took AGES on my PC, could probably be more efficient, but hey, this is a one-time command to be run offline. | |
ALTER TABLE `Characters` ADD `id` INT NOT NULL; | |
ALTER TABLE `Paragraphs` ADD `character_id` INT NOT NULL ; | |
CREATE TEMPORARY TABLE `shkt` ( | |
id_old VARCHAR(32) NOT NULL, | |
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY | |
); | |
INSERT INTO shkt(id_old) SELECT DISTINCT CharID FROM Characters; | |
UPDATE Characters, shkt SET Characters.id = shkt.id WHERE Characters.CharID = shkt.id_old; | |
UPDATE Paragraphs, shkt SET Paragraphs.character_id = shkt.id WHERE Paragraphs.CharID = shkt.id_old; # This is slooow (see comment above) | |
DROP TABLE `shkt`; | |
# ----------------------------------------------------------------------------------- | |
# TODO rejig characters/works many to many relationship to have a proper join table. | |
# ----------------------------------------------------------------------------------- | |
# TIDY UP: | |
# drop columns that are no longer to be used, like CharID, WorkID | |
ALTER TABLE `Chapters` | |
DROP `WorkID`; | |
ALTER TABLE `Characters` | |
DROP `CharID`; | |
ALTER TABLE `Paragraphs` | |
DROP `WorkID`, | |
DROP `CharID`; | |
ALTER TABLE `Paragraphs` CHANGE `ParagraphID` `id` INT NOT NULL; | |
ALTER TABLE `Chapters` CHANGE `ChapterID` `id` INT NOT NULL; | |
ALTER TABLE `Works` | |
DROP `WorkID`; | |
# mark 'id' columns as primary keys | |
ALTER TABLE `Works` ADD PRIMARY KEY ( `id` ); | |
ALTER TABLE `Chapters` ADD PRIMARY KEY ( `id` ); | |
ALTER TABLE `Paragraphs` ADD PRIMARY KEY ( `id` ); | |
ALTER TABLE `Characters` ADD PRIMARY KEY ( `id` ); | |
# rename tables to what Rails might expect | |
RENAME TABLE `Chapters` TO `chapters` ; | |
RENAME TABLE `Works` TO `works` ; | |
RENAME TABLE `Paragraphs` TO `paragraphs` ; | |
RENAME TABLE `Characters` TO `characters` ; | |
# add indexes for efficiency, and remove no-longer-used indexes | |
ALTER TABLE `chapters` ADD INDEX ( `work_id` ); | |
ALTER TABLE `paragraphs` ADD INDEX ( `work_id` ); | |
ALTER TABLE `paragraphs` ADD INDEX ( `character_id` ); | |
# add foreign key relationships? Rails doesn't insist on them. | |
ALTER TABLE `chapters` ADD FOREIGN KEY ( `work_id` ) REFERENCES `shkspr`.`works` ( | |
`id` | |
) ON DELETE CASCADE ON UPDATE CASCADE ; | |
ALTER TABLE `paragraphs` ADD FOREIGN KEY ( `work_id` ) REFERENCES `shkspr`.`works` ( | |
`id` | |
) ON DELETE CASCADE ON UPDATE CASCADE ; | |
ALTER TABLE `paragraphs` ADD FOREIGN KEY ( `character_id` ) REFERENCES `shkspr`.`characters` ( | |
`id` | |
) ON DELETE CASCADE ON UPDATE CASCADE ; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment