Skip to content

Instantly share code, notes, and snippets.

@mozz100
Created April 23, 2012 20:21
Show Gist options
  • Save mozz100/2473564 to your computer and use it in GitHub Desktop.
Save mozz100/2473564 to your computer and use it in GitHub Desktop.
Shakespeare data manipulation
# -----------------------------------------------------------------------------------
# Create the database
CREATE DATABASE `shkspr` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci;
# -----------------------------------------------------------------------------------
# Create table structures
CREATE TABLE `shkspr`.`Chapters` (
`WorkID` VARCHAR( 32 ) NOT NULL ,
`ChapterID` INT NOT NULL ,
`Act` INT NOT NULL ,
`Scene` INT NOT NULL ,
`Description` TEXT NOT NULL
#, INDEX (`WorkID`, `ChapterID`)
) ENGINE = INNODB;
CREATE TABLE `shkspr`.`Characters` (
`CharID` VARCHAR( 32 ) NOT NULL ,
`CharName` VARCHAR( 128 ) NOT NULL ,
`Abbrev` VARCHAR( 32 ) NOT NULL ,
`Works` VARCHAR( 128 ) NOT NULL ,
`Description` TEXT NOT NULL
#, INDEX (`CharID`)
) ENGINE = INNODB;
CREATE TABLE `shkspr`.`Paragraphs` (
`WorkID` VARCHAR( 32 ) NOT NULL ,
`ParagraphID` INT NOT NULL ,
`ParagraphNum` INT NOT NULL ,
`CharID` VARCHAR( 32 ) NOT NULL ,
`PlainText` TEXT NOT NULL ,
`Act` INT NOT NULL ,
`Scene` INT NOT NULL
#, INDEX (`WorkID`, `CharID`)
) ENGINE = INNODB;
CREATE TABLE `shkspr`.`Works` (
`WorkID` VARCHAR( 32 ) NOT NULL ,
`Title` VARCHAR( 128 ) NOT NULL ,
`LongTitle` TEXT NOT NULL ,
`Date` INT NOT NULL ,
`GenreType` VARCHAR( 32 ) NOT NULL
#, INDEX (`WorkID`)
) ENGINE = INNODB;
# -----------------------------------------------------------------------------------
# NOW DO THE DATA IMPORT!
# See https://github.com/edent/Open-Source-Shakespeare
# -----------------------------------------------------------------------------------
# -----------------------------------------------------------------------------------
# Move the WorkID relationship to one based on integers
ALTER TABLE `Works` ADD `id` INT NOT NULL;
ALTER TABLE `Chapters` ADD `work_id` INT NOT NULL ;
ALTER TABLE `Paragraphs` ADD `work_id` INT NOT NULL ;
CREATE TEMPORARY TABLE `shkt` (
id_old VARCHAR(32) NOT NULL,
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY
);
INSERT INTO shkt(id_old) SELECT DISTINCT WorkID FROM Works;
UPDATE Works, shkt SET Works.id = shkt.id WHERE Works.WorkID = shkt.id_old;
UPDATE Chapters, shkt SET Chapters.work_id = shkt.id WHERE Chapters.WorkID = shkt.id_old;
UPDATE Paragraphs, shkt SET Paragraphs.work_id = shkt.id WHERE Paragraphs.WorkID = shkt.id_old;
DROP TABLE `shkt`;
# -----------------------------------------------------------------------------------
# Move the CharID relationship to one based on integers, and use character_id as the col name
# Took AGES on my PC, could probably be more efficient, but hey, this is a one-time command to be run offline.
ALTER TABLE `Characters` ADD `id` INT NOT NULL;
ALTER TABLE `Paragraphs` ADD `character_id` INT NOT NULL ;
CREATE TEMPORARY TABLE `shkt` (
id_old VARCHAR(32) NOT NULL,
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY
);
INSERT INTO shkt(id_old) SELECT DISTINCT CharID FROM Characters;
UPDATE Characters, shkt SET Characters.id = shkt.id WHERE Characters.CharID = shkt.id_old;
UPDATE Paragraphs, shkt SET Paragraphs.character_id = shkt.id WHERE Paragraphs.CharID = shkt.id_old; # This is slooow (see comment above)
DROP TABLE `shkt`;
# -----------------------------------------------------------------------------------
# TODO rejig characters/works many to many relationship to have a proper join table.
# -----------------------------------------------------------------------------------
# TIDY UP:
# drop columns that are no longer to be used, like CharID, WorkID
ALTER TABLE `Chapters`
DROP `WorkID`;
ALTER TABLE `Characters`
DROP `CharID`;
ALTER TABLE `Paragraphs`
DROP `WorkID`,
DROP `CharID`;
ALTER TABLE `Paragraphs` CHANGE `ParagraphID` `id` INT NOT NULL;
ALTER TABLE `Chapters` CHANGE `ChapterID` `id` INT NOT NULL;
ALTER TABLE `Works`
DROP `WorkID`;
# mark 'id' columns as primary keys
ALTER TABLE `Works` ADD PRIMARY KEY ( `id` );
ALTER TABLE `Chapters` ADD PRIMARY KEY ( `id` );
ALTER TABLE `Paragraphs` ADD PRIMARY KEY ( `id` );
ALTER TABLE `Characters` ADD PRIMARY KEY ( `id` );
# rename tables to what Rails might expect
RENAME TABLE `Chapters` TO `chapters` ;
RENAME TABLE `Works` TO `works` ;
RENAME TABLE `Paragraphs` TO `paragraphs` ;
RENAME TABLE `Characters` TO `characters` ;
# add indexes for efficiency, and remove no-longer-used indexes
ALTER TABLE `chapters` ADD INDEX ( `work_id` );
ALTER TABLE `paragraphs` ADD INDEX ( `work_id` );
ALTER TABLE `paragraphs` ADD INDEX ( `character_id` );
# add foreign key relationships? Rails doesn't insist on them.
ALTER TABLE `chapters` ADD FOREIGN KEY ( `work_id` ) REFERENCES `shkspr`.`works` (
`id`
) ON DELETE CASCADE ON UPDATE CASCADE ;
ALTER TABLE `paragraphs` ADD FOREIGN KEY ( `work_id` ) REFERENCES `shkspr`.`works` (
`id`
) ON DELETE CASCADE ON UPDATE CASCADE ;
ALTER TABLE `paragraphs` ADD FOREIGN KEY ( `character_id` ) REFERENCES `shkspr`.`characters` (
`id`
) ON DELETE CASCADE ON UPDATE CASCADE ;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment