Last active
August 29, 2015 13:56
-
-
Save jasdeepkhalsa/8860347 to your computer and use it in GitHub Desktop.
Extracting first letters of each word in a string depending upon a RegEx in MySQL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| update scriptures set search = replace(search,'॥',''); | |
| update scriptures set search = replace(search,'ਉ','ੳ'); | |
| update scriptures set search = replace(search,'ਊ','ੳ'); | |
| update scriptures set search = replace(search,'ਓ','ੳ'); | |
| update scriptures set search = replace(search,'ਔ','ਅ'); | |
| update scriptures set search = replace(search,'ਐ','ਅ'); | |
| update scriptures set search = replace(search,'ਆ','ਅ'); | |
| update scriptures set search = replace(search,'ਏ','ੲ'); | |
| update scriptures set search = replace(search,'ਈ','ੲ'); | |
| update scriptures set search = replace(search,'ਇ','ੲ'); | |
| update scriptures set search = replace(search,'੦',''); | |
| update scriptures set search = replace(search,'੧',''); | |
| update scriptures set search = replace(search,'੨',''); | |
| update scriptures set search = replace(search,'੩',''); | |
| update scriptures set search = replace(search,'੪',''); | |
| update scriptures set search = replace(search,'੫',''); | |
| update scriptures set search = replace(search,'੬',''); | |
| update scriptures set search = replace(search,'੭',''); | |
| update scriptures set search = replace(search,'੮',''); | |
| update scriptures set search = replace(search,'੯',''); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| delimiter $$ | |
| drop function if exists `initials`$$ | |
| CREATE FUNCTION `initials`(str text, expr text) RETURNS text CHARSET utf8 | |
| begin | |
| declare result text default ''; | |
| declare buffer text default ''; | |
| declare i int default 1; | |
| if(str is null) then | |
| return null; | |
| end if; | |
| set buffer = trim(str); | |
| while i <= length(buffer) do | |
| if substr(buffer, i, 1) regexp expr then | |
| set result = concat( result, substr( buffer, i, 1 )); | |
| set i = i + 1; | |
| while i <= length( buffer ) and substr(buffer, i, 1) regexp expr do | |
| set i = i + 1; | |
| end while; | |
| while i <= length( buffer ) and substr(buffer, i, 1) not regexp expr do | |
| set i = i + 1; | |
| end while; | |
| else | |
| set i = i + 1; | |
| end if; | |
| end while; | |
| return result; | |
| end$$ | |
| drop function if exists `acronym`$$ | |
| CREATE FUNCTION `acronym`(str text) RETURNS text CHARSET utf8 | |
| begin | |
| declare result text default ''; | |
| set result = initials( str, '[ੴਓੳਅੲਸਹਕਖਗਘਙਚਛਜਝਞਟਠਡਢਣਤਥਦਧਨਪਫਬਭਮਯਰਲਵੜਸ਼ਖ਼ਗ਼ਜ਼ਫ਼ਲ਼]' ); | |
| return result; | |
| end$$ | |
| delimiter ; | |
| UPDATE scriptures SET search = acronym(scripture) |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Just to explain the last line:
scripturesis the table I want to updatesearchis a new empty column I created inside the table to store the resultscriptureis an existing column inside thescripturestable with all the strings I want to extract fromacronymis the function previously declared which is looking to match the first letter of each word with a character from the RegEx[ੴਓੳਅੲਸਹਕਖਗਘਙਚਛਜਝਞਟਠਡਢਣਤਥਦਧਨਪਫਬਭਮਯਰਲਵੜਸ਼ਖ਼ਗ਼ਜ਼ਫ਼ਲ਼]So this final line of the code will go through each row of the column
scripture, apply the functionacronymto it and store the result in the newsearchcolumn.