Last active
October 22, 2024 17:03
-
-
Save duanehutchins/c6000b367b3032b0b495c46b3bc370c8 to your computer and use it in GitHub Desktop.
MySQL split comma-separated string into rows
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- split-string-into-rows.sql | |
-- Duane Hutchins | |
-- https://www.github.com/duanehutchins | |
-- Split a string into a mysql resultset of rows | |
-- This is designed to work with a comma-separated string (csv, SET, array) | |
-- To use a delimiter other than a comma: | |
-- Just change all the occurrences of ',' to the new delimiter | |
-- (four occurrences in SET_EXTRACT and one occurrence in SET_COUNT) | |
-- Function SET_EXTRACT | |
-- Essentially does the reverse of MySQL's built-in function FIND_IN_SET(str,strlist) = index INT | |
-- Splits a comma-separated string (AKA "SET"), $strlist, and returns the element (aka substring) matching the provided index, $i. | |
-- If index $i is zero or positive, the elements are counted from the left, starting at zero. | |
-- If index $i is negative, the elements are instead counted from the right, starting at -1. | |
-- If either parameter is NULL or if $i is outside the element count, NULL will be returned | |
-- Usage Example: SELECT SET_EXTRACT(2,'foo,bar,foobar'); // "foobar" | |
DROP FUNCTION SET_EXTRACT; | |
CREATE FUNCTION SET_EXTRACT($i SMALLINT UNSIGNED, $strlist MEDIUMBLOB) RETURNS VARBINARY(255) | |
DETERMINISTIC NO SQL | |
RETURN NULLIF(SUBSTRING_INDEX(SUBSTRING_INDEX(CONCAT(0b0, ',', $strlist, ',', 0b0), ',', $i+1.5*(SIGN($i+0.5)+1)-1), ',', -SIGN($i+0.5)),0b0); | |
-- Function SET_COUNT | |
-- Returns the number of elements in a set | |
-- (Actually returns the one plus the number of commas in the string) | |
DROP FUNCTION SET_COUNT; | |
CREATE FUNCTION SET_COUNT($strlist MEDIUMBLOB) RETURNS SMALLINT UNSIGNED | |
DETERMINISTIC NO SQL | |
RETURN 1+CHAR_LENGTH($strlist)-CHAR_LENGTH(REPLACE($strlist,',','')); | |
-- Table number_set | |
-- A column of integers counting from 0 to 255 | |
-- This is a handy tool to pivot a table (or mysql result) row of columns into a column of rows | |
-- The ENGINE=MEMORY engine may be used for a performance gain, but see note on the MEMORY engine listed below | |
DROP TABLE `number_set`; | |
CREATE TABLE `number_set` ( | |
`n` TINYINT(3) UNSIGNED NOT NULL PRIMARY KEY, | |
UNIQUE KEY `n` (`n`) USING BTREE | |
) ENGINE=INNODB DEFAULT CHARSET=BINARY MAX_ROWS=256 MIN_ROWS=256; | |
-- Note: If using MEMORY engine for the number_set table, the data in MEMORY tables is lost on server restart, | |
-- I recommend adding this INSERT query below to the mysql --init-file, if using MEMORY engine | |
-- https://dev.mysql.com/doc/refman/5.7/en/memory-storage-engine.html#memory-storage-engine-loading-data | |
-- Insert numbers 0-255 into the number_set table | |
TRUNCATE number_set; | |
INSERT INTO number_set (n) | |
SELECT STRAIGHT_JOIN n1.n|(n2.n<<2)|(n3.n<<4)|(n4.n<<6) AS n FROM | |
(SELECT 0 AS n UNION SELECT 1 UNION SELECT 2 UNION SELECT 3) n1, | |
(SELECT 0 AS n UNION SELECT 1 UNION SELECT 2 UNION SELECT 3) n2, | |
(SELECT 0 AS n UNION SELECT 1 UNION SELECT 2 UNION SELECT 3) n3, | |
(SELECT 0 AS n UNION SELECT 1 UNION SELECT 2 UNION SELECT 3) n4; | |
-- Function split_string_into_rows() | |
-- Only used as a work-around to pass @split_string_into_rows to the split_string_into_rows VIEW | |
-- Returns @split_string_into_rows if the argument is NULL | |
-- Sets @split_string_into_rows if the argument is not NULL | |
DROP FUNCTION split_string_into_rows; | |
CREATE FUNCTION split_string_into_rows($split_string_into_rows MEDIUMBLOB) RETURNS MEDIUMBLOB | |
DETERMINISTIC NO SQL | |
RETURN IF($split_string_into_rows IS NULL, IFNULL(@split_string_into_rows,''), '1'|@split_string_into_rows:=$split_string_into_rows); | |
-- View split_string_into_rows | |
-- Splits a comma-delimited string (aka csv aka comma-separated string) into rows | |
-- Result set contains the index (`i`) and element (`e`) | |
-- Resultset sorted by index, starting at zero | |
-- The comma-separated string is passed via @split_string_into_rows | |
-- Usage Examples: | |
-- Two queries: | |
-- SET @split_string_into_rows = 'foo,bar,foobar'; SELECT e FROM split_string_into_rows; | |
-- As a single query: | |
-- SELECT e FROM split_string_into_rows WHERE split_string_into_rows('foo,bar,foobar,barfoo'); | |
-- With a JOIN to another table: | |
-- SELECT u.name FROM users u JOIN split_string_into_rows s ON u.birth_month = s.e WHERE split_string_into_rows('March,April,May'); | |
-- _ or even better _ | |
-- SELECT STRAIGHT_JOIN u.name FROM split_string_into_rows s, users u WHERE u.birth_month = s.e AND split_string_into_rows('March,April,May,June'); | |
-- Field indexes are still used when doing a join against a string split! | |
-- This preforms much faster than FIND_IN_SET() because the indexes are preserved. | |
-- Limited to 256 results | |
CREATE OR REPLACE ALGORITHM = MERGE VIEW split_string_into_rows(i,e) AS | |
SELECT HIGH_PRIORITY SQL_SMALL_RESULT n1.n AS i, SET_EXTRACT(n1.n, split_string_into_rows(NULL)) AS e | |
FROM number_set n1 | |
WHERE 1&(n1.n < SET_COUNT(split_string_into_rows(NULL))); | |
-- Limited to 65535 results (slightly slower) | |
CREATE OR REPLACE VIEW split_string_into_rows(i,e) AS | |
SELECT STRAIGHT_JOIN n1.n|(n256.n<<8) AS i, SET_EXTRACT(n1.n|(n256.n<<8), split_string_into_rows(NULL)) AS e | |
FROM number_set n1, number_set n256 | |
WHERE 1&(n1.n|(n256.n<<8) < SET_COUNT(split_string_into_rows(NULL))); | |
-- Larger than 65535 results will get very slow, | |
-- but can be done with additional joins within the above view | |
-- and adjusting the INT and BLOB variable types to support larger sizes in the functions |
@mans1969 That should work fine, and none of this code uses DECIMAL, so it may be a mistake.
Let's break it down. Which steps fail below? (I should put this as a unit test)
SET @split_string_into_rows = 'foo, bar, foobar';
SELECT SET_COUNT(@split_string_into_rows); // Should be 3
SELECT SET_EXTRACT(1, @split_string_into_rows); // should be "bar"
SELECT split_string_into_rows(NULL); // Should be "foo, bar, foobar"
SELECT * FROM number_set LIMIT 1; // Should be 0
SELECT * FROM split_string_into_rows; // Should be three rows: "0 | foo", "1 | bar", "2 | foobar"
SELECT split_string_into_rows('foo, bar, foobar'); // Should be 1
SELECT @split_string_into_rows; // Should be "foo, bar, foobar"
SELECT * FROM split_string_into_rows WHERE split_string_into_rows('foo, bar, foobar'); // Should be three rows: "0 | foo", "1 | bar", "2 | foobar"
I have run the test
There are 2 test that give a error.
The error is : Encountered illegal value '' when converting to DECIMAL.
Maybe the value '' is a empty string and sql try to convert it ?
this are the 2 that give a error
- SELECT split_string_into_rows('foo, bar, foobar');
- SELECT * FROM split_string_into_rows WHERE split_string_into_rows('foo, bar, foobar');
when i import the function this is what is inside the database :
=====================
CREATE FUNCTION `split_string_into_rows`($split_string_into_rows MEDIUMBLOB) RETURNS mediumblob
NO SQL
DETERMINISTICRETURN IF($split_string_into_rows IS NULL, ***@***.***_string_into_rows,''), ***@***.***_string_into_rows:=$split_string_into_rows);
=====================
i hope i have give you the right information.
The editor i use is "Sequel Ace". The database is running on a Linux server.
Mans
Ik ben dyslectisch. De gemaakte taal- en stijlfouten komen
niet voort uit desinteresse of nonchalance. Ik kan het niet beter dan dit en
heb mijn uiterste best gedaan om het zo begrijpelijk mogelijk op te schrijven.
Het kan zijn dat, ondanks mijn inspanning, in deze e-mail en eventueel
bijbehorende bijlagen, zinnen voorkomen die voor u niet te begrijpen zijn. Laat
mij dat dan weten.
Op dinsdag 15 oktober 2024 om 17:10 schreef Duane Hutchins ***@***.***>:
… @duanehutchins commented on this gist.
---------------------------------------------------------------
***@***.***(https://github.com/mans1969) That should work fine, and none of this code uses DECIMAL, so it may be a mistake.
Let's break it down. Which steps fail below? (I should put this as a unit test)
SET
@split_string_into_rows
=
'
foo, bar, foobar
'
;
SELECT
***@***.***_string_into_rows);
//
Should be
3
SELECT
SET_EXTRACT(
1
, @split_string_into_rows);
//
should be
"
bar
"
SELECT
split_string_into_rows(
NULL
);
//
Should be
"
foo, bar, foobar
"
SELECT
*
FROM
number_set
LIMIT
1
;
//
Should be
0
SELECT
*
FROM
split_string_into_rows;
//
Should be three rows:
"
0 | foo
"
,
"
1 | bar
"
,
"
2 | foobar
"
SELECT
split_string_into_rows(
'
foo, bar, foobar
'
);
//
Should be
1
SELECT
@split_string_into_rows;
//
Should be
"
foo, bar, foobar
"
SELECT
*
FROM
split_string_into_rows
WHERE
split_string_into_rows(
'
foo, bar, foobar
'
);
//
Should be three rows:
"
0 | foo
"
,
"
1 | bar
"
,
"
2 | foobar
"
—
Reply to this email directly, [view it on GitHub](https://gist.github.com/duanehutchins/c6000b367b3032b0b495c46b3bc370c8#gistcomment-5235410) or [unsubscribe](https://github.com/notifications/unsubscribe-auth/AIEQBZTVWPIQCUB43FXCSC3Z3UV65BFKMF2HI4TJMJ2XIZLTSKBKK5TBNR2WLJDUOJ2WLJDOMFWWLO3UNBZGKYLEL5YGC4TUNFRWS4DBNZ2F6YLDORUXM2LUPGBKK5TBNR2WLJDHNFZXJJDOMFWWLK3UNBZGKYLEL52HS4DFVRZXKYTKMVRXIX3UPFYGLK2HNFZXIQ3PNVWWK3TUUZ2G64DJMNZZDAVEOR4XAZNEM5UXG5FFOZQWY5LFVA2DKOBQGU4DQN5HORZGSZ3HMVZKMY3SMVQXIZI).
You are receiving this email because you were mentioned.
Triage notifications on the go with GitHub Mobile for [iOS](https://apps.apple.com/app/apple-store/id1477376905) or [Android](https://play.google.com/store/apps/details?id=com.github.android).
i have done one extra test
SELECT * FROM split_string_into_rows WHERE split_string_into_rows('a, b, c');
this one give the error => Encountered illegal value '' when converting to DECIMAL
SELECT * FROM split_string_into_rows WHERE split_string_into_rows('34,24,67 , 1542');
this one give no error
maybe you can explain it now ... i hope .. lol
Mans
Ik ben dyslectisch. De gemaakte taal- en stijlfouten komen
niet voort uit desinteresse of nonchalance. Ik kan het niet beter dan dit en
heb mijn uiterste best gedaan om het zo begrijpelijk mogelijk op te schrijven.
Het kan zijn dat, ondanks mijn inspanning, in deze e-mail en eventueel
bijbehorende bijlagen, zinnen voorkomen die voor u niet te begrijpen zijn. Laat
mij dat dan weten.
Op dinsdag 22 oktober 2024 om 18:37 schreef ***@***.*** ***@***.***>:
… I have run the test
There are 2 test that give a error.
The error is : Encountered illegal value '' when converting to DECIMAL.
Maybe the value '' is a empty string and sql try to convert it ?
this are the 2 that give a error
- SELECT split_string_into_rows('foo, bar, foobar');
- SELECT * FROM split_string_into_rows WHERE split_string_into_rows('foo, bar, foobar');
when i import the function this is what is inside the database :
=====================
CREATE FUNCTION `split_string_into_rows`($split_string_into_rows MEDIUMBLOB) RETURNS mediumblob
NO SQL
DETERMINISTICRETURN IF($split_string_into_rows IS NULL, ***@***.***_string_into_rows,''), ***@***.***_string_into_rows:=$split_string_into_rows);
=====================
i hope i have give you the right information.
The editor i use is "Sequel Ace". The database is running on a Linux server.
Mans
Ik ben dyslectisch. De gemaakte taal- en stijlfouten komen
niet voort uit desinteresse of nonchalance. Ik kan het niet beter dan dit en
heb mijn uiterste best gedaan om het zo begrijpelijk mogelijk op te schrijven.
Het kan zijn dat, ondanks mijn inspanning, in deze e-mail en eventueel
bijbehorende bijlagen, zinnen voorkomen die voor u niet te begrijpen zijn. Laat
mij dat dan weten.
Op dinsdag 15 oktober 2024 om 17:10 schreef Duane Hutchins ***@***.***>:
> @duanehutchins commented on this gist.
> ---------------------------------------------------------------
>
> ***@***.***(https://github.com/mans1969) That should work fine, and none of this code uses DECIMAL, so it may be a mistake.
>
> Let's break it down. Which steps fail below? (I should put this as a unit test)
>
> SET
>
> @split_string_into_rows
>
> =
>
> '
>
> foo, bar, foobar
>
> '
>
> ;
>
> SELECT
>
> ***@***.***_string_into_rows);
>
> //
>
> Should be
>
> 3
>
> SELECT
>
> SET_EXTRACT(
>
> 1
>
> , @split_string_into_rows);
>
> //
>
> should be
>
> "
>
> bar
>
> "
>
> SELECT
>
> split_string_into_rows(
>
> NULL
>
> );
>
> //
>
> Should be
>
> "
>
> foo, bar, foobar
>
> "
>
> SELECT
>
> *
>
> FROM
>
> number_set
>
> LIMIT
>
> 1
>
> ;
>
> //
>
> Should be
>
> 0
>
> SELECT
>
> *
>
> FROM
>
> split_string_into_rows;
>
> //
>
> Should be three rows:
>
> "
>
> 0 | foo
>
> "
>
> ,
>
> "
>
> 1 | bar
>
> "
>
> ,
>
> "
>
> 2 | foobar
>
> "
>
> SELECT
>
> split_string_into_rows(
>
> '
>
> foo, bar, foobar
>
> '
>
> );
>
> //
>
> Should be
>
> 1
>
> SELECT
>
> @split_string_into_rows;
>
> //
>
> Should be
>
> "
>
> foo, bar, foobar
>
> "
>
> SELECT
>
> *
>
> FROM
>
> split_string_into_rows
>
> WHERE
>
> split_string_into_rows(
>
> '
>
> foo, bar, foobar
>
> '
>
> );
>
> //
>
> Should be three rows:
>
> "
>
> 0 | foo
>
> "
>
> ,
>
> "
>
> 1 | bar
>
> "
>
> ,
>
> "
>
> 2 | foobar
>
> "
>
> —
> Reply to this email directly, [view it on GitHub](https://gist.github.com/duanehutchins/c6000b367b3032b0b495c46b3bc370c8#gistcomment-5235410) or [unsubscribe](https://github.com/notifications/unsubscribe-auth/AIEQBZTVWPIQCUB43FXCSC3Z3UV65BFKMF2HI4TJMJ2XIZLTSKBKK5TBNR2WLJDUOJ2WLJDOMFWWLO3UNBZGKYLEL5YGC4TUNFRWS4DBNZ2F6YLDORUXM2LUPGBKK5TBNR2WLJDHNFZXJJDOMFWWLK3UNBZGKYLEL52HS4DFVRZXKYTKMVRXIX3UPFYGLK2HNFZXIQ3PNVWWK3TUUZ2G64DJMNZZDAVEOR4XAZNEM5UXG5FFOZQWY5LFVA2DKOBQGU4DQN5HORZGSZ3HMVZKMY3SMVQXIZI).
> You are receiving this email because you were mentioned.
>
> Triage notifications on the go with GitHub Mobile for [iOS](https://apps.apple.com/app/apple-store/id1477376905) or [Android](https://play.google.com/store/apps/details?id=com.github.android).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
creat function but ....
SELECT * FROM split_string_into_rows WHERE split_string_into_rows('34,24,67 , 1542');
works
SELECT * FROM split_string_into_rows WHERE split_string_into_rows('foo, bar, foobar');
does not work.
I get the following error
Encountered illegal value '' when converting to DECIMAL
any idee ?