Created
December 8, 2009 22:46
-
-
Save lsegal/252070 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class String
  # Splits text into tokens the way a shell would, handling quoted
  # text as a single token. Use '\"' and "\'" to escape quotes and
  # '\\' to escape a backslash.
  #
  # Rules implemented here:
  #
  # * Leading and trailing whitespace is ignored; a run of whitespace
  #   outside quotes separates two tokens.
  # * Text between a pair of matching quotes (single or double) is added
  #   to the current token verbatim, whitespace included. The other kind
  #   of quote is an ordinary character inside the quoted text.
  # * A backslash makes the following quote or backslash literal, both
  #   inside and outside quotes. A backslash in front of any other
  #   character is dropped; escaped whitespace outside quotes still
  #   separates tokens.
  # * A quote that is never closed keeps the text collected so far as
  #   part of the last token instead of discarding it.
  #
  # @example
  #   'a "b c" d'.shell_split # => ["a", "b c", "d"]
  # @return [Array<String>] the tokens in order of appearance; empty when
  #   the receiver is empty or contains only whitespace
  def shell_split
    text = strip
    # No characters means no tokens (rather than one empty token).
    return [] if text.empty?

    # String.new("") gives mutable buffers even when string literals
    # are frozen, and keeps the encoding of the literal.
    out = [String.new("")]
    state = :none          # :none, :space, or the quote char currently open
    escape_next = false    # true right after an unescaped backslash
    quote = String.new("") # text collected inside the currently open quote

    text.each_char do |char|
      case state
      when :none, :space
        case char
        when /\s/
          # Only the first whitespace of a run opens a new token.
          out << String.new("") unless state == :space
          state = :space
          escape_next = false
        when "\\", '"', "'"
          if escape_next
            # Escaped backslash/quote is literal token text. Leaving the
            # :space state here is essential: the literal character has
            # started a token, so following whitespace must end it.
            out.last << char
            state = :none
            escape_next = false
          elsif char == "\\"
            escape_next = true
          else
            # Unescaped quote: start collecting quoted text.
            state = char
            quote = String.new("")
          end
        else
          state = :none
          out.last << char
          escape_next = false
        end
      when '"', "'"
        case char
        when '"', "'"
          if escape_next
            quote << char
            escape_next = false
          elsif char == state
            # Matching close quote: commit the quoted text to the token.
            out.last << quote
            state = :none
          else
            # The other kind of quote is plain text in here.
            quote << char
          end
        when "\\"
          if escape_next
            quote << char
            escape_next = false
          else
            escape_next = true
          end
        else
          quote << char
          escape_next = false
        end
      end
    end

    # Input ended inside a quote: keep what was collected rather than
    # silently losing it.
    out.last << quote if state.is_a?(String)
    out
  end
end
# Examples pinning down how String#shell_split tokenizes plain, quoted
# and backslash-escaped input.
describe String do
  describe '#shell_split' do
    it "should split simple non-quoted text" do
      tokens = "a b c".shell_split
      tokens.should == ["a", "b", "c"]
    end

    it "should split double quoted text into single token" do
      # The double-quoted run keeps its inner spaces.
      tokens = 'a "b c d" e'.shell_split
      tokens.should == ["a", "b c d", "e"]
    end

    it "should split single quoted text into single token" do
      # Single quotes group text exactly like double quotes.
      tokens = "a 'b c d' e".shell_split
      tokens.should == ["a", "b c d", "e"]
    end

    it "should handle escaped quotations in quotes" do
      # Input is: 'a \' b'  (the inner quote is backslash-escaped).
      tokens = "'a \\' b'".shell_split
      tokens.should == ["a ' b"]
    end

    it "should handle escaped quotations outside quotes" do
      # Input is: \'a 'b'  (the first quote is literal, not an opener).
      tokens = "\\'a 'b'".shell_split
      tokens.should == ["'a", "b"]
    end

    it "should handle escaped backslash" do
      # Input is: \\'a b c'  (a literal backslash glued to a quoted run).
      tokens = "\\\\'a b c'".shell_split
      tokens.should == ['\a b c']
    end

    it "should handle complex input" do
      # Mixes escaped quotes outside quotes, escaped quotes inside
      # quotes, and an escaped backslash inside single quotes.
      input = "hello \\\"world \"1 2\\\" 3\" a 'b \"\\\\\\'' c"
      expected = ["hello", "\"world", "1 2\" 3", "a", "b \"\\'", "c"]
      input.shell_split.should == expected
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment