Last active
August 29, 2015 13:55
-
-
Save xionon/8739402 to your computer and use it in GitHub Desktop.
This is a runnable tutorial on 3 regex tips that will make your code more readable: 1) named capture groups, 2) whitespace + comments, 3) an alternate literal syntax
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'minitest' | |
require 'minitest/spec' | |
require 'minitest/autorun' | |
describe 'Regexes are super great' do | |
before do | |
@month = "12" | |
@day = "15" | |
@year = "2014" | |
@date = "#{@month}/#{@day}/#{@year}" | |
end | |
# You can use () to say, "there are multiple things in this string that I care about, | |
# please capture them all and let me know what they were | |
it "defines capture groups using the () syntax, so you can refer to them like an array" do | |
month_day_year_matcher = /(\d\d)\/(\d\d)\/(\d\d\d\d)/ | |
matchdata = @date.match( month_day_year_matcher ) | |
# => #<MatchData "12/15/2014" 1:"12" 2:"15" 3:"2014"> | |
matchdata[1].must_equal @month | |
matchdata[2].must_equal @day | |
matchdata[3].must_equal @year | |
end | |
# Note that they are assigned to the magic $1..$3 variables, but also the returned | |
# MatchData instance is array-like. It would be useful if these had names, | |
# though, because there might be a lot of them! | |
it "stores the captured data in really terrible magical global variables" do | |
month_day_year_matcher = /(\d\d)\/(\d\d)\/(\d\d\d\d)/ | |
matchdata = @date.match( month_day_year_matcher ) | |
$1.must_equal @month | |
$2.must_equal @day | |
$3.must_equal @year | |
end | |
# Well, turns out you can kind of assign variables in regex. It’s called a | |
# "named capture group." The returned MatchData object will be hash-like. | |
# The ruby syntax is like this: | |
it "names capture groups using ?<>, so you can refer to matchdata like a hash" do | |
month_day_year_matcher = /(?<month>\d\d)\/(?<day>\d\d)\/(?<year>\d\d\d\d)/ | |
matchdata = @date.match( month_day_year_matcher ) | |
# Wow, such readable, very good reasoning | |
matchdata[:year].must_equal @year | |
matchdata[:month].must_equal @month | |
matchdata[:day].must_equal @day | |
end | |
# Now it’s noisy, though, and hard to read. If you add line breaks, it’ll fubar | |
# the regex, because it’s going to try to match those line breaks... | |
# | |
# BUT WHAT IF IT DIDN’T? You guessed it, you can make your regex ignore non-escaped | |
# whitespace using the “x” flag, which means you can break your regex into | |
# multiple lines, like so: | |
it "can ignore newlines so you can make your regex look good" do | |
month_day_year_matcher = / | |
(?<month>\d\d) | |
\/(?<day>\d\d) | |
\/(?<year>\d\d\d\d) # if we're already ignoring whitespace, we'll ignore comments, too | |
/x # <— see that tiny little x there??? that's the magic bit | |
matchdata = @date.match(month_day_year_matcher) | |
matchdata[:year].must_equal @year | |
matchdata[:month].must_equal @month | |
matchdata[:day].must_equal @day | |
end | |
# Last, you can use alternate %r{} syntax to define a regex, which makes it a | |
# little clearer what’s going on. | |
it "can use alternate syntax that lets you skip escaping a forward slash" do | |
month_day_year_matcher = %r{ | |
(?<month>\d\d) | |
/(?<day>\d\d) # look ma, no escapes! | |
/(?<year>\d\d\d\d) | |
}x | |
matchdata = @date.match month_day_year_matcher | |
matchdata[:year].must_equal @year | |
matchdata[:month].must_equal @month | |
matchdata[:day].must_equal @day | |
end | |
# For more on regex in general: | |
# http://www.regular-expressions.info/tutorial.html | |
# For more on named capture groups: | |
# http://www.regular-expressions.info/named.html | |
# For more on ruby's magic %r{} syntax, and the rest of ruby's specific flags: | |
# http://www.ruby-doc.org/core-1.9.3/Regexp.html | |
end | |
# Run options: --seed 61466
#
# Running tests:
#
# .....
#
# Finished tests in 0.000703s, 7112.3755 tests/s, 21337.1266 assertions/s.
#
# 5 tests, 15 assertions, 0 failures, 0 errors, 0 skips
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment