Created
June 25, 2010 22:07
-
-
Save oluies/453519 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// String.matches succeeds only when the pattern matches the ENTIRE string, so
// the lookbehind probes below are false whenever the subject holds anything
// besides the single "b" the pattern consumes. Searching inside a longer
// string needs a find-style call, e.g. "(?<!a)b".r.findFirstIn("cab").
"99 bottles of beer".matches("(\\d+) bottles of beer") // true

// Negative lookbehind: a "b" that is not preceded by "a".
"cab".matches("(?<!a)b")  // false
"b".matches("(?<!a)b")    // true
"cab".matches("(?<!a)b")  // false (same probe repeated)
"bed".matches("(?<!a)b")  // false
"debt".matches("(?<!a)b") // false

// Positive lookbehind: a "b" that is preceded by "a".
"cab".matches("(?<=a)b")  // false
"bed".matches("(?<=a)b")  // false
"debt".matches("(?<=a)b") // false
// A lone negative-lookbehind pattern: "b" not preceded by "a".
val e = "(?<!a)b".r

// Lookbehind whose body is "C" followed by an unbounded run of "a". It has its
// own name so it does not clash with the `expr` list defined below.
// NOTE(review): an unbounded quantifier inside a lookbehind may be rejected by
// java.util.regex on some JDKs, hence the Try — confirm on the target JDK.
val unboundedLookbehind = scala.util.Try("""(?<=Ca*)""".r)

// The lookbehind variants that are run against `text`.
val expr = List(
  "(?<!a)b".r,      // "b" not preceded by "a"
  "(?<=a)b".r,      // "b" preceded by "a"
  """(?<=a+)b""".r, // "b" preceded by one or more "a"
  """(?<=a*)b""".r  // "b" preceded by zero or more "a"
)

// Sample subject text the patterns are searched in.
val text = "cab bed debt bad baaad ab b CaaaaaaaaaaaaaaaBaaaa"
/** Prints one regex match followed by the searched text and two caret markers.
  *
  * Four lines are written to standard output:
  *  1. the pattern, the matched text and the match's start/end offsets,
  *  2. the full source text the match was found in,
  *  3. a caret under the first matched character,
  *  4. a caret under the last matched character.
  *
  * @param e the regex that produced the match (printed via its `toString`)
  * @param m the match to describe
  */
def printIt(e: scala.util.matching.Regex, m: scala.util.matching.Regex.Match): Unit = {
  println(s"$e matched ${m.matched} m.start ${m.start} m.end ${m.end}")
  println(m.source)
  // m.start is a 0-based offset, so exactly m.start spaces of padding place
  // the caret under the first matched character.
  println(" " * m.start + "^")
  // m.end is exclusive, so m.end - 1 is the offset of the last matched
  // character; max(0, …) keeps a zero-length match at offset 0 well-defined.
  println(" " * math.max(0, m.end - 1) + "^")
}
// Run every pattern in `expr` over `text` and report each match it finds.
for {
  e <- expr
  m <- e.findAllIn(text).matchData
} printIt(e, m)
(?<!a)b cab bed debt ab b matched bm.start 4 m.end 5 | |
(?<!a)b cab bed debt ab b matched bm.start 10 m.end 11 | |
(?<!a)b cab bed debt ab b matched bm.start 16 m.end 17 | |
(?<=a)b cab bed debt ab b matched bm.start 2 m.end 3 | |
(?<=a)b cab bed debt ab b matched bm.start 14 m.end 15 | |
(?<=a+)b cab bed debt ab b matched bm.start 2 m.end 3 | |
(?<=a+)b cab bed debt ab b matched bm.start 14 m.end 15 | |
(?<!a)b matched bm.start 4 m.end 5 | |
cab bed debt bad baaad ab b CaaaaaaaaaaaaaaaBaaaa | |
^ | |
^ | |
(?<!a)b matched bm.start 10 m.end 11 | |
cab bed debt bad baaad ab b CaaaaaaaaaaaaaaaBaaaa | |
^ | |
^ | |
(?<!a)b matched bm.start 13 m.end 14 | |
cab bed debt bad baaad ab b CaaaaaaaaaaaaaaaBaaaa | |
^ | |
^ | |
(?<!a)b matched bm.start 17 m.end 18 | |
cab bed debt bad baaad ab b CaaaaaaaaaaaaaaaBaaaa | |
^ | |
^ | |
(?<!a)b matched bm.start 26 m.end 27 | |
cab bed debt bad baaad ab b CaaaaaaaaaaaaaaaBaaaa | |
^ | |
^ | |
(?<=a)b matched bm.start 2 m.end 3 | |
cab bed debt bad baaad ab b CaaaaaaaaaaaaaaaBaaaa | |
^ | |
^ | |
(?<=a)b matched bm.start 24 m.end 25 | |
cab bed debt bad baaad ab b CaaaaaaaaaaaaaaaBaaaa | |
^ | |
^ | |
(?<=a+)b matched bm.start 2 m.end 3 | |
cab bed debt bad baaad ab b CaaaaaaaaaaaaaaaBaaaa | |
^ | |
^ | |
(?<=a+)b matched bm.start 24 m.end 25 | |
cab bed debt bad baaad ab b CaaaaaaaaaaaaaaaBaaaa | |
^ | |
^ | |
(?<=a*)b matched bm.start 2 m.end 3 | |
cab bed debt bad baaad ab b CaaaaaaaaaaaaaaaBaaaa | |
^ | |
^ | |
(?<=a*)b matched bm.start 4 m.end 5 | |
cab bed debt bad baaad ab b CaaaaaaaaaaaaaaaBaaaa | |
^ | |
^ | |
(?<=a*)b matched bm.start 10 m.end 11 | |
cab bed debt bad baaad ab b CaaaaaaaaaaaaaaaBaaaa | |
^ | |
^ | |
(?<=a*)b matched bm.start 13 m.end 14 | |
cab bed debt bad baaad ab b CaaaaaaaaaaaaaaaBaaaa | |
^ | |
^ | |
(?<=a*)b matched bm.start 17 m.end 18 | |
cab bed debt bad baaad ab b CaaaaaaaaaaaaaaaBaaaa | |
^ | |
^ | |
(?<=a*)b matched bm.start 24 m.end 25 | |
cab bed debt bad baaad ab b CaaaaaaaaaaaaaaaBaaaa | |
^ | |
^ | |
(?<=a*)b matched bm.start 26 m.end 27 | |
cab bed debt bad baaad ab b CaaaaaaaaaaaaaaaBaaaa | |
^ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Splits an address into: local name, optional "+digits" tag, and domain.
val EmailParser = """([\w\d\-\_]+)(\+\d+)?@([\w\d\-\.]+)""".r

// Sample address. The literal found here was an e-mail-protection placeholder
// that the pattern cannot match; a representative address is used instead.
val s = "user+1@example.com"

// Regex extractors match the whole string; a non-matching `s` raises MatchError.
// `num` is null when the optional "+digits" group does not participate.
val EmailParser(name, num, domain) = s
printf("Name: %s, Domain: %s\n", name, domain)
// Group 1: three-digit prefix. Group 2: the "ddd-dddd" remainder.
val pattern = """(\d{3})-(\d{3}-\d{4})""".r
val input = "212-555-6666 906-932-1111 415-222-3333 425-888-9999"

// matchData is a single-use iterator of Regex.Match values; the loop below
// consumes it.
val myMatchData = pattern.findAllIn(input).matchData
for (m <- myMatchData) println(s"Prefix : ${m.group(1)} Suffix ${m.group(2)}")
// Same phone pattern, with the first group given the inline name `company`.
val pattern = """(?<company>\d{3})-(\d{3}-\d{4})""".r
val input = "212-555-6666 906-932-1111 415-222-3333 425-888-9999"

// Single-use iterator of matches; consumed by the loop right below.
val myMatchData = pattern.findAllIn(input).matchData
for (m <- myMatchData) println(s"Prefix : ${m.group(1)} Suffix ${m.group(2)}")

// Print every captured group of every match, one per line.
for {
  m <- pattern.findAllIn(input).matchData
  e <- m.subgroups
} println(e)

// Print the group names reported for every match.
// NOTE(review): whether the inline name `company` shows up in groupNames
// depends on the Scala library version — confirm on the target version.
for {
  m <- pattern.findAllIn(input).matchData
  e <- m.groupNames
} println(e)

// The unnamed variant again, as a single self-contained expression.
"""(\d{3})-(\d{3}-\d{4})""".r
  .findAllIn("212-555-6666 906-932-1111 415-222-3333 425-888-9999")
  .matchData
  .foreach(m => println(s"Prefix : ${m.group(1)} Suffix ${m.group(2)}"))
scala> (pattern findAllIn input).matchData foreach {m => println("Prefix : "+ m.group(1) + " Suffix " + m.group(2)) } | |
Prefix : 212 Suffix 555-6666 | |
Prefix : 906 Suffix 932-1111 | |
Prefix : 415 Suffix 222-3333 | |
Prefix : 425 Suffix 888-9999 | |
scala> val date = "11/01/2010" | |
date: java.lang.String = 11/01/2010 | |
scala> val Date = """(\d\d)/((\d\d)/(\d\d\d\d))""".r | |
Date: scala.util.matching.Regex = (\d\d)/((\d\d)/(\d\d\d\d)) | |
scala> val Date(day, monthYear, month, year) = date | |
day: String = 11 | |
monthYear: String = 01/2010 | |
month: String = 01 | |
year: String = 2010 | |
// Phone pattern whose first group is named `prefix`. java.util.regex spells a
// named group (?<name>…); the (?P…) spellings are Python syntax and are
// rejected with a PatternSyntaxException when the Regex is constructed.
val pattern = """(?<prefix>\d{3})-(\d{3}-\d{4})""".r
Named Capturing Groups | |
http://www.regular-expressions.info/named.html | |
http://www.scala-lang.org/docu/files/api/scala/util/matching/Regex$object.MatchData.html#groupNames | |
groupNames | |
"(?<login>\w+) (?<id>\d+)" | |
// Scratch statements exploring the match API; they rely on `pattern` and
// `input` being defined earlier in the session.

// Print each matched substring.
pattern findAllIn input foreach{println _} | |
// m.matched is a String, so mkString(",") puts a comma between its characters.
(pattern findAllIn input).matchData foreach {m => println(m.matched mkString ",") } | |
// Print the first capture group of each match.
(pattern findAllIn input).matchData foreach {m => println(m.group(1)) } | |
// Print start offset, end offset, text before, text after and the source;
// the five arguments are adapted into a single tuple for println.
(pattern findAllIn input).matchData foreach {m => println(m.start, m.end, m.before, m.after, m.source) } | |
// Bind the first match; this refutable pattern fails with a MatchError when
// findFirstMatchIn returns None.
val Some(gg) = pattern findFirstMatchIn input | |
// NOTE(review): the right-hand side is the Regex itself rather than a String,
// and only one name is bound — this looks unfinished; confirm the intent
// (presumably `input` was meant on the right).
val pattern(phoneNumbers) = pattern | |
// Optional minus sign, integer digits, optional fractional part ("." + digits).
val Decimal = """(-)?(\d+)(\.\d*)?""".r
val input = "for -1.0 to 99 by 3"

// Print every number found in the input as a plain string.
Decimal.findAllIn(input).foreach(println)

// Destructure each number into its groups. An optional group that did not
// participate in the match is bound to null and is printed as "null".
for (Decimal(s, i, d) <- Decimal.findAllIn(input))
  println(s"sign: $s, integer: $i, decimal: $d")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment