Created
September 14, 2012 15:41
-
-
Save tototoshi/3722742 to your computer and use it in GitHub Desktop.
join(1) in Scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
AA 100円 | |
BB 300円 | |
CC 200円 | |
DD 400円 | |
EE 500円 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
labelA AA Category1 | |
labelB BB Category2 | |
labelC CC Category3 | |
labelD DD Category4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.File | |
import scala.io.Source | |
import scala.sys.process._ | |
/**
 * Joins the lines of two text files on a key column.
 *
 * Each file is read line by line, every line is split on `separator`, and the
 * resulting rows are handed to the iterator-based `join` overload together
 * with the key column numbers.
 *
 * Both files are closed as soon as the returned iterator reports that it has
 * no more rows, or when reading fails. A caller that abandons the iterator
 * before exhausting it leaves the files open.
 *
 * @param file1AndColumn first file and the number of its key column, passed
 *                       unchanged to the iterator-based overload
 * @param file2AndColumn second file and the number of its key column
 * @param separator      field separator; it is passed to `String.split`, so
 *                       it is interpreted as a regular expression
 * @return a lazy iterator over the joined rows
 */
def join(file1AndColumn: (File, Int),
         file2AndColumn: (File, Int),
         separator: String = "\t"): Iterator[List[String]] = {
  val (file1, col1) = file1AndColumn
  val (file2, col2) = file2AndColumn

  val source1 = Source.fromFile(file1)
  // If the second file cannot be opened, do not leave the first one open.
  val source2 =
    try Source.fromFile(file2)
    catch {
      case scala.util.control.NonFatal(e) =>
        source1.close()
        throw e
    }

  def rows(source: Source): Iterator[List[String]] =
    source.getLines().map(_.split(separator).toList)

  val joined = join(rows(source1) -> col1, rows(source2) -> col2)

  new Iterator[List[String]] {
    private var open = true

    private def closeAll(): Unit =
      if (open) {
        open = false
        try source1.close() finally source2.close()
      }

    def hasNext: Boolean = {
      var more = false
      // `more` stays false when the underlying iterator throws, so the
      // sources are released on failure as well as on normal exhaustion.
      try {
        more = joined.hasNext
        more
      } finally {
        if (!more) closeAll()
      }
    }

    def next(): List[String] = joined.next()
  }
}
/**
 * Joins two streams of already-split rows on a key column.
 *
 * For each row of the first input, rows of the second input are consumed
 * until one is found whose key field equals the key field of the first row.
 * The output row is the first row followed by the matching second row with
 * its key column removed.
 *
 * Both inputs are read strictly forward: rows of the second input that are
 * skipped while searching are never revisited, and the join ends as soon as
 * the second input is exhausted without a match, even if the first input has
 * rows left.
 *
 * `hasNext` may be called any number of times without consuming data, and
 * `next()` may be called without a preceding `hasNext`.
 *
 * @param line1AndKeyColumn rows of the first input and its 1-based key column
 * @param line2AndKeyColumn rows of the second input and its 1-based key column
 * @return a lazy iterator over the joined rows
 * @throws IndexOutOfBoundsException during iteration when an inspected row
 *                                   has no field at its key column
 */
def join(
  line1AndKeyColumn: (Iterator[List[String]], Int),
  line2AndKeyColumn: (Iterator[List[String]], Int)
): Iterator[List[String]] = new Iterator[List[String]] {
  private val left = line1AndKeyColumn._1
  private val right = line2AndKeyColumn._1
  // Key columns are given 1-based; convert once to 0-based list indices.
  private val keyIndex1 = line1AndKeyColumn._2 - 1
  private val keyIndex2 = line2AndKeyColumn._2 - 1

  // The next joined row, computed ahead of time so that hasNext is idempotent.
  private var pending: Option[List[String]] = None
  private var finished = false

  /** Computes the next joined row into `pending` unless one is already buffered. */
  private def advance(): Unit =
    if (pending.isEmpty && !finished) {
      if (left.hasNext) {
        val row1 = left.next()
        val key = row1(keyIndex1)
        var matched: Option[List[String]] = None
        // Plain loop instead of re-wrapping the iterator with dropWhile for
        // every row, which would nest one wrapper per row of the first input.
        while (matched.isEmpty && right.hasNext) {
          val candidate = right.next()
          if (candidate(keyIndex2) == key) matched = Some(candidate)
        }
        // Remove the key column by position, not every field that happens to
        // have the same value as the key.
        pending = matched.map(row2 => row1 ::: row2.patch(keyIndex2, Nil, 1))
      }
      finished = pending.isEmpty
    }

  def hasNext: Boolean = {
    advance()
    pending.isDefined
  }

  def next(): List[String] = {
    advance()
    val row = pending.getOrElse(throw new NoSuchElementException("next on exhausted join"))
    pending = None
    row
  }
}
// Script body: join AA.txt (key in column 1) with BB.txt (key in column 2),
// both space-separated, and print every joined row as tab-separated text.
join(new File("AA.txt") -> 1, new File("BB.txt") -> 2, separator = " ")
  .foreach(line => println(line.mkString("\t")))
/* | |
$ scala join.scala | |
AA 100円 labelA Category1 | |
BB 300円 labelB Category2 | |
CC 200円 labelC Category3 | |
DD 400円 labelD Category4 | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment