Skip to content

Instantly share code, notes, and snippets.

@tototoshi
Created September 14, 2012 15:41
Show Gist options
  • Save tototoshi/3722742 to your computer and use it in GitHub Desktop.
Save tototoshi/3722742 to your computer and use it in GitHub Desktop.
join(1) in scala
AA 100円
BB 300円
CC 200円
DD 400円
EE 500円
labelA AA Category1
labelB BB Category2
labelC CC Category3
labelD DD Category4
import java.io.File
import scala.io.Source
import scala.sys.process._
/** Joins the lines of two text files on a key column, like `join(1)`.
  *
  * Every line of each file is split into fields with `String.split(separator)`,
  * so `separator` is interpreted as a regular expression. The resulting rows are
  * handed to the iterator-based `join` overload together with the key columns.
  *
  * The files are read lazily while the returned iterator is consumed. Both
  * files are closed as soon as the iterator reports that it is exhausted, or
  * when reading or joining fails; an iterator that is abandoned half-way keeps
  * its files open.
  *
  * @param file1AndColumn first file and the 1-based index of its key column
  * @param file2AndColumn second file and the 1-based index of its key column
  * @param separator      field separator (a regular expression), tab by default
  * @return a lazy iterator over the joined rows
  */
def join(file1AndColumn: (File, Int),
         file2AndColumn: (File, Int),
         separator: String = "\t"): Iterator[List[String]] = {
  val (file1, col1) = file1AndColumn
  val (file2, col2) = file2AndColumn

  val source1 = Source.fromFile(file1)
  // Do not leak the first handle when the second file cannot be opened.
  var secondOpened = false
  val source2 =
    try {
      val opened = Source.fromFile(file2)
      secondOpened = true
      opened
    } finally {
      if (!secondOpened) source1.close()
    }

  def rows(source: Source): Iterator[List[String]] =
    source.getLines().map(_.split(separator).toList)

  val joined = join(rows(source1) -> col1, rows(source2) -> col2)

  // Thin wrapper whose only job is to release the files. It forwards exactly
  // one call to `joined` per call it receives, so it adds no extra traversal.
  new Iterator[List[String]] {
    private var open = true

    private def closeAll(): Unit =
      if (open) {
        open = false
        try source1.close() finally source2.close()
      }

    def hasNext: Boolean = {
      var more = false
      try {
        more = open && joined.hasNext
      } finally {
        // Reached on normal exhaustion and when `joined.hasNext` throws.
        if (!more) closeAll()
      }
      more
    }

    def next(): List[String] = {
      if (!open) throw new NoSuchElementException("next on exhausted join")
      var delivered = false
      try {
        val row = joined.next()
        delivered = true
        row
      } finally {
        if (!delivered) closeAll()
      }
    }
  }
}
/** Joins two row streams on a key column, in the spirit of `join(1)`.
  *
  * Each argument pairs an iterator of rows (a row is a list of fields) with the
  * 1-based index of its key column. For every row taken from the first
  * iterator, rows of the second iterator are consumed and discarded until one
  * whose key field equals the first row's key field is found. The emitted row
  * is the complete first row followed by the matching second row with its key
  * column removed (only that column, even if other fields hold the same text).
  *
  * Both inputs are traversed once, forward only. The join therefore ends as
  * soon as a first-side row finds no partner among the remaining second-side
  * rows, or when the first iterator runs out, and every second-side row is
  * paired at most once.
  *
  * `hasNext` may be called any number of times without losing rows, and `next`
  * may be called without a preceding `hasNext`.
  *
  * @param line1AndKeyColumn rows of the first input and its 1-based key column
  * @param line2AndKeyColumn rows of the second input and its 1-based key column
  * @return a lazy iterator over the joined rows
  * @throws IndexOutOfBoundsException while iterating, if an inspected row has
  *                                   fewer fields than its key column index
  */
def join(
  line1AndKeyColumn: (Iterator[List[String]], Int),
  line2AndKeyColumn: (Iterator[List[String]], Int)
): Iterator[List[String]] = new Iterator[List[String]] {
  private val rows1 = line1AndKeyColumn._1
  private val rows2 = line2AndKeyColumn._1
  // Key columns are given 1-based; convert once to 0-based list indices.
  private val keyIndex1 = line1AndKeyColumn._2 - 1
  private val keyIndex2 = line2AndKeyColumn._2 - 1

  // The joined row computed by look-ahead but not yet handed to the caller.
  private var pending: Option[List[String]] = None
  // Set once no further joined row can be produced.
  private var finished = false

  /** Computes the next joined row into `pending` unless one is already waiting. */
  private def fill(): Unit =
    if (pending.isEmpty && !finished) {
      if (!rows1.hasNext) {
        finished = true
      } else {
        val row1 = rows1.next()
        val key = row1(keyIndex1)
        // Scan forward with a plain loop; re-wrapping rows2 in dropWhile for
        // every row would nest iterators ever deeper on large inputs.
        var partner: Option[List[String]] = None
        while (partner.isEmpty && rows2.hasNext) {
          val candidate = rows2.next()
          if (candidate(keyIndex2) == key) partner = Some(candidate)
        }
        partner match {
          case Some(row2) =>
            // Drop the key column by position, not by value.
            pending = Some(row1 ::: row2.take(keyIndex2) ::: row2.drop(keyIndex2 + 1))
          case None =>
            // The second input is used up, so nothing else can match.
            finished = true
        }
      }
    }

  def hasNext: Boolean = {
    fill()
    pending.isDefined
  }

  def next(): List[String] = {
    fill()
    pending match {
      case Some(row) =>
        pending = None
        row
      case None =>
        throw new NoSuchElementException("next on empty join iterator")
    }
  }
}
// Script entry point: join AA.txt (key in column 1) with BB.txt (key in
// column 2), both space-separated, and print every joined row tab-separated.
val joinedRows = join(new File("AA.txt") -> 1, new File("BB.txt") -> 2, separator = " ")
joinedRows.foreach { fields =>
  println(fields.mkString("\t"))
}
/*
$ scala join.scala
AA 100円 labelA Category1
BB 300円 labelB Category2
CC 200円 labelC Category3
DD 400円 labelD Category4
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment