Created
May 14, 2010 14:05
-
-
Save spaceCamel/401182 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collection.mutable.{ListBuffer, LinkedList}
import io.Source
import scala.collection.mutable.HashMap
/**
 * Reads every `Fast500_<digits>.txt` file in a data directory and prints the
 * companies found in them, followed by the total number of companies.
 *
 * Each file is read column by column: first a run of lines of the form
 * `<position> <company name>` (one per company), then one line per company
 * for each remaining attribute, in the order url, country, rate, field.
 *
 * Date: Apr 16, 2010
 * Time: 10:44:22 PM
 */
object Parser {

  /** Directory scanned when no directory is passed on the command line. */
  private val defaultDataDir = "/Users/xan/IdeaProjects/ScalaTest/data"

  /** Attribute keys that follow the position, in the order they are stored in a file. */
  private val attributes = List("company", "url", "country", "rate", "field")

  /** Key under which the leading number of a company line is stored. */
  private val positionAttribute = "position"

  /** Key for the text after the position on a company line. */
  private val nameAttribute = attributes.head

  /** Keys whose values come after the company lines, one full column at a time. */
  private val columnAttributes = attributes.tail

  /** A company line: digits, one whitespace character, then the company name. */
  private val CompanyEntry = """(\d+)\s(.*)""".r

  /** Names of the files to parse; the dot before the extension is literal. */
  private val dataFileName = """Fast500_\d+\.txt"""

  /**
   * Parses all data files and dumps the result to standard output.
   *
   * @param args optional; `args(0)` overrides the directory to scan. With no
   *             arguments the built-in default directory is used.
   */
  def main(args: Array[String]): Unit = {
    val dataDir = new java.io.File(args.headOption.getOrElse(defaultDataDir))

    val allCompanies = ListBuffer[HashMap[String, String]]()
    for (file <- dataFiles(dataDir)) allCompanies ++= parseFile(file)

    allCompanies.foreach(
      (m) => print("%s\n".format(
        m.map(
          (i) => "%s\n".format(i.toString)))))
    println("SIZE: %s".format(allCompanies.size))
  }

  /**
   * Lists the data files in `dir`, sorted by name so runs are reproducible.
   * Returns an empty list when `dir` is missing or is not a directory
   * (`listFiles()` yields null in that case).
   */
  private def dataFiles(dir: java.io.File): List[java.io.File] =
    Option(dir.listFiles()).map(_.toList).getOrElse(Nil)
      .filter(f => f.isFile && f.getName.matches(dataFileName))
      .sortBy(_.getName)

  /** Parses one data file, closing it even when parsing fails. */
  private def parseFile(file: java.io.File): List[HashMap[String, String]] = {
    val source = Source.fromFile(file)
    // parseCompanies returns a fully built list, so nothing reads the source after close().
    try parseCompanies(source.getLines(), file.getName)
    finally source.close()
  }

  /**
   * Builds one attribute map per company from the lines of a single file.
   *
   * @param rawLines the file's lines, in order
   * @param origin   name used in the warning printed for a truncated file
   * @return the companies in file order; a company from a truncated file only
   *         carries the attributes that were actually present
   */
  private def parseCompanies(rawLines: Iterator[String],
                             origin: String): List[HashMap[String, String]] = {
    val lines = rawLines.buffered
    val companies = ListBuffer[HashMap[String, String]]()

    // Peek before consuming: the first non-matching line is the first url
    // and must stay in the iterator. hasNext guards empty or header-only files.
    while (lines.hasNext && CompanyEntry.pattern.matcher(lines.head).matches()) {
      val CompanyEntry(position, name) = lines.next()
      companies += HashMap(positionAttribute -> position, nameAttribute -> name)
    }

    val parsed = companies.toList

    // Values are stored attribute-major: every company's url, then every
    // company's country, and so on. Enumerate the cells in that order.
    val cells = for (attribute <- columnAttributes; company <- parsed) yield (company, attribute)
    val values = lines.take(cells.size).toList

    for (((company, attribute), value) <- cells.zip(values)) company(attribute) = value

    if (values.size < cells.size)
      Console.err.println(
        "Warning: %s is truncated: expected %d attribute lines but found %d"
          .format(origin, cells.size, values.size))

    parsed
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment