Skip to content

Instantly share code, notes, and snippets.

@kmizumar
Created July 1, 2016 18:10
Show Gist options
  • Save kmizumar/0a5b3030e3a79fa4729e709c6eee2d69 to your computer and use it in GitHub Desktop.
Save kmizumar/0a5b3030e3a79fa4729e709c6eee2d69 to your computer and use it in GitHub Desktop.
import org.apache.spark.sql.types.DataTypes
/** Parses `s` with `String.toInt`; yields `Some(value)` on success and `None` when parsing throws. */
def toInt(s: String): Option[Int] = scala.util.Try(s.toInt).toOption
// Reads "3G.csv" (header row, inferred schema) through the spark-csv data source and
// reshapes it into the columns pf, ip0, ip1, ip2, ip3 and mask.
val df = {
  // Give the three source columns short ASCII names (the originals are Japanese labels;
  // roughly "managed address type", "IP address" and "netmask").
  val renamed = sqlContext.read
    .format("com.databricks.spark.csv")
    .option("header", "true")
    .option("inferSchema", "true")
    .load("3G.csv")
    .withColumnRenamed("管理アドレス種別", "pf")
    .withColumnRenamed("IPアドレス", "ip")
    .withColumnRenamed("ネットマスク", "mask")
  // One anchored dotted-quad pattern per octet; only the position of the capture group differs.
  val octetPatterns = Seq(
    "ip0" -> "^(\\d+)\\.\\d+\\.\\d+\\.\\d+$",
    "ip1" -> "^\\d+\\.(\\d+)\\.\\d+\\.\\d+$",
    "ip2" -> "^\\d+\\.\\d+\\.(\\d+)\\.\\d+$",
    "ip3" -> "^\\d+\\.\\d+\\.\\d+\\.(\\d+)$"
  )
  val octetColumns = octetPatterns.map { case (name, pattern) =>
    regexp_extract($"ip", pattern, 1).alias(name)
  }
  // The pf pattern captures the segment that follows the last '/'.
  val pfColumn = regexp_extract($"pf", "^.*/([^/]+)$", 1).alias("pf")
  val extracted = renamed.select(Seq(pfColumn) ++ octetColumns ++ Seq($"mask"): _*)
  // Cast the four octets and the mask to integers, one withColumn per column, in the original order.
  Seq("ip0", "ip1", "ip2", "ip3", "mask").foldLeft(extracted) { (frame, name) =>
    frame.withColumn(name, frame(name).cast(DataTypes.IntegerType))
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment