Last active
June 29, 2020 23:07
-
-
Save RodolVelasco/21b96f1365335259fe1814c28ff40738 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.pd.scala | |
import org.apache.spark.sql.SparkSession | |
import org.apache.spark.sql.functions._ | |
import org.apache.spark.sql.expressions.Window | |
import java.util.Calendar | |
/**
 * Spark job that answers a handful of questions about the titles stored in
 * `title.basics.tsv` (a tab-separated file with a header row; presumably the
 * IMDb "title.basics" dump -- confirm against the data source).
 *
 * Every answer is printed to stdout; nothing is written back to disk.
 */
object App {

  /**
   * Runs all queries in sequence on a local Spark session.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder.master("local[*]").appName("Prueba Spark Luis Velasco").getOrCreate()
    try {
      // Titles: every column is read as a string because no schema is inferred.
      val peliculas = spark.read
        .option("header", "true")
        .option("delimiter", "\t")
        .csv("title.basics.tsv")

      // 1. How many action titles were released in 1987?
      val acc1987 = peliculas.filter("startYear == 1987 and genres like '%Action%'").count()
      println(s"Son $acc1987 peliculas de accion en el anio 1987")

      // 2. Year with the most titles produced.
      val peliculasPorAnio = peliculas
        .groupBy("startYear")
        .agg(count("tconst").alias("count_movies"))
      // The LIKE pattern is meant to discard the `\N` missing-value marker in startYear.
      peliculasPorAnio
        .filter("startYear not like '%\\N%'")
        .sort(desc("count_movies"))
        .show(1)

      // 3. Top 5 DIRECTORS with the most appearances in titles of any genre since 1987.
      // Disabled by the original author; kept for reference.
      // NOTE(review): it renames `tconst` to `llave` and then joins on `tconst`,
      // so it needs fixing before it can be re-enabled.
      /*val directores = spark.read
        .option("header", "true")
        .option("delimiter", "\t")
        .csv("title.crew.tsv")
      val apariciones = directores
        .withColumn("directors", explode(split(column("directors"), ",")))
        .withColumnRenamed("tconst", "llave")
      apariciones.printSchema()
      val top_actors = peliculas
        .join(apariciones, "tconst")
        .filter("startYear >= 1987")
        .groupBy("llave", "directors")
        .agg(count("tconst").alias("count_movies"))
        .sort(desc("count_movies"))
      top_actors.show(5)*/

      // 4. Number of titles produced per year over the last 10 years.
      val currentYear = Calendar.getInstance().get(Calendar.YEAR)
      // Comparing against a number drops non-numeric startYear values and
      // years later than the current one.
      peliculasPorAnio
        .filter(s"startYear <= $currentYear")
        .sort(desc("startYear"))
        .show(10)

      // 5. Title with the longest runtime per decade, for the last 4 decades.
      val currentDecade = currentYear - currentYear % 10
      peliculas
        .withColumn("decade", expr("Int(startYear) - (Int(startYear) % 10)"))
        .withColumn("runtimeMinutes", expr("Int(runtimeMinutes)"))
        .withColumn("longest", max("runtimeMinutes").over(Window.partitionBy("decade")))
        .filter(s"decade <= $currentDecade and longest == runtimeMinutes")
        // Several titles can tie for the longest runtime in a decade; keep a single
        // row per decade so that show(4) really covers four distinct decades.
        .dropDuplicates("decade")
        .sort(desc("decade"))
        .show(4)
    } finally {
      // Release the local Spark resources even when a query fails.
      spark.stop()
    }
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
PARA EL ENVIRONMENT | |
https://sparktutorials.github.io/2015/04/02/setting-up-a-spark-project-with-maven.html | |
MaxPrice class | |
http://knowdimension.com/en/data/create-a-spark-application-with-scala-using-maven-on-intellij/ | |
https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.11/2.2.1 | |
https://github.com/themarcoszone/examplesCode/tree/master/exampleSparkScala/input | |
https://www.imdb.com/interfaces/ | |
https://hortonworks.com/tutorial/setting-up-a-spark-development-environment-with-scala/ | |
libro | |
https://jaceklaskowski.gitbooks.io/mastering-apache-spark/ | |
https://spark.apache.org/docs/latest/sql-programming-guide.html | |
https://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.sql.Dataset | |
SQL API SPARK | |
https://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.package |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!-- Maven build for the Spark/Scala exercise: compiles src/main/scala with the
     Scala Maven plugin and pulls in Spark core and Spark SQL for Scala 2.11. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.pd.scala</groupId>
  <artifactId>Prueba1_Luis_Velasco</artifactId>
  <version>1.0-SNAPSHOT</version>
  <inceptionYear>2008</inceptionYear>
  <properties>
    <scala.version>2.11.6</scala.version>
  </properties>
  <repositories>
    <repository>
      <id>scala-tools.org</id>
      <name>Scala-Tools Maven2 Repository</name>
      <url>http://scala-tools.org/repo-releases</url>
    </repository>
  </repositories>
  <pluginRepositories>
    <!--<pluginRepository>
      <id>scala-tools.org</id>
      <name>Scala-Tools Maven2 Repository</name>
      <url>http://scala-tools.org/repo-releases</url>
    </pluginRepository>-->
  </pluginRepositories>
  <dependencies>
    <dependency>
      <!-- The Scala standard library is the artifact versioned by ${scala.version}.
           maven-scala-plugin is a build plugin (configured under <build>), not a
           library, so it must not be declared as a dependency:
      <groupId>org.scala-tools</groupId>
      <artifactId>maven-scala-plugin</artifactId>
      <version>${scala.version}</version>-->
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.4</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.specs</groupId>
      <artifactId>specs</artifactId>
      <version>1.2.5</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.11</artifactId>
      <version>2.2.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_2.11</artifactId>
      <version>2.2.1</version>
    </dependency>
  </dependencies>
  <build>
    <sourceDirectory>src/main/scala</sourceDirectory>
    <plugins>
      <!-- Compiles main and test Scala sources. -->
      <plugin>
        <groupId>org.scala-tools</groupId>
        <artifactId>maven-scala-plugin</artifactId>
        <executions>
          <execution>
            <goals>
              <goal>compile</goal>
              <goal>testCompile</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <scalaVersion>${scala.version}</scalaVersion>
          <args>
            <!-- NOTE(review): jvm-1.5 looks older than the Java runtime Spark 2.2.x
                 expects; confirm and raise the target if so. -->
            <arg>-target:jvm-1.5</arg>
          </args>
        </configuration>
      </plugin>
      <!-- Generates Eclipse project files with the Scala builder and nature. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-eclipse-plugin</artifactId>
        <configuration>
          <downloadSources>true</downloadSources>
          <buildcommands>
            <buildcommand>ch.epfl.lamp.sdt.core.scalabuilder</buildcommand>
          </buildcommands>
          <additionalProjectnatures>
            <projectnature>ch.epfl.lamp.sdt.core.scalanature</projectnature>
          </additionalProjectnatures>
          <classpathContainers>
            <classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
            <classpathContainer>ch.epfl.lamp.sdt.launching.SCALA_CONTAINER</classpathContainer>
          </classpathContainers>
        </configuration>
      </plugin>
    </plugins>
  </build>
  <reporting>
    <plugins>
      <plugin>
        <groupId>org.scala-tools</groupId>
        <artifactId>maven-scala-plugin</artifactId>
        <configuration>
          <scalaVersion>${scala.version}</scalaVersion>
        </configuration>
      </plugin>
    </plugins>
  </reporting>
</project>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.pd.scala | |
import org.apache.spark.sql.SparkSession | |
import org.apache.spark.sql.functions._ | |
import org.apache.spark.sql.expressions.Window | |
import java.util.Calendar | |
/**
 * Spark job that answers five questions by combining `title.basics.tsv`
 * (titles) with `name.basics.tsv` (people). Both files are read as
 * tab-separated text with a header row; presumably they are the IMDb
 * dataset dumps -- confirm against the data source.
 *
 * Every answer is printed to stdout.
 */
object sparkExample {

  /**
   * Runs all five queries in sequence on a local Spark session.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder
      .master("local[*]")
      .appName("Prueba Luis Velasco")
      .getOrCreate()
    try {
      // DataFrame of titles; all columns are strings because no schema is inferred.
      val movies = spark.read
        .option("header", "true")
        .option("delimiter", "\t")
        .csv("title.basics.tsv")

      // DataFrame of people.
      val principals = spark.read
        .option("header", "true")
        .option("delimiter", "\t")
        .csv("name.basics.tsv")

      // One row per (person, known-for title): the comma-separated knownForTitles
      // column is exploded and renamed to `tconst` so it can be joined with `movies`.
      // The profession filter below was disabled by the original author, so people
      // of every profession are counted:
      // .filter("primaryProfession like 'actor' or primaryProfession like 'actress'")
      val actors = principals
        .withColumn("knownForTitles", explode(split(column("knownForTitles"), ",")))
        .withColumnRenamed("knownForTitles", "tconst")
      actors.printSchema()

      // 1) How many action titles are from 1987?
      val action_1987 = movies.filter("startYear == 1987 and genres like '%Action%'").count()
      println(s"There are $action_1987 Action movies from 1987")

      val movies_per_year = movies
        .groupBy("startYear")
        .agg(count("tconst").alias("count_movies"))

      // 2) Which year has the most titles produced?
      // The LIKE pattern is meant to discard the `\N` missing-value marker in startYear.
      movies_per_year
        .filter("startYear not like '%\\N%'")
        .sort(desc("count_movies"))
        .show(10)

      // A leftover `System.exit(1)` used to sit here; it made questions 3-5
      // unreachable and reported failure to the caller, so it has been removed.

      // 3) Top 5 people appearing in titles of any genre since 1987.
      val top_actors = movies
        .join(actors, "tconst")
        .filter("startYear >= 1987")
        .groupBy("nconst", "primaryName")
        .agg(count("tconst").alias("n_movies"))
        .sort(desc("n_movies"))
      top_actors.show(5)

      // 4) How many titles were produced per year over the last 10 years?
      val currentYear = Calendar.getInstance().get(Calendar.YEAR)
      // Comparing against a number drops non-numeric startYear values and
      // years later than the current one.
      movies_per_year
        .filter(s"startYear <= $currentYear")
        .sort(desc("startYear"))
        .show(10)

      // 5) Name of the longest title per decade for the last 4 decades.
      val currentDecade = currentYear - currentYear % 10
      movies
        .withColumn("decade", expr("Int(startYear) - (Int(startYear) % 10)"))
        .withColumn("runtimeMinutes", expr("Int(runtimeMinutes)"))
        .withColumn("longest", max("runtimeMinutes").over(Window.partitionBy("decade")))
        .filter(s"decade <= $currentDecade and longest == runtimeMinutes")
        // Several titles can tie for the longest runtime in a decade; keep a single
        // row per decade so that show(4) really covers four distinct decades.
        .dropDuplicates("decade")
        .sort(desc("decade"))
        .show(4)
    } finally {
      // Release the local Spark resources even when a query fails.
      spark.stop()
    }
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Scratch snippet: load a CSV into a DataFrame and try a few filters.
// Obtain the active SparkSession, or create one if none exists yet.
val spark = SparkSession.builder().getOrCreate()
// Read the file with a header row, letting Spark infer the column types.
val df = spark.read.option("header","true").option("inferSchema","true").csv("nombrearchivo")
// Print summary statistics for the loaded data.
df.describe().show()
// The wildcard import is required for the $"column" syntax used below.
import spark.implicits._
// Column-expression form of a filter.
df.filter($"Close" > 480 && $"High" < 480).show()
// Equivalent SQL-string form of a filter.
df.filter("Close > 480").show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
ASP### ESTO NO ES DE SCALA.
ActionResult
Lo que devuelve un controller. Es un objeto que sirve para devolver JsonResult, FileResult, ViewResult, PartialViewResult, RedirectResult, RedirectToRouteResult, HttpNotFoundResult
return Content("Rodolfo", "application/json");
QueryString
Para pasar parámetros por URL
App_Start
RouteConfig.cs
return new HttpStatusCodeResult(404);
Codigo de un VIEW
Listado de valores
**Instalar el nuget
Click derecho a Proyecto -> Administrar paquetes nuget -> Instalar restsharp
Inyección por dependencia
Click derecho a Proyecto -> Dependencias de paquetes nuget -> ninject.mvc5 -> Version 3.2.1 -> Instalar
Verificar que se instaló NinjectWebCommon en App_Start
Creo carpeta Inject
Creo interfaz IPrueba
Creo clases Prueba
App_Start -> Al final crear clase DependenciasModules.
Aquí mismo, registro el servicio en la clase RegisterServices con el código que tiene adentro.
Luego me voy a HomeController
Click derecho a Proyecto -> Dependencias de paquetes nuget -> Ir a Ninject.mvc5 -> actualizar a version estable más reciente (Instalar 3.3.0)
Click derecho a Proyecto -> Dependencias de paquetes nuget -> Ir a Ninject -> actualizar a version estable más reciente
Click derecho a Proyecto -> Dependencias de paquetes nuget -> Ir a NinjectWebCommon -> actualizar a version estable más reciente
Click derecho a Proyecto -> Dependencias de paquetes nuget -> Ir a NinjectWebCommonWebHost -> actualizar a version estable más reciente
Luego darle a click derecho a proyecto Limpiar
Luego darle a click derecho a proyecto compilar
A los errores darles click derecho y elegir la primera sugerencia de using
Unit Test
Al Proyecto de RestSharpTest -> Referencias -> click derecho agregar referencia -> RestSharpDependencyInyection
Luego En Unit Test