$ split -l 5000 users.csv ./split-files
(5000 is the number of lines you want for each file.)
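`split` appends a generated suffix to the prefix you pass, so the chunks land next to the original as `./split-filesaa`, `./split-filesab`, and so on.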
```scala
import java.io.InputStream

import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils
import org.apache.spark.sql.{DataFrame, Row}
import org.postgresql.copy.CopyManager
import org.postgresql.core.BaseConnection

val jdbcUrl = s"jdbc:postgresql://..." // db credentials elided

val connectionProperties = {
  // assumed completion: the original snippet was truncated here
  val props = new java.util.Properties()
  props.put("user", "...")     // credentials elided, as above
  props.put("password", "...")
  props
}
```
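The imports hint at the rest of the pattern: each `DataFrame` partition is rendered to an `InputStream` and streamed through `CopyManager.copyIn`, which speaks Postgres's COPY protocol and is far faster than row-by-row JDBC inserts.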
```ts
// Credits to Louistiti from Drizzle Discord: https://discord.com/channels/1043890932593987624/1130802621750448160/1143083373535973406
import { sql } from 'drizzle-orm';

const clearDb = async (): Promise<void> => {
  const query = sql<string>`SELECT table_name
    FROM information_schema.tables
    WHERE table_schema = 'public'
      AND table_type = 'BASE TABLE';
  `;

  // Assumed completion (the original snippet was truncated here).
  // `db` is your drizzle instance; with node-postgres the rows live
  // under `result.rows` instead of on the result itself.
  const tables = await db.execute(query);
  for (const table of tables) {
    await db.execute(sql.raw(`TRUNCATE TABLE "${table.table_name}" CASCADE;`));
  }
};
```
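One way to wire this into a test suite, assuming Vitest (any runner with a `beforeEach` hook works the same way):

```ts
import { beforeEach } from 'vitest';

// Start every test from an empty database so cases stay independent.
beforeEach(async () => {
  await clearDb();
});
```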
```ts
import { sql, type AnyColumn, type SQL } from 'drizzle-orm';
import type { SelectedFields } from 'drizzle-orm/pg-core';

export function distinctOn<Column extends AnyColumn>(column: Column) {
  return sql<Column["_"]["data"]>`distinct on (${column}) ${column}`;
}

export function jsonBuildObject<T extends SelectedFields>(shape: T) {
  const chunks: SQL[] = [];

  Object.entries(shape).forEach(([key, value]) => {
    if (chunks.length > 0) {
      chunks.push(sql.raw(`,`));
    }
    // Assumed completion from here on: key as a SQL literal, value bound as a parameter.
    chunks.push(sql.raw(`'${key}',`));
    chunks.push(sql`${value}`);
  });

  return sql`json_build_object(${sql.join(chunks)})`;
}
```
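A sketch of how the two helpers combine, assuming a hypothetical `users` pg-core table with `id`, `name`, and `cityId` columns and a `db` drizzle instance (none of which appear in the original snippet). Postgres requires the `DISTINCT ON` expression to lead the `ORDER BY`, and the helper must come first in the select list:

```ts
// One row per city: DISTINCT ON keeps the first row for each cityId in
// ORDER BY order; json_build_object nests the user fields into one column.
const firstUserPerCity = await db
  .select({
    cityId: distinctOn(users.cityId),
    user: jsonBuildObject({ id: users.id, name: users.name }),
  })
  .from(users)
  .orderBy(users.cityId, users.name);
```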