Skip to content

Instantly share code, notes, and snippets.

View djouallah's full-sized avatar

Mimoune djouallah

View GitHub Profile
#V order thing you can ignore those two lines
spark.conf.set("spark.sql.parquet.vorder.enabled", "true")
spark.conf.set("spark.microsoft.delta.optimizeWrite.enabled", "true")
#Load from the default lakehouse, make sure you click on the pin
from pyspark.sql.types import *
df = spark.read.option("header", "true").format("csv").load("Files/csv/*.csv")
df.write.mode("overwrite").format("delta").save("Tables/tablecsv")
from datetime import datetime, date, timedelta
import urllib.request as urllib2
import tempfile
import pandas as pd
import pyarrow as pa
import pyarrow.dataset as ds
import re ,shutil
from urllib.request import urlopen
import os
import adlfs
let
parquetfunctin =(params) =>
let
Parquet = Parquet.Document(Binary.Buffer(Web.Contents("https://nyc-tlc.s3.amazonaws.com/trip+data/",[RelativePath=params])))
in
Parquet,
select `WKT`,
`C1`,
`C2`
from
(
select `WKT`,
sum(`area`) as `C1`,
min(`WKT`) as `C2`
from
(
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"usermeta": {
"deneb": {
"build": "1.1.0.20220119#7e76f47",
"metaVersion": 1,
"provider": "vegaLite",
"providerVersion": "5.2.0"
},
"interactivity": {
// DAX Query
DEFINE
VAR __DS0FilterTable =
TREATAS({"Coal",
"Renewable",
"Fuel"}, 'UNITARCHIVE'[Technology])
VAR __DS0FilterTable2 =
FILTER(
KEEPFILTERS(VALUES('UNITARCHIVE'[SETTLEMENTDATE])),
WITH
xx AS (
SELECT
"australia" AS key,
state,
suburbs,
longitude,
latitude,
IRSAD
FROM
{
"FileFormatVersion": 1,
"PhysicalQueryPlanRows": [
{
"Records": null,
"Operation": "PartitionIntoGroups: IterPhyOp LogOp=Order IterCols(0, 1)('covid_19_geographic_distribution_worldwide'[countries_and_territories], ''[Sumdaily_confirmed_cases]) #Groups=1 #Rows=212",
"IndentedOperation": "PartitionIntoGroups: IterPhyOp LogOp=Order IterCols(0, 1)('covid_19_geographic_distribution_worldwide'[countries_and_territories], ''[Sumdaily_confirmed_cases]) #Groups=1 #Rows=212",
"Level": 0,
"RowNumber": 1
},
with tt as (SELECT gg.countries_and_territories as Countries , date, sum( daily_deaths ) as daily_deaths ,max(pop) as pop FROM `GIS.covid19new` gg
left join (SELECT countries_and_territories,max( pop_data_2019) as pop FROM `GIS.covid19new` group by 1) pp
on gg.countries_and_territories =pp.countries_and_territories
where gg.countries_and_territories in unnest(@selection) group by 1,2
UNION ALL
SELECT "Rest of the World" as Countries , date,sum( daily_deaths ) as daily_deaths,sum(pop ) FROM `GIS.covid19new` gg
left join (SELECT countries_and_territories,max( pop_data_2019) as pop FROM `GIS.covid19new` group by 1) pp
on gg.countries_and_territories =pp.countries_and_territories
where gg.countries_and_territories not in unnest(@selection) group by 1,2)
WITH Geographies AS
(SELECT ST_GEOGFROMTEXT('POINT(1 1)') AS g ,'newgeopmetry' as t UNION ALL
SELECT ST_GEOGFROMTEXT('POINT(1 3)') AS g ,'newgeopmetry' as t UNION ALL
SELECT ST_GEOGFROMTEXT('POINT(1 2)') AS g ,'newgeopmetry' as t )
SELECT
t, ST_CONVEXHULL(g) AS polygon FROM Geographies group by 1;