# Connect to an S3-compatible endpoint with boto3 (credentials redacted).
import boto3

s3_resource = boto3.resource(
    's3',
    region_name="us-east-1",
    endpoint_url="zzzzzzzzzzzzzzzzzzzz",                   # placeholder endpoint
    aws_access_key_id="uuuuuuuuuuuuuuuuuu",                # placeholder key
    aws_secret_access_key="xxxxxxxxxxxxxxxxxxxxxxxxxxx",   # placeholder secret
)
bucket_name = "uuuuu"  # placeholder bucket name
bucket = s3_resource.Bucket(bucket_name)
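
A quick usage check (not part of the original snippet): with the bucket handle above, the standard boto3 resource API can enumerate the bucket's objects.

# Sketch only: list keys and sizes in the bucket.
for obj in bucket.objects.all():
    print(obj.key, obj.size)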

-- DuckDB: seed an empty partitioned Parquet target, then ingest only the CSV
-- files whose names are not already recorded in that target.
COPY (SELECT '' AS filename, NULL AS year)
TO '/lakehouse/default/Files/scada' (FORMAT PARQUET, PARTITION_BY (year), OVERWRITE_OR_IGNORE);

SET VARIABLE list_of_files = (
    SELECT list(file) FROM glob('/lakehouse/default/Files/Daily_Reports/*.CSV')
    WHERE parse_filename(file) NOT IN
        (SELECT filename FROM read_parquet('/lakehouse/default/Files/scada/*/*.parquet'))
);

CREATE OR REPLACE VIEW raw AS (
    SELECT * FROM read_csv(getvariable('list_of_files'), skip=1, header=false, all_varchar=true,
    columns={
        'I': 'VARCHAR', 'UNIT': 'VARCHAR', 'XX': 'VARCHAR', 'VERSION': 'VARCHAR', 'SETTLEMENTDATE': 'VARCHAR', 'RUNNO': 'VARCHAR',
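
The snippet breaks off inside the column list, but the pattern (glob the landing folder, anti-join on file names already recorded in the output, ingest only the remainder) also works driven from Python; a minimal sketch with the duckdb client, using hypothetical paths rather than the lakehouse folders above:

# Sketch only; both paths are hypothetical stand-ins.
import duckdb

con = duckdb.connect()
new_files = con.sql("""
    SELECT list(file) FROM glob('/data/incoming/*.csv')
    WHERE parse_filename(file) NOT IN
        (SELECT filename FROM read_parquet('/data/processed/*/*.parquet'))
""").fetchone()[0]
print(f"{len(new_files or [])} new file(s) to ingest")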

# Fetch a Fabric notebook's definition by id or name.
# notebookutils is available as a built-in inside Microsoft Fabric notebooks.
import requests
import time
import json
import base64

def get_notebook_content(notebook_id_or_name):
    nb = notebookutils.notebook.get(notebook_id_or_name)
    workspaceId = nb['workspaceId']
    notebookId = nb['id']
    format = 'ipynb'
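    # Continuation sketch, not from the original gist. Assumes the Fabric
    # Items "getDefinition" REST endpoint and the notebookutils token helper;
    # treat both names as assumptions. A 202 response would need polling,
    # which is likely why the original imports time.
    token = notebookutils.credentials.getToken("pbi")
    url = (f"https://api.fabric.microsoft.com/v1/workspaces/{workspaceId}"
           f"/items/{notebookId}/getDefinition?format={format}")
    response = requests.post(url, headers={"Authorization": f"Bearer {token}"})
    payload = response.json()["definition"]["parts"][0]["payload"]
    return json.loads(base64.b64decode(payload))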

-- Query01: TPC-H Query 1, the pricing-summary report over lineitem.
SELECT
    l_returnflag,
    l_linestatus,
    SUM(l_quantity) AS sum_qty,
    SUM(l_extendedprice) AS sum_base_price,
    SUM(l_extendedprice * (1 - l_discount)) AS sum_disc_price,
    SUM(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge,
    AVG(l_quantity) AS avg_qty,
    AVG(l_extendedprice) AS avg_price,
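    -- Completion sketch, not part of the original truncated gist: the
    -- remainder follows the published TPC-H Query 1 text.
    AVG(l_discount) AS avg_disc,
    COUNT(*) AS count_order
FROM lineitem
WHERE l_shipdate <= DATE '1998-12-01' - INTERVAL 90 DAY
GROUP BY l_returnflag, l_linestatus
ORDER BY l_returnflag, l_linestatus;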

%%time
# Fabric notebook: generate data with DuckDB and write it out as Delta tables.
!pip install -q duckdb
!pip install -q deltalake
import duckdb
from deltalake.writer import write_deltalake
# trident_token_library_wrapper is a Fabric-provided library for AAD tokens.
from trident_token_library_wrapper import PyTridentTokenLibrary

aadToken = PyTridentTokenLibrary.get_access_token("storage")
sf = 1
for x in range(sf):
    con = duckdb.connect()
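    # Continuation sketch, not from the original gist. Given the deltalake
    # import and the scale-factor variable sf, one plausible loop body is to
    # generate TPC-H data in DuckDB and hand it to write_deltalake. The table
    # choice, OneLake path, and use of x are assumptions.
    con.sql("INSTALL tpch")
    con.sql("LOAD tpch")
    con.sql(f"CALL dbgen(sf={x + 1})")
    tbl = con.sql("SELECT * FROM lineitem").arrow()
    write_deltalake(
        "abfss://workspace@onelake.dfs.fabric.microsoft.com/lake.Lakehouse/Tables/lineitem",  # hypothetical path
        tbl,
        mode="overwrite",
        storage_options={"bearer_token": aadToken, "use_fabric_endpoint": "true"},
    )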

# List every object under an S3 prefix (paginated) and collect name + date.
import boto3
import pandas as pd

s3_client = boto3.client('s3')
bucket = 'xxxxxxx'               # placeholder bucket
prefix = 'zzzz/yyyyyy/metadata'  # placeholder prefix
paginator = s3_client.get_paginator('list_objects_v2')
response_iterator = paginator.paginate(Bucket=bucket, Prefix=prefix)
file_names = pd.DataFrame(columns=['file', 'date'])
for response in response_iterator:
    for object_data in response['Contents']:
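        # Continuation sketch, not from the original gist: given the
        # ['file', 'date'] columns above, record each object's key and
        # last-modified timestamp.
        file_names.loc[len(file_names)] = [object_data['Key'], object_data['LastModified']]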

# V-Order and optimized-write settings; you can ignore these two lines.
spark.conf.set("spark.sql.parquet.vorder.enabled", "true")
spark.conf.set("spark.microsoft.delta.optimizeWrite.enabled", "true")
# Load from the default lakehouse (make sure it is pinned as the default).
from pyspark.sql.types import *
df = spark.read.option("header", "true").format("csv").load("Files/csv/*.csv")
df.write.mode("overwrite").format("delta").save("Tables/tablecsv")
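
To confirm the write, the Delta output can be read straight back from the Tables folder; a quick check, assuming the same default lakehouse is still attached:

# Sketch only: reload the Delta table written above and count its rows.
df2 = spark.read.format("delta").load("Tables/tablecsv")
print(df2.count())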

# Imports for a download-and-publish pipeline: fetch files over HTTP,
# stage them locally, and write a pyarrow dataset to ADLS via adlfs.
from datetime import datetime, date, timedelta
import urllib.request as urllib2
from urllib.request import urlopen
import tempfile
import os
import re
import shutil
import pandas as pd
import pyarrow as pa
import pyarrow.dataset as ds
import adlfs
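
Only the imports survive in this snippet; a minimal sketch of how these pieces usually combine, where the account name, container, and URL are hypothetical stand-ins rather than the original code:

# Sketch only; every name below is a hypothetical placeholder.
fs = adlfs.AzureBlobFileSystem(account_name="myaccount", anon=False)
with urlopen("https://example.com/data.csv") as resp:
    df = pd.read_csv(resp)
table = pa.Table.from_pandas(df)
ds.write_dataset(table, "mycontainer/landing", format="parquet", filesystem=fs)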

// Power Query M: a function that downloads one NYC taxi Parquet file by name.
let
    parquetfunction = (params) =>
        let
            Parquet = Parquet.Document(Binary.Buffer(Web.Contents("https://nyc-tlc.s3.amazonaws.com/trip+data/", [RelativePath = params])))
        in
            Parquet,
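    // Continuation sketch, not from the original gist: the outer let is
    // truncated, and one natural way to finish it is to invoke the function
    // over a list of file names. The file name below is a hypothetical example.
    files = List.Transform({"yellow_tripdata_2023-01.parquet"}, parquetfunction),
    combined = Table.Combine(files)
in
    combined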

-- Per-WKT aggregates: total area (C1) and a representative WKT value (C2).
select `WKT`,
       `C1`,
       `C2`
from
(
    select `WKT`,
           sum(`area`) as `C1`,
           min(`WKT`) as `C2`
    from
    (