This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create a Databricks Unity Catalog volume in each target schema.
schemas = []  # list of schema names in which to create the volume
volume_name = ""  # name of the volume to create

for schema in schemas:
    try:
        # CREATE VOLUME IF NOT EXISTS is idempotent, so re-running is safe.
        spark.sql(f"CREATE VOLUME IF NOT EXISTS {schema}.{volume_name}")
        print(f"Volume {volume_name} created or already exists.")
    except Exception as e:
        # Best-effort: report the failure and keep going with the remaining schemas.
        print(f"Error creating volume {volume_name}: {e}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# definition of the few shot messages | |
messages_few_shots = [] | |
for key, sample in samples.items(): | |
messages_few_shots.extend([{ | |
"role": "user", | |
"content" : str(sample["input"]), | |
},{ | |
"role": "assistant", | |
"content" : str(sample["output"]), | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
samples = { | |
"sample_0" : { | |
"input" : "<p><strong>Merlin:</strong> <em>(off)</em> Non, non, non on y va sûrement pas ! Vous me laissez le temps de me faire à l'idée !</p>", | |
"output" : [{ | |
"character": "Merlin", | |
"dialogue": "Non, non, non on y va sûrement pas ! Vous me laissez le temps de me faire à l'idée !", | |
"parenthetical": "(off)"}] | |
}, | |
"sample_1" : { | |
"input" : "<p><strong>Servius:</strong> Allez on y va !</p>", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
prompt = """ | |
You are given raw dialogues from a TV show script in HTML format. | |
Extract and return the dialogues in the following JSON format: | |
[ | |
{ | |
"character": "", | |
"dialogue": "", | |
"parenthetical": "" | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cheat sheet of common Databricks `dbutils` helpers (placeholder arguments
# left blank; fill in scope/key/widget names for your workspace).

# Secrets module
dbutils.secrets.get(scope="", key="")  # fetch a secret from the vault linked to the Databricks instance

# Widgets module
dbutils.widgets.text("","")  # create a text input widget with a given name and default value
dbutils.widgets.get("")  # retrieve the current value of an input widget

# Notebook module
dbutils.notebook.exit()  # exit a notebook cleanly
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.ml.recommendations import ALS | |
# Context , there is dfs_actions is spark dataframe that looks like the pandas dataframe for surprise example https://gist.github.com/jeanmidevacc/a00c9cf8fe9379cd8a818b1d842dbaa1 | |
# Setup the model parameters | |
als = ALS( | |
seed=12, | |
userCol="user_id", | |
itemCol="item_id", | |
ratingCol="rating", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import surprise

# `dfp_actions` is a pandas dataframe of user/item ratings (columns:
# user_id, item_id, rating), indexed — see the linked surprise example.
# Build a Reader spanning the observed rating range.
reader = surprise.Reader(rating_scale=(dfp_actions["rating"].min(), dfp_actions["rating"].max()))

# The *_id columns are the integer index ids of the user and the item.
surprise_data = surprise.Dataset.load_from_df(dfp_actions[["user_id", "item_id", "rating"]], reader = reader)

# BUG FIX: the original referenced undefined `surprise_train`; the dataset
# loaded above is bound to `surprise_data`.
train_set = surprise_data.build_full_trainset()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql import SparkSession, SQLContext | |
import pyspark.sql.functions as F | |
import pyspark.sql.types as T | |
from pyspark.sql import Window | |
def build_recommendations(broadcast_dfp_items_factors): | |
def build_recommendations_(user_factors, inventory_itemid_encoded, k=5): | |
# Fetch on the factors for the item that can be recommended , add your rules | |
dfp_items_factors_to_rank = broadcast_dfp_items_factors.value[~broadcast_array_items_factors.value["id"].isin(inventory_itemid_encoded)] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql import SparkSession, SQLContext | |
import pyspark.sql.functions as F | |
import pyspark.sql.types as T | |
from pyspark.sql import Window | |
dfs_items = #pyspark dataframe that contains items to be indexed (define by itemid) | |
previous_max_itemid_indexed = 0 #in case it's an incremental process , set a tempora | |
windowspec = Window.orderBy(F.col("itemid"))# build a window function |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
if time.time() - last_checked > 2: | |
step += 1 | |
# Update the last checked time | |
last_checked = time.time() | |
particle_states = get_particles_state(handler.data["particles"]) | |
score = handler.data["score"] | |
# Set the next particle's x position before releasing | |
observation = { |
Newer | Older