Jean-Michel Daignan jeanmidevacc
@jeanmidevacc
jeanmidevacc / databricks_create_volume.py
Created July 21, 2025 20:53
PySpark code to create a volume in Databricks schemas
schemas = []  # List of the schemas in which to create the volume
volume_name = ""  # name of your volume

for schema in schemas:
    try:
        spark.sql(f"CREATE VOLUME IF NOT EXISTS {schema}.{volume_name}")
        print(f"Volume {schema}.{volume_name} created or already exists.")
    except Exception as e:
        print(f"Error creating volume {schema}.{volume_name}: {e}")
# Definition of the few-shot messages
messages_few_shots = []
for key, sample in samples.items():
    messages_few_shots.extend([{
        "role": "user",
        "content": str(sample["input"]),
    }, {
        "role": "assistant",
        "content": str(sample["output"]),
    }])
samples = {
    "sample_0": {
        "input": "<p><strong>Merlin:</strong> <em>(off)</em> Non, non, non on y va sûrement pas ! Vous me laissez le temps de me faire à l'idée !</p>",
        "output": [{
            "character": "Merlin",
            "dialogue": "Non, non, non on y va sûrement pas ! Vous me laissez le temps de me faire à l'idée !",
            "parenthetical": "(off)"}]
    },
    "sample_1": {
        "input": "<p><strong>Servius:</strong> Allez on y va !</p>",
@jeanmidevacc
jeanmidevacc / llm_prompt.py
Created May 28, 2025 22:48
This is an example of a prompt tested during my kaamelot project
prompt = """
You are given raw dialogues from a TV show script in HTML format.
Extract and return the dialogues in the following JSON format:
[
{
"character": "",
"dialogue": "",
"parenthetical": ""
}
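To tie the prompt and the few-shot messages together, here is a minimal sketch of a chat-completion call; the openai client, the model name, and the html_dialogue variable are assumptions, not part of the original gists:

from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set; any chat-completion client would do
html_dialogue = "<p><strong>Arthur:</strong> Bon, on y va.</p>"  # hypothetical input

messages = [{"role": "system", "content": prompt}]
messages.extend(messages_few_shots)  # few-shot examples built above
messages.append({"role": "user", "content": html_dialogue})

response = client.chat.completions.create(model="gpt-4o-mini", messages=messages)
print(response.choices[0].message.content)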
@jeanmidevacc
jeanmidevacc / databricks_dbutils_selection.py
Last active July 21, 2025 20:50
This is a collection of dbutils functions that I used
# Secrets module
dbutils.secrets.get(scope="", key="")  # Fetch a secret from the vault linked to the DBX instance

# Widgets module
dbutils.widgets.text("", "")  # Create a text input widget with a given name and default value
dbutils.widgets.get("")  # Retrieve the current value of an input widget

# Notebook module
dbutils.notebook.exit("")  # Exit a notebook cleanly, returning a value to the caller
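As a usage sketch, these pieces typically combine into a parameterized notebook; the widget names, defaults, and secret scope below are hypothetical:

# Declare input widgets with hypothetical names and defaults
dbutils.widgets.text("catalog", "main")
dbutils.widgets.text("schema", "default")

catalog = dbutils.widgets.get("catalog")
schema = dbutils.widgets.get("schema")

# Fetch a hypothetical API token from a secret scope attached to the workspace
api_token = dbutils.secrets.get(scope="my_scope", key="api_token")

# Return a status to a caller (e.g., dbutils.notebook.run from another notebook)
dbutils.notebook.exit(f"done:{catalog}.{schema}")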
@jeanmidevacc
jeanmidevacc / mf_pyspark_als.py
Last active August 14, 2024 12:12
mf_pyspark_als.py
from pyspark.ml.recommendation import ALS

# Context: dfs_actions is a Spark dataframe that looks like the pandas dataframe from the surprise example https://gist.github.com/jeanmidevacc/a00c9cf8fe9379cd8a818b1d842dbaa1

# Set up the model parameters
als = ALS(
    seed=12,
    userCol="user_id",
    itemCol="item_id",
    ratingCol="rating",
)
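From here, a minimal sketch of training the model and producing top-k recommendations, assuming dfs_actions has the three columns configured above:

# Fit the ALS model on the interactions dataframe
model = als.fit(dfs_actions)

# Produce the top 5 item recommendations per user
dfs_recommendations = model.recommendForAllUsers(5)
dfs_recommendations.show(truncate=False)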
import surprise

# dfp_actions is a pandas dataframe that contains the ratings between users and items
# the *_id columns are the index ids of the user and the item

# Build a reader
reader = surprise.Reader(rating_scale=(dfp_actions["rating"].min(), dfp_actions["rating"].max()))
surprise_data = surprise.Dataset.load_from_df(dfp_actions[["user_id", "item_id", "rating"]], reader=reader)
train_set = surprise_data.build_full_trainset()
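A minimal sketch of fitting a model and scoring a pair, using surprise's built-in SVD as an assumed choice of algorithm (the (uid, iid) pair is hypothetical):

# Train a matrix-factorization model on the full training set
algo = surprise.SVD(random_state=12)
algo.fit(train_set)

# Predict the rating of a hypothetical (user, item) pair
prediction = algo.predict(uid=0, iid=0)
print(prediction.est)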
from pyspark.sql import SparkSession, SQLContext
import pyspark.sql.functions as F
import pyspark.sql.types as T
from pyspark.sql import Window
def build_recommendations(broadcast_dfp_items_factors):
    def build_recommendations_(user_factors, inventory_itemid_encoded, k=5):
        # Fetch the factors of the items that can still be recommended; add your own filtering rules
        dfp_items_factors_to_rank = broadcast_dfp_items_factors.value[~broadcast_dfp_items_factors.value["id"].isin(inventory_itemid_encoded)]
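        # The gist preview cuts off here; below is a hedged continuation, not the author's
        # original code. Assumptions: the item factors live in a "features" column of the
        # broadcast pandas dataframe, candidates are ranked by dot product against the user
        # factors, and numpy is imported as np.
        scores = dfp_items_factors_to_rank["features"].apply(lambda factors: float(np.dot(user_factors, factors)))
        # Return the ids of the top-k scored candidates
        return dfp_items_factors_to_rank.loc[scores.nlargest(k).index, "id"].tolist()
    return build_recommendations_

# Hypothetical wiring: broadcast the pandas item factors and apply the closure as a UDF
broadcast_dfp_items_factors = spark.sparkContext.broadcast(dfp_items_factors)
recommend_udf = F.udf(build_recommendations(broadcast_dfp_items_factors), T.ArrayType(T.IntegerType()))
dfs_users = dfs_users.withColumn("recommendations", recommend_udf("user_factors", "inventory_itemid_encoded"))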
from pyspark.sql import SparkSession, SQLContext
import pyspark.sql.functions as F
import pyspark.sql.types as T
from pyspark.sql import Window
dfs_items = ...  # PySpark dataframe that contains the items to be indexed (defined by itemid)
previous_max_itemid_indexed = 0  # in case it's an incremental process, set this to the previously assigned max index
windowspec = Window.orderBy(F.col("itemid"))  # build a window spec ordered by the raw itemid
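The preview stops before the indexing itself; a minimal sketch of the likely next step, assuming row_number over the window produces the new index (note that a window with no partitionBy gathers all rows into a single partition):

# Assign a dense, increasing index; new ids continue after previous_max_itemid_indexed
dfs_items = dfs_items.withColumn(
    "itemid_indexed",
    F.row_number().over(windowspec) + previous_max_itemid_indexed)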
if time.time() - last_checked > 2:
    step += 1
    # Update the last checked time
    last_checked = time.time()

    particle_states = get_particles_state(handler.data["particles"])
    score = handler.data["score"]

    # Set the next particle's x position before releasing
    observation = {
        # Assumed contents, inferred from the variables computed just above
        "particles": particle_states,
        "score": score,
    }