This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create a Databricks Unity Catalog volume in each target schema.
schemas = []  # list of schema names in which to create the volume
volume_name = ""  # name of the volume to create

for schema in schemas:
    try:
        # CREATE VOLUME IF NOT EXISTS is idempotent, so re-running is safe.
        spark.sql(f"CREATE VOLUME IF NOT EXISTS {schema}.{volume_name}")
        print(f"Volume {volume_name} created or already exists.")
    except Exception as e:
        # Best-effort: report the failure and keep going with the remaining schemas.
        print(f"Error creating volume {volume_name}: {e}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# definition of the few shot messages | |
messages_few_shots = [] | |
for key, sample in samples.items(): | |
messages_few_shots.extend([{ | |
"role": "user", | |
"content" : str(sample["input"]), | |
},{ | |
"role": "assistant", | |
"content" : str(sample["output"]), | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
samples = { | |
"sample_0" : { | |
"input" : "<p><strong>Merlin:</strong> <em>(off)</em> Non, non, non on y va sûrement pas ! Vous me laissez le temps de me faire à l'idée !</p>", | |
"output" : [{ | |
"character": "Merlin", | |
"dialogue": "Non, non, non on y va sûrement pas ! Vous me laissez le temps de me faire à l'idée !", | |
"parenthetical": "(off)"}] | |
}, | |
"sample_1" : { | |
"input" : "<p><strong>Servius:</strong> Allez on y va !</p>", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
prompt = """ | |
You are given raw dialogues from a TV show script in HTML format. | |
Extract and return the dialogues in the following JSON format: | |
[ | |
{ | |
"character": "", | |
"dialogue": "", | |
"parenthetical": "" | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cheat sheet of common Databricks `dbutils` helpers (placeholder arguments
# left blank; fill in scope/key/widget names for your workspace).

# Secrets module
dbutils.secrets.get(scope="", key="")  # fetch a secret from the vault linked to the Databricks instance

# Widgets module
dbutils.widgets.text("","")  # create a text input widget with a given name and default value
dbutils.widgets.get("")  # retrieve the current value of an input widget

# Notebook module
dbutils.notebook.exit()  # exit a notebook cleanly
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.ml.recommendations import ALS | |
# Context , there is dfs_actions is spark dataframe that looks like the pandas dataframe for surprise example https://gist.github.com/jeanmidevacc/a00c9cf8fe9379cd8a818b1d842dbaa1 | |
# Setup the model parameters | |
als = ALS( | |
seed=12, | |
userCol="user_id", | |
itemCol="item_id", | |
ratingCol="rating", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import surprise

# `dfp_actions` is a pandas dataframe of user/item ratings (columns:
# user_id, item_id, rating), indexed — see the linked surprise example.
# Build a Reader spanning the observed rating range.
reader = surprise.Reader(rating_scale=(dfp_actions["rating"].min(), dfp_actions["rating"].max()))

# The *_id columns are the integer index ids of the user and the item.
surprise_data = surprise.Dataset.load_from_df(dfp_actions[["user_id", "item_id", "rating"]], reader = reader)

# BUG FIX: the original referenced undefined `surprise_train`; the dataset
# loaded above is bound to `surprise_data`.
train_set = surprise_data.build_full_trainset()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql import SparkSession, SQLContext | |
import pyspark.sql.functions as F | |
import pyspark.sql.types as T | |
from pyspark.sql import Window | |
def build_recommendations(broadcast_dfp_items_factors): | |
def build_recommendations_(user_factors, inventory_itemid_encoded, k=5): | |
# Fetch on the factors for the item that can be recommended , add your rules | |
dfp_items_factors_to_rank = broadcast_dfp_items_factors.value[~broadcast_array_items_factors.value["id"].isin(inventory_itemid_encoded)] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql import SparkSession, SQLContext | |
import pyspark.sql.functions as F | |
import pyspark.sql.types as T | |
from pyspark.sql import Window | |
dfs_items = #pyspark dataframe that contains items to be indexed (define by itemid) | |
previous_max_itemid_indexed = 0 #in case it's an incremental process , set a tempora | |
windowspec = Window.orderBy(F.col("itemid"))# build a window function |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
if time.time() - last_checked > 2: | |
step += 1 | |
# Update the last checked time | |
last_checked = time.time() | |
particle_states = get_particles_state(handler.data["particles"]) | |
score = handler.data["score"] | |
# Set the next particle's x position before releasing | |
observation = { |
Newer | Older