Jean-Michel Daignan jeanmidevacc

@jeanmidevacc
jeanmidevacc / mf_pyspark_als.py
Last active August 14, 2024 12:12
mf_pyspark_als.py
from pyspark.ml.recommendation import ALS
# Context: dfs_actions is a Spark DataFrame shaped like the pandas dataframe in the surprise example https://gist.github.com/jeanmidevacc/a00c9cf8fe9379cd8a818b1d842dbaa1
# Set up the model parameters
als = ALS(
    seed=12,
    userCol="user_id",
    itemCol="item_id",
    ratingCol="rating",
)
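# A minimal usage sketch (not in the original gist): fit the model on the
# ratings dataframe and produce the top-5 item recommendations per user.
model = als.fit(dfs_actions)
dfs_recommendations = model.recommendForAllUsers(5)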
import surprise
# Context: dfp_actions is a pandas DataFrame that contains the ratings between users and items
# Build a reader
reader = surprise.Reader(rating_scale=(dfp_actions["rating"].min(),dfp_actions["rating"].max()))
# the *_id columns are the index ids of the user and the item
surprise_data = surprise.Dataset.load_from_df(dfp_actions[["user_id", "item_id", "rating"]], reader = reader)
train_set = surprise_data.build_full_trainset()
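# A minimal continuation sketch (the algorithm choice is an assumption, not from
# the original gist): train a matrix-factorization model on the full trainset.
algo = surprise.SVD()
algo.fit(train_set)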
from pyspark.sql import SparkSession, SQLContext
import pyspark.sql.functions as F
import pyspark.sql.types as T
from pyspark.sql import Window
def build_recommendations(broadcast_dfp_items_factors):
    def build_recommendations_(user_factors, inventory_itemid_encoded, k=5):
        # Keep only the factors of items that can still be recommended; add your own rules here
        dfp_items_factors_to_rank = broadcast_dfp_items_factors.value[~broadcast_dfp_items_factors.value["id"].isin(inventory_itemid_encoded)]
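        # A plausible completion (an assumption, not the original code): score each
        # candidate by the dot product of user and item factors and keep the top-k
        # item ids; "id"/"features" match the columns of ALS item factors.
        import numpy as np  # local import kept with the sketch
        scores = dfp_items_factors_to_rank["features"].apply(
            lambda factors: float(np.dot(user_factors, factors))
        )
        return dfp_items_factors_to_rank.assign(score=scores).nlargest(k, "score")["id"].tolist()
    return build_recommendations_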
from pyspark.sql import SparkSession, SQLContext
import pyspark.sql.functions as F
import pyspark.sql.types as T
from pyspark.sql import Window
dfs_items = ...  # PySpark DataFrame that contains the items to be indexed (identified by itemid)
previous_max_itemid_indexed = 0  # in case it's an incremental process, set this from the previous run's max index
windowspec = Window.orderBy(F.col("itemid"))  # build a window function ordered by itemid
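# A minimal sketch of the indexing step this sets up (the output column name is
# an assumption): assign a dense index to each item, offset by the previously
# assigned maximum.
dfs_items_indexed = dfs_items.withColumn(
    "itemid_encoded",
    F.row_number().over(windowspec) + previous_max_itemid_indexed,
)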
if time.time() - last_checked > 2:
    step += 1
    # Update the last checked time
    last_checked = time.time()
particle_states = get_particles_state(handler.data["particles"])
score = handler.data["score"]
# Set the next particle's x position before releasing
observation = {
    # keys below are assumptions; the gist preview is truncated here
    "step": step,
    "score": score,
    "particles": particle_states,
}
@jeanmidevacc
jeanmidevacc / trigger.sh
Created June 24, 2024 22:16
suika trigger simulation
#!/bin/bash
# Check if the user provided an argument
if [ -z "$1" ]; then
echo "Please specify the number of runs as an argument."
exit 1
fi
# Total number of runs specified by the first argument
TOTAL_RUNS=$1
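# Plausible continuation (the gist preview is truncated here; the loop body is
# an assumption): launch the simulation once per run.
for i in $(seq 1 "$TOTAL_RUNS"); do
    echo "Run $i of $TOTAL_RUNS"
done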
@jeanmidevacc
jeanmidevacc / suika_baseline_agents.py
Created June 24, 2024 15:56
suika_baseline_agents.py
from datetime import datetime
import random
import pandas as pd
class RandomAgent:
    def __init__(self):
        self.creation_date = datetime.utcnow()
        self.tag = "random"
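    # Plausible continuation (method name and action space are assumptions; the
    # gist preview is truncated here): a random agent ignores the game state and
    # samples a drop position uniformly.
    def get_action(self, observation):
        return random.random()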
@jeanmidevacc
jeanmidevacc / build_timecodes_based_on_silence.py
Created January 28, 2024 21:38
build_timecodes_based_on_silence
from pydub import AudioSegment, silence
import pandas as pd
def build_segments(audio, length_segment=10, dbfs=0):
    # Detect silences of at least 1s that sit 16 dB below the reference loudness
    silences = silence.detect_silence(audio, min_silence_len=1000, silence_thresh=dbfs - 16)
    dfp_silences = pd.DataFrame(silences, columns=["start_timecode", "end_timecode"])
    threshold_segment = int(length_segment * 60 * 1000)  # target segment length in ms (length_segment is in minutes)
    first_timecode = 0
    last_timecode = int(audio.duration_seconds * 1000)  # audio duration in ms
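# Usage sketch (the file name is illustrative; assumes the truncated function
# returns the segment timecodes): load the audio with pydub and build segments
# relative to its average loudness.
audio = AudioSegment.from_file("episode.mp3")
segments = build_segments(audio, length_segment=10, dbfs=audio.dBFS)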
@jeanmidevacc
jeanmidevacc / openai_whisper.py
Created January 28, 2024 20:20
openai_whisper
from pathlib import Path
from openai import OpenAI
client_openai = OpenAI(
# This is the default and can be omitted
api_key="sk-XXX",
)
def get_transcript_openai_api(file, language="fr"):
    # Completion sketch of the truncated gist, using the SDK's standard
    # transcription endpoint
    with open(file, "rb") as f:
        transcript = client_openai.audio.transcriptions.create(
            model="whisper-1",
            file=f,
            language=language,
        )
    return transcript.text
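# Usage sketch (the file path is illustrative):
print(get_transcript_openai_api("interview.mp3"))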
@jeanmidevacc
jeanmidevacc / local_hf_whisper.py
Created January 28, 2024 18:16
local_hf_whisper
import torch
from transformers import pipeline
device = "cuda:0" if torch.cuda.is_available() else "cpu"
mapping = {"whisper-tiny" : "tiny", "whisper-small" : "small", "whisper-medium" : "medium", "whisper-base" : "base"}
hf_model_name = "whisper-medium"
size_model = mapping[hf_model_name] #tiny, base, small, medium
model = pipeline(
    # Completion sketch of the truncated call; the HF hub model id is an assumption
    "automatic-speech-recognition",
    model=f"openai/{hf_model_name}",
    device=device,
)
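# Usage sketch (the file path is illustrative); return_timestamps=True helps
# with audio longer than Whisper's 30-second window.
transcript = model("interview.mp3", return_timestamps=True)
print(transcript["text"])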