from pyspark.ml.recommendation import ALS

# Context: dfs_actions is a Spark dataframe with the same layout as the pandas dataframe used in the Surprise example https://gist.github.com/jeanmidevacc/a00c9cf8fe9379cd8a818b1d842dbaa1
# Set up the model parameters
als = ALS(
    seed=12,
    userCol="user_id",
    itemCol="item_id",
    ratingCol="rating",
)
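# A minimal continuation sketch, assuming dfs_actions is already loaded (not part of
# the original gist): fit the ALS model and produce top-5 recommendations per user.
als_model = als.fit(dfs_actions)
dfs_recommendations = als_model.recommendForAllUsers(5)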
import surprise

# dfp_actions is a pandas dataframe that contains the ratings between users and items, already indexed
# Build a reader covering the observed rating scale
reader = surprise.Reader(rating_scale=(dfp_actions["rating"].min(), dfp_actions["rating"].max()))
# The *_id columns are the index ids of the user and the item
surprise_data = surprise.Dataset.load_from_df(dfp_actions[["user_id", "item_id", "rating"]], reader=reader)
train_set = surprise_data.build_full_trainset()
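# Hedged continuation sketch (not from the original gist): train a plain SVD model on
# the full trainset; uid/iid would be raw ids taken from dfp_actions.
algo = surprise.SVD(random_state=12)
algo.fit(train_set)
# prediction = algo.predict(uid, iid)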
from pyspark.sql import SparkSession, SQLContext
import pyspark.sql.functions as F
import pyspark.sql.types as T
from pyspark.sql import Window

def build_recommendations(broadcast_dfp_items_factors):
    def build_recommendations_(user_factors, inventory_itemid_encoded, k=5):
        # Keep only the factors of the items that can still be recommended; add your own rules here
        dfp_items_factors_to_rank = broadcast_dfp_items_factors.value[~broadcast_dfp_items_factors.value["id"].isin(inventory_itemid_encoded)]
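        # Hedged continuation sketch (the scoring logic and the "features"/"id" column
        # names are assumptions, not the original gist): rank the remaining items by the
        # dot product of their factors with the user factors and return the k best ids.
        scores = dfp_items_factors_to_rank["features"].apply(
            lambda item_factors: sum(u * i for u, i in zip(user_factors, item_factors)))
        ranked = dfp_items_factors_to_rank.assign(score=scores).nlargest(k, "score")
        return ranked["id"].tolist()
    # Expose the inner function as a Spark UDF returning an array of item ids
    return F.udf(build_recommendations_, T.ArrayType(T.IntegerType()))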
from pyspark.sql import SparkSession, SQLContext
import pyspark.sql.functions as F
import pyspark.sql.types as T
from pyspark.sql import Window

dfs_items = ...  # pyspark dataframe that contains the items to be indexed (identified by itemid)
previous_max_itemid_indexed = 0  # in case it's an incremental process, set a temporary starting offset
windowspec = Window.orderBy(F.col("itemid"))  # build a window function ordered by itemid
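# Hedged continuation sketch (assumed, not the original gist): assign a dense incremental
# index on top of the previous maximum with row_number() over the window.
dfs_items_indexed = dfs_items.withColumn(
    "itemid_indexed",
    F.row_number().over(windowspec) + previous_max_itemid_indexed,
)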
if time.time() - last_checked > 2:
    step += 1
    # Update the last checked time
    last_checked = time.time()
    particle_states = get_particles_state(handler.data["particles"])
    score = handler.data["score"]
    # Set the next particle's x position before releasing
    observation = {
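        # Hedged completion (these keys are assumptions based on the values gathered
        # just above, not the original gist):
        "particles": particle_states,
        "score": score,
    }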
#!/bin/bash

# Check if the user provided an argument
if [ -z "$1" ]; then
    echo "Please specify the number of runs as an argument."
    exit 1
fi

# Total number of runs specified by the first argument
TOTAL_RUNS=$1
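# Hedged continuation sketch (the command being launched is a hypothetical placeholder,
# not the original script): execute the target command once per requested run.
for ((i = 1; i <= TOTAL_RUNS; i++)); do
    echo "Run $i of $TOTAL_RUNS"
    # ./run_once.sh   # <- hypothetical placeholder for the command executed each run
done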
from datetime import datetime
import random
import pandas as pd

class RandomAgent():
    def __init__(self):
        self.creation_date = datetime.utcnow()
        self.tag = "random"
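    # Hedged continuation sketch (the method name and signature are assumptions, not the
    # original gist): a random agent simply samples one of the allowed actions, ignoring
    # the observation it receives.
    def select_action(self, observation, possible_actions):
        return random.choice(possible_actions)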
from pydub import AudioSegment, silence
import pandas as pd

def build_segments(audio, length_segment=10, dbfs=0):
    # Detect silences of at least 1 second that are 16 dB below the reference loudness
    silences = silence.detect_silence(audio, min_silence_len=1000, silence_thresh=dbfs - 16)
    dfp_silences = pd.DataFrame(silences, columns=["start_timecode", "end_timecode"])
    threshold_segment = int(length_segment * 60 * 1000)  # target segment length in milliseconds
    first_timecode = 0
    last_timecode = int(audio.duration_seconds * 1000)
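    # Hedged continuation sketch (assumed, not the original gist): walk the audio in
    # chunks of roughly length_segment minutes and cut at the latest detected silence
    # inside each chunk, falling back to a hard cut when no silence is found.
    segments = []
    current_start = first_timecode
    while current_start < last_timecode:
        target_end = min(current_start + threshold_segment, last_timecode)
        candidates = dfp_silences[
            (dfp_silences["start_timecode"] > current_start)
            & (dfp_silences["start_timecode"] <= target_end)
        ]
        cut = int(candidates["start_timecode"].max()) if len(candidates) > 0 else target_end
        segments.append(audio[current_start:cut])
        current_start = cut
    return segments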
from pathlib import Path
from openai import OpenAI

client_openai = OpenAI(
    # This is the default and can be omitted
    api_key="sk-XXX",
)

def get_transcript_openai_api(file, language="fr"):
    # f = open(file, "rb")
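    # Hedged continuation sketch (assumed, not the original gist): send the audio file to
    # the Whisper transcription endpoint and return the transcribed text.
    with open(file, "rb") as audio_file:
        transcript = client_openai.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
            language=language,
        )
    return transcript.text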
import torch
from transformers import pipeline

device = "cuda:0" if torch.cuda.is_available() else "cpu"

mapping = {"whisper-tiny": "tiny", "whisper-small": "small", "whisper-medium": "medium", "whisper-base": "base"}
hf_model_name = "whisper-medium"
size_model = mapping[hf_model_name]  # tiny, base, small, medium
model = pipeline(
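    # Hedged completion (these arguments are assumptions, not the original gist): an
    # automatic-speech-recognition pipeline on the selected openai/whisper-* checkpoint.
    "automatic-speech-recognition",
    model=f"openai/{hf_model_name}",
    device=device,
)
# transcript = model("audio.wav", return_timestamps=True)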