This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import surprise | |
| # there is pandas dataframe that contains the different ratings between user and item , indexed | |
| # Build a reader | |
| reader = surprise.Reader(rating_scale=(dfp_actions["rating"].min(),dfp_actions["rating"].max())) | |
| #_id column are index id of the user and item | |
| surprise_data = surprise.Dataset.load_from_df(dfp_actions[["user_id", "item_id", "rating"]], reader = reader) | |
| train_set = surprise_train.build_full_trainset() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from pyspark.sql import SparkSession, SQLContext | |
| import pyspark.sql.functions as F | |
| import pyspark.sql.types as T | |
| from pyspark.sql import Window | |
| def build_recommendations(broadcast_dfp_items_factors): | |
| def build_recommendations_(user_factors, inventory_itemid_encoded, k=5): | |
| # Fetch on the factors for the item that can be recommended , add your rules | |
| dfp_items_factors_to_rank = broadcast_dfp_items_factors.value[~broadcast_array_items_factors.value["id"].isin(inventory_itemid_encoded)] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from pyspark.sql import SparkSession, SQLContext | |
| import pyspark.sql.functions as F | |
| import pyspark.sql.types as T | |
| from pyspark.sql import Window | |
| dfs_items = # PySpark DataFrame containing the items to be indexed (identified by itemid) | |
| previous_max_itemid_indexed = 0 # used when the process is incremental; NOTE(review): the original comment is cut off ("set a tempora...") - confirm the intended value | |
| windowspec = Window.orderBy(F.col("itemid"))  # window specification ordered by the itemid column |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| if time.time() - last_checked > 2: | |
| step += 1 | |
| # Update the last checked time | |
| last_checked = time.time() | |
| particle_states = get_particles_state(handler.data["particles"]) | |
| score = handler.data["score"] | |
| # Set the next particle's x position before releasing | |
| observation = { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| # Bail out early when no run count was supplied | |
| if [[ -z "${1}" ]]; then | |
|     printf '%s\n' "Please specify the number of runs as an argument." | |
|     exit 1 | |
| fi | |
| # The first positional argument is the total number of runs | |
| TOTAL_RUNS="${1}" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from datetime import datetime | |
| import random | |
| import pandas as pd | |
| class RandomAgent(): | |
| def __init__(self): | |
| self.creation_date = datetime.utcnow() | |
| self.tag = "random" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from pydub import AudioSegment, silence | |
| import pandas as pd | |
| def build_segments(audio, length_segment=10, dbfs=0): | |
| silences = silence.detect_silence(audio, min_silence_len=1000, silence_thresh=dbfs-16) | |
| dfp_silences = pd.DataFrame(silences, columns = ["start_timecode", "end_timecode"]) | |
| threshold_segment = int(length_segment * 60 * 1000) | |
| first_timecode = 0 | |
| last_timecode = int(audio.duration_seconds * 1000) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from pathlib import Path | |
| from openai import OpenAI | |
| client_openai = OpenAI( | |
| # This is the default and can be omitted | |
| api_key="sk-XXX", | |
| ) | |
| def get_transcript_openai_api(file, language="fr"): | |
| # f = open(file, "rb") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import torch | |
| from transformers import pipeline | |
| device = "cuda:0" if torch.cuda.is_available() else "cpu" | |
| mapping = {"whisper-tiny" : "tiny", "whisper-small" : "small", "whisper-medium" : "medium", "whisper-base" : "base"} | |
| hf_model_name = "whisper-medium" | |
| size_model = mapping[hf_model_name] #tiny, base, small, medium | |
| model = pipeline( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import whisper | |
| size_model = "medium" #the type of model in the model card , with .en or not | |
| model = whisper.load_model(size_model, device="cuda") | |
| def get_transcript_local_whisper(model, file, language): | |
| audio = whisper.load_audio(file) | |
| audio = whisper.pad_or_trim(audio) | |
| mel = whisper.log_mel_spectrogram(audio).to(model.device) | |
| result = whisper.decode(model, mel, language=language) |