# Last active: July 4, 2024 23:53
# Save farach/24cda6c8ee94ca7260c92edab3fcde8d to your computer and use it in GitHub Desktop.
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Instructions:
# - Download LM Studio
# - Download Phi-3 Model (within LM Studio)
# - Load the model into LM Studio
# - Start the Local Server (instructions here: https://lmstudio.ai/docs/local-server)
# Load necessary libraries
library(httr)
library(jsonlite)
library(tidyverse)
library(glue)
# Generate synthetic survey data
# Five short product comments keyed by respondent_id. Two of them are not in
# English (one Vietnamese, one Spanish) so the translation step of the prompt
# is exercised as well.
survey_data <- tibble(
  respondent_id = 1:5,
  comment = c(
    "The product is very easy to use.",
    "Tôi thích sự hỗ trợ của khách hàng.",
    "The price is unbeatable.",
    "Tiene excelentes caracteristicas.",
    "The design is sleek and modern."
  )
)
# Define the local inference server URL
# (chat-completions endpoint exposed by the LM Studio local server)
local_server_url <- "http://localhost:1234/v1/chat/completions"
# Function to categorize survey comments using the local model
#
# Sends one survey comment to the local chat-completions endpoint together
# with a system prompt that asks the model to translate the comment to
# English, assign one category, and give a rationale.
#
# Arguments:
#   comment    A single character string: the survey comment to categorize.
#   server_url URL of the chat-completions endpoint. Defaults to the
#              script-level `local_server_url`.
#   verbose    If TRUE (the default, matching the earlier behaviour), print
#              the parsed response for debugging.
#
# Returns:
#   The parsed JSON response as a nested list; the model's reply text is at
#   `$choices[[1]]$message$content`.
#
# Errors:
#   Stops if `comment` is not a length-1 character vector, if the server
#   cannot be reached, or if the server answers with a status other than 200.
local_model_categorize <- function(comment,
                                   server_url = local_server_url,
                                   verbose = TRUE) {
  stopifnot(is.character(comment), length(comment) == 1L)

  # Define the categories and provide the model with a data dictionary
  categories <- c(
    "ease of use",
    "customer support",
    "pricing",
    "features",
    "design"
  )
  data_dictionary <- glue(
    "Categories:",
    " 1. Ease of Use - Refers to how easy and intuitive the product is to use;",
    " 2. Customer Support - Refers to the quality and responsiveness of customer support provided;",
    " 3. Pricing - Refers to the cost of the product and its affordability compared to competitors;",
    " 4. Features - Refers to the functionality and capabilities provided by the product;",
    " 5. Design - Refers to the aesthetic and ergonomic design of the product."
  )

  # Define the system message with detailed instructions and the data dictionary
  system_message <- glue(
    "You will act as an expert in survey comment categorization to help analyze and categorize responses. ",
    "Use the following data dictionary to guide your categorization: {data_dictionary} ",
    "Translate the response to English, categorize it into one of these categories: {glue_collapse(categories, sep = ', ')}, and provide a rationale. ",
    "Label the translation, category, and rationale with semicolons. ",
    "Here is an example of a response: 'My problem was quickly solved when I called customer support' ",
    "Here is an example of the output I am requesting from you: 'Translation: My problem was quickly solved when I called customer support; Category: customer support; Rationale: The respondent mentioned a positive experience with customer support which indicates a positive perception of the customer support service provided.' ",
    "If the response does not fit into any category, categorize as 'Other'."
  )

  # Define the data payload for the local server
  payload <- list(
    messages = list(
      # as.character() drops the glue class so a plain string is serialised
      list(role = "system", content = as.character(system_message)),
      list(role = "user", content = comment)
    ),
    temperature = 0.7,
    max_tokens = 150,
    top_p = 0.9,
    frequency_penalty = 0.0,
    presence_penalty = 0.0
  )
  json_body <- toJSON(payload, auto_unbox = TRUE)

  # The body is already a JSON string, so it is sent as-is ("raw") and only
  # the Content-Type header is set. A connection failure (e.g. the local
  # server is not running) is re-raised with a message naming the URL.
  response <- tryCatch(
    POST(server_url, content_type_json(), body = json_body, encode = "raw"),
    error = function(e) {
      stop(
        glue("Could not reach the local server at {server_url}: {conditionMessage(e)}"),
        call. = FALSE
      )
    }
  )

  if (status_code(response) != 200) {
    # Explicit encoding avoids httr's "No encoding supplied" guess message
    error_body <- content(response, as = "text", encoding = "UTF-8")
    stop(
      glue("Error in API request: {status_code(response)}, {error_body}"),
      call. = FALSE
    )
  }

  # Parse response and return the content as a nested list
  response_content <- content(
    response,
    as = "parsed",
    type = "application/json",
    encoding = "UTF-8"
  )
  if (isTRUE(verbose)) {
    print(response_content) # Print the response content for debugging
  }
  response_content
}
# Apply the function to categorize comments in the survey data
# One request is made per row; category_result is a list-column holding the
# parsed response returned for each comment.
results <- survey_data %>%
  mutate(category_result = map(comment, local_model_categorize))

# Print results
print(results)
# Extract relevant fields from the results and create a final dataframe
# The model is asked to answer as
#   "Translation: ...; Category: ...; Rationale: ..."
# so each field is pulled out of the reply text by its label. A field that
# the model did not label as requested comes back as NA; the unparsed reply
# is kept in raw_response so such rows can still be inspected.
final_results <- results %>%
  mutate(
    # pluck() yields NA instead of an error when a response has no reply text
    raw_response = map_chr(
      category_result,
      ~ pluck(.x, "choices", 1, "message", "content", .default = NA_character_)
    ),
    # str_extract() is vectorised; str_trim() drops the space after the label
    translation = str_trim(str_extract(raw_response, "(?<=Translation:)[^;]+")),
    category = str_trim(str_extract(raw_response, "(?<=Category:)[^;]+")),
    rationale = str_trim(str_extract(raw_response, "(?<=Rationale:).*"))
  ) %>%
  select(
    respondent_id, comment, translation, category, rationale, raw_response
  )

# Print final results
print(final_results)

# Write the results to a CSV file
write_csv(final_results, "survey_data_results.csv")
# Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment.