# Source: GitHub gist by @farach (farach/24cda6c8ee94ca7260c92edab3fcde8d),
# last active July 4, 2024.
# Instructions:
# - Download LM Studio
# - Download Phi-3 Model (within LM Studio)
# - Load the model into LM Studio
# - Start the Local Server (instructions here: https://lmstudio.ai/docs/local-server)
# Load necessary libraries
library(httr)
library(jsonlite)
library(tidyverse)
library(glue)
# Synthetic survey: five respondents with one free-text comment each.
# The comments mix English, Vietnamese, and Spanish so that the translation
# step of the prompt has something to do.
survey_data <- tribble(
  ~respondent_id, ~comment,
  1L, "The product is very easy to use.",
  2L, "Tôi thích sự hỗ trợ của khách hàng.",
  3L, "The price is unbeatable.",
  4L, "Tiene excelentes caracteristicas.",
  5L, "The design is sleek and modern."
)

# Chat-completions endpoint exposed by the local inference server
local_server_url <- "http://localhost:1234/v1/chat/completions"
# Categorize a single survey comment using the local chat-completions model.
#
# Builds a system prompt containing the category list and a data dictionary,
# sends it together with the comment to the inference server, and returns the
# parsed JSON response.
#
# Arguments:
#   comment     - a single, non-missing character string (the survey comment).
#   server_url  - chat-completions endpoint; defaults to the script-level
#                 `local_server_url`.
#   temperature - sampling temperature sent with the request (default 0.7).
#   max_tokens  - maximum number of tokens to generate (default 150).
#   verbose     - if TRUE, print the parsed response for debugging.
#
# Returns:
#   The parsed response as a nested list. The generated text is read
#   downstream from `$choices[[1]]$message$content`.
#
# Errors:
#   Stops if `comment` is not a single string, if the server cannot be
#   reached, or if the server answers with a status other than 200.
local_model_categorize <- function(comment,
                                   server_url = local_server_url,
                                   temperature = 0.7,
                                   max_tokens = 150,
                                   verbose = FALSE) {
  # An NA comment would be serialized as JSON null, so reject it up front.
  if (!is.character(comment) || length(comment) != 1L || is.na(comment)) {
    stop(
      "`comment` must be a single non-missing character string.",
      call. = FALSE
    )
  }

  # Define the categories and provide the model with a data dictionary
  categories <- c(
    "ease of use",
    "customer support",
    "pricing",
    "features",
    "design"
  )
  data_dictionary <- glue(
    "Categories:",
    " 1. Ease of Use - Refers to how easy and intuitive the product is to use;",
    " 2. Customer Support - Refers to the quality and responsiveness of customer support provided;",
    " 3. Pricing - Refers to the cost of the product and its affordability compared to competitors;",
    " 4. Features - Refers to the functionality and capabilities provided by the product;",
    " 5. Design - Refers to the aesthetic and ergonomic design of the product."
  )

  # System message with detailed instructions and the data dictionary.
  # The requested output format ("Translation: ...; Category: ...;
  # Rationale: ...") is what the downstream regex extraction relies on.
  system_message <- glue(
    "You will act as an expert in survey comment categorization to help analyze and categorize responses. ",
    "Use the following data dictionary to guide your categorization: {data_dictionary} ",
    "Translate the response to English, categorize it into one of these categories: {glue_collapse(categories, sep = ', ')}, and provide a rationale. ",
    "Label the translation, category, and rationale with semicolons. ",
    "Here is an example of a response: 'My problem was quickly solved when I called customer support' ",
    "Here is an example of the output I am requesting from you: 'Translation: My problem was quickly solved when I called customer support; Category: customer support; Rationale: The respondent mentioned a positive experience with customer support which indicates a positive perception of the customer support service provided.' ",
    "If the response does not fit into any category, categorize as 'Other'."
  )

  # Request payload for the chat-completions endpoint
  payload <- list(
    messages = list(
      # as.character() drops the glue class so a plain string is serialized
      list(role = "system", content = as.character(system_message)),
      list(role = "user", content = comment)
    ),
    temperature = temperature,
    max_tokens = max_tokens,
    top_p = 0.9,
    frequency_penalty = 0.0,
    presence_penalty = 0.0
  )
  json_body <- toJSON(payload, auto_unbox = TRUE)

  # The body is already a JSON string, so httr sends it verbatim; only the
  # Content-Type header needs to be set.
  response <- tryCatch(
    POST(server_url, content_type_json(), body = json_body),
    error = function(e) {
      stop(
        glue(
          "Could not reach the local inference server at {server_url}: ",
          "{conditionMessage(e)}"
        ),
        call. = FALSE
      )
    }
  )

  if (status_code(response) != 200) {
    # Explicit encoding avoids httr's "No encoding supplied" message
    error_body <- content(response, as = "text", encoding = "UTF-8")
    stop(
      glue("Error in API request: {status_code(response)}, {error_body}"),
      call. = FALSE
    )
  }

  response_content <- content(
    response,
    as = "parsed",
    type = "application/json",
    encoding = "UTF-8"
  )
  if (isTRUE(verbose)) {
    print(response_content) # Debugging aid only
  }
  response_content
}
# Apply the function to categorize comments in the survey data.
# `category_result` is a list column holding one parsed response per comment.
results <- survey_data %>%
  mutate(category_result = map(comment, local_model_categorize))

# Print results
print(results)

# Extract relevant fields from the results and create a final dataframe.
# The model is asked to answer as
#   "Translation: ...; Category: ...; Rationale: ..."
# so each field is pulled out of the raw text by its label. Fields the model
# did not emit in that format come back as NA.
final_results <- results %>%
  mutate(
    # Raw generated text; NA when the response has no message content
    model_output = map_chr(
      category_result,
      ~ pluck(.x, "choices", 1, "message", "content", .default = NA_character_)
    ),
    translation = str_squish(
      str_extract(model_output, "(?<=Translation:)[^;]+")
    ),
    category = str_squish(
      str_extract(model_output, "(?<=Category:)[^;]+")
    ),
    # dotall lets the rationale span several lines
    rationale = str_squish(
      str_extract(model_output, regex("(?<=Rationale:).*", dotall = TRUE))
    )
  ) %>%
  # model_output is kept last so unparsed answers can still be inspected
  select(respondent_id, comment, translation, category, rationale, model_output)

# Print final results
print(final_results)

# Write the results to a CSV file
write_csv(final_results, "survey_data_results.csv")
# (End of gist; GitHub page footer removed.)