farach · July 4, 2024 23:53
diff --git a/categorize_survey_comments_with_LM_Studio.R b/categorize_survey_comments_with_LM_Studio.R
 # Instructions:
 # - Download LM Studio
 # - Download Phi-3 Model (within LM Studio)
 # - Load the model into LM Studio
 # - Start the Local Server (instructions here: https://lmstudio.ai/docs/local-server)

 # Load necessary libraries
 library(httr)
 library(jsonlite)
 library(tidyverse)
 library(glue)

 # Synthetic survey responses, written one respondent per row.
 # Comments are deliberately in mixed languages so the model has to translate.
 survey_data <- tribble(
  ~respondent_id, ~comment,
  1L, "The product is very easy to use.",
  2L, "Tôi thích sự hỗ trợ của khách hàng.",
  3L, "The price is unbeatable.",
  4L, "Tiene excelentes caracteristicas.",
  5L, "The design is sleek and modern."
 )

 # Chat-completions endpoint exposed by the local inference server
 local_server_url <- "http://localhost:1234/v1/chat/completions"

 # Categorize one survey comment using the local model.
 #
 # Builds a system prompt that embeds the category data dictionary, sends the
 # comment to the chat-completions endpoint, and returns the parsed response.
 #
 # Args:
 #   comment: A single, non-missing character string with the survey comment.
 #   server_url: Endpoint to POST to. Defaults to the script-level
 #     `local_server_url`.
 #   temperature: Sampling temperature forwarded in the request (default 0.7).
 #   max_tokens: Token limit forwarded in the request (default 150).
 #
 # Returns:
 #   The parsed response body. Downstream code in this script reads the model
 #   text from `$choices[[1]]$message$content`.
 #
 # Raises:
 #   An error when `comment` is not a single non-missing string, or when the
 #   server replies with a status other than 200.
 local_model_categorize <- function(comment,
                                    server_url = local_server_url,
                                    temperature = 0.7,
                                    max_tokens = 150) {
  # Fail early instead of sending a missing or vector-valued message
  if (!is.character(comment) || length(comment) != 1L || is.na(comment)) {
    stop(
      "`comment` must be a single non-missing character string.",
      call. = FALSE
    )
  }

  # Define the categories and provide the model with a data dictionary
  categories <- c(
    "ease of use",
    "customer support",
    "pricing",
    "features",
    "design"
  )

  data_dictionary <- glue(
    "Categories:",
    " 1. Ease of Use - Refers to how easy and intuitive the product is to use;",
    " 2. Customer Support - Refers to the quality and responsiveness of customer support provided;",
    " 3. Pricing - Refers to the cost of the product and its affordability compared to competitors;",
    " 4. Features - Refers to the functionality and capabilities provided by the product;",
    " 5. Design - Refers to the aesthetic and ergonomic design of the product."
  )

  # Define the system message with detailed instructions and the data dictionary
  system_message <- glue(
    "You will act as an expert in survey comment categorization to help analyze and categorize responses. ",
    "Use the following data dictionary to guide your categorization: {data_dictionary} ",
    "Translate the response to English, categorize it into one of these categories: {glue_collapse(categories, sep = ', ')}, and provide a rationale. ",
    "Label the translation, category, and rationale with semicolons. ",
    "Here is an example of a response: 'My problem was quickly solved when I called customer support' ",
    "Here is an example of the output I am requesting from you: 'Translation: My problem was quickly solved when I called customer support; Category: customer support; Rationale: The respondent mentioned a positive experience with customer support which indicates a positive perception of the customer support service provided.' ",
    "If the response does not fit into any category, categorize as 'Other'."
  )

  # Define the data payload for the local server
  payload <- list(
    messages = list(
      list(role = "system", content = system_message),
      list(role = "user", content = comment)
    ),
    temperature = temperature,
    max_tokens = max_tokens,
    top_p = 0.9,
    frequency_penalty = 0.0,
    presence_penalty = 0.0
  )

  json_body <- toJSON(payload, auto_unbox = TRUE)

  response <- POST(
    server_url,
    add_headers("Content-Type" = "application/json"),
    body = json_body,
    encode = "json"
  )

  status <- status_code(response)
  if (status != 200) {
    # Read the error body with an explicit encoding so httr does not guess
    error_body <- content(response, as = "text", encoding = "UTF-8")
    stop(glue("Error in API request: {status}, {error_body}"), call. = FALSE)
  }

  # Parse the JSON response body
  response_content <- content(response, as = "parsed", type = "application/json")
  print(response_content)  # Print the response content for debugging
  response_content
 }

 # Apply the function to categorize comments in the survey data
 # (one request per comment)
 results <- survey_data %>%
  mutate(category_result = map(comment, local_model_categorize))

 # Print results
 print(results)

 # Extract relevant fields from the results and create a final dataframe.
 # The prompt asks the model to answer as
 # "Translation: ...; Category: ...; Rationale: ...", so each field is pulled
 # from the raw reply by its label and trimmed of surrounding whitespace.
 final_results <- results %>%
  mutate(
    model_reply = map_chr(category_result, ~ .x$choices[[1]]$message$content),
    # Translation and category end at the next semicolon or line break
    translation = str_trim(
      str_extract(model_reply, "(?<=Translation:)\\s*[^;\\n]+")
    ),
    category = str_trim(
      str_extract(model_reply, "(?<=Category:)\\s*[^;\\n]+")
    ),
    # (?s) lets the rationale continue past line breaks in the reply
    rationale = str_trim(str_extract(model_reply, "(?s)(?<=Rationale:).*"))
  ) %>%
  select(respondent_id, comment, translation, category, rationale)

 # Print final results
 print(final_results)

 # Write the results to a CSV file
 write_csv(final_results, "survey_data_results.csv")
	# Instructions:
	# - Download LM Studio
	# - Download Phi-3 Model (within LM Studio)
	# - Load the model into LM Studio
	# - Start the Local Server (instructions here: https://lmstudio.ai/docs/local-server)

	# Load necessary libraries
	library(httr)
	library(jsonlite)
	library(tidyverse)
	library(glue)

	# Synthetic survey responses, written one respondent per row.
	# Comments are deliberately in mixed languages so the model has to translate.
	survey_data <- tribble(
	  ~respondent_id, ~comment,
	  1L, "The product is very easy to use.",
	  2L, "Tôi thích sự hỗ trợ của khách hàng.",
	  3L, "The price is unbeatable.",
	  4L, "Tiene excelentes caracteristicas.",
	  5L, "The design is sleek and modern."
	)

	# Chat-completions endpoint exposed by the local inference server
	local_server_url <- "http://localhost:1234/v1/chat/completions"

	# Categorize one survey comment using the local model.
	#
	# Builds a system prompt that embeds the category data dictionary, sends the
	# comment to the chat-completions endpoint, and returns the parsed response.
	#
	# Args:
	#   comment: A single, non-missing character string with the survey comment.
	#   server_url: Endpoint to POST to. Defaults to the script-level
	#     `local_server_url`.
	#   temperature: Sampling temperature forwarded in the request (default 0.7).
	#   max_tokens: Token limit forwarded in the request (default 150).
	#
	# Returns:
	#   The parsed response body. Downstream code in this script reads the model
	#   text from `$choices[[1]]$message$content`.
	#
	# Raises:
	#   An error when `comment` is not a single non-missing string, or when the
	#   server replies with a status other than 200.
	local_model_categorize <- function(comment,
	                                   server_url = local_server_url,
	                                   temperature = 0.7,
	                                   max_tokens = 150) {
	  # Fail early instead of sending a missing or vector-valued message
	  if (!is.character(comment) || length(comment) != 1L || is.na(comment)) {
	    stop(
	      "`comment` must be a single non-missing character string.",
	      call. = FALSE
	    )
	  }

	  # Define the categories and provide the model with a data dictionary
	  categories <- c(
	    "ease of use",
	    "customer support",
	    "pricing",
	    "features",
	    "design"
	  )

	  data_dictionary <- glue(
	    "Categories:",
	    " 1. Ease of Use - Refers to how easy and intuitive the product is to use;",
	    " 2. Customer Support - Refers to the quality and responsiveness of customer support provided;",
	    " 3. Pricing - Refers to the cost of the product and its affordability compared to competitors;",
	    " 4. Features - Refers to the functionality and capabilities provided by the product;",
	    " 5. Design - Refers to the aesthetic and ergonomic design of the product."
	  )

	  # Define the system message with detailed instructions and the data dictionary
	  system_message <- glue(
	    "You will act as an expert in survey comment categorization to help analyze and categorize responses. ",
	    "Use the following data dictionary to guide your categorization: {data_dictionary} ",
	    "Translate the response to English, categorize it into one of these categories: {glue_collapse(categories, sep = ', ')}, and provide a rationale. ",
	    "Label the translation, category, and rationale with semicolons. ",
	    "Here is an example of a response: 'My problem was quickly solved when I called customer support' ",
	    "Here is an example of the output I am requesting from you: 'Translation: My problem was quickly solved when I called customer support; Category: customer support; Rationale: The respondent mentioned a positive experience with customer support which indicates a positive perception of the customer support service provided.' ",
	    "If the response does not fit into any category, categorize as 'Other'."
	  )

	  # Define the data payload for the local server
	  payload <- list(
	    messages = list(
	      list(role = "system", content = system_message),
	      list(role = "user", content = comment)
	    ),
	    temperature = temperature,
	    max_tokens = max_tokens,
	    top_p = 0.9,
	    frequency_penalty = 0.0,
	    presence_penalty = 0.0
	  )

	  json_body <- toJSON(payload, auto_unbox = TRUE)

	  response <- POST(
	    server_url,
	    add_headers("Content-Type" = "application/json"),
	    body = json_body,
	    encode = "json"
	  )

	  status <- status_code(response)
	  if (status != 200) {
	    # Read the error body with an explicit encoding so httr does not guess
	    error_body <- content(response, as = "text", encoding = "UTF-8")
	    stop(glue("Error in API request: {status}, {error_body}"), call. = FALSE)
	  }

	  # Parse the JSON response body
	  response_content <- content(response, as = "parsed", type = "application/json")
	  print(response_content) # Print the response content for debugging
	  response_content
	}

	# Apply the function to categorize comments in the survey data
	# (one request per comment)
	results <- survey_data %>%
	  mutate(category_result = map(comment, local_model_categorize))

	# Print results
	print(results)

	# Extract relevant fields from the results and create a final dataframe.
	# The prompt asks the model to answer as
	# "Translation: ...; Category: ...; Rationale: ...", so each field is pulled
	# from the raw reply by its label and trimmed of surrounding whitespace.
	final_results <- results %>%
	  mutate(
	    model_reply = map_chr(category_result, ~ .x$choices[[1]]$message$content),
	    # Translation and category end at the next semicolon or line break
	    translation = str_trim(
	      str_extract(model_reply, "(?<=Translation:)\\s*[^;\\n]+")
	    ),
	    category = str_trim(
	      str_extract(model_reply, "(?<=Category:)\\s*[^;\\n]+")
	    ),
	    # (?s) lets the rationale continue past line breaks in the reply
	    rationale = str_trim(str_extract(model_reply, "(?s)(?<=Rationale:).*"))
	  ) %>%
	  select(respondent_id, comment, translation, category, rationale)

	# Print final results
	print(final_results)

	# Write the results to a CSV file
	write_csv(final_results, "survey_data_results.csv")