Skip to content

Instantly share code, notes, and snippets.

@makslevental
Created May 30, 2025 18:21
Show Gist options
  • Save makslevental/62f49bf8e482370792a2475939006f90 to your computer and use it in GitHub Desktop.
Save makslevental/62f49bf8e482370792a2475939006f90 to your computer and use it in GitHub Desktop.
Cluster LeetCode problems by solution strategy
import os
import re
import time
from collections import defaultdict
from pprint import pprint

import hdbscan
import leetcode
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import normalize
# Credentials handed to the leetcode client below. They are read from the
# environment so the secrets are not hard-coded in this file; if either
# variable is unset, the KeyError names the variable that must be exported.
csrf_token = os.environ["LEETCODE_CSRF_TOKEN"]
leetcode_session = os.environ["LEETCODE_SESSION"]

# Client configuration: the same CSRF token is stored under both header-style
# and cookie-style keys, alongside the session value and a Referer.
configuration = leetcode.Configuration()
configuration.api_key["x-csrftoken"] = csrf_token
configuration.api_key["csrftoken"] = csrf_token
configuration.api_key["LEETCODE_SESSION"] = leetcode_session
configuration.api_key["Referer"] = "https://leetcode.com"
configuration.debug = False

# Shared API client used by every request helper in this module.
api_instance = leetcode.DefaultApi(leetcode.ApiClient(configuration))
def get_num_favs(favorite_slug):
    """Return the ``questionNumber`` reported for a favorite list.

    Sends the ``favoriteDetailV2`` GraphQL query for ``favorite_slug`` through
    the module-level ``api_instance`` and reads ``questionNumber`` out of the
    decoded JSON response.
    """
    op_name = "favoriteDetailV2"
    query_text = """
query favoriteDetailV2($favoriteSlug: String!) {
favoriteDetailV2(favoriteSlug: $favoriteSlug) {
coverUrl
coverEmoji
coverBackgroundColor
description
creator {
realName
userAvatar
userSlug
}
hasCurrentQuestion
isPublicFavorite
lastQuestionAddedAt
name
questionNumber
slug
isDefaultList
favoriteType
lastModified: lastQuestionAddedAt
languageTagSlug
filtersInfo
sortByInfo
visibleFilters
collectCount
}
}
"""
    payload = {
        "query": query_text,
        "variables": {"favoriteSlug": favorite_slug},
        "operationName": op_name,
    }
    # The response is requested without preloading and decoded here via .json().
    raw_response = api_instance.graphql_post(body=payload, _preload_content=False)
    favorite_detail = raw_response.json()["data"][op_name]
    return favorite_detail["questionNumber"]
def get_questions(list_id):
    """Fetch the questions of the list identified by ``list_id``.

    The list size is first looked up with ``get_num_favs`` and used as the
    query ``limit`` (with ``skip=0``), so a single ``problemsetQuestionList``
    request asks for the whole list. Returns the ``questions`` attribute of
    the parsed response.
    """
    question_count = get_num_favs(list_id)

    query_text = """
query problemsetQuestionList($categorySlug: String, $limit: Int, $skip: Int, $filters: QuestionListFilterInput) {
problemsetQuestionList: questionList(
categorySlug: $categorySlug
limit: $limit
skip: $skip
filters: $filters
) {
questions: data {
questionFrontendId
title
titleSlug
categoryTitle
freqBar
content
isPaidOnly
difficulty
likes
dislikes
topicTags {
name
slug
}
stats
hints
}
}
}
"""

    # Build the request bottom-up: filter -> variables -> query object.
    filter_module = (
        leetcode.models.graphql_query_problemset_question_list_variables_filter_input
    )
    variables_module = (
        leetcode.models.graphql_query_problemset_question_list_variables
    )
    list_filter = filter_module.GraphqlQueryProblemsetQuestionListVariablesFilterInput(
        list_id=list_id
    )
    query_variables = variables_module.GraphqlQueryProblemsetQuestionListVariables(
        category_slug="",
        limit=question_count,
        skip=0,
        filters=list_filter,
    )
    request = leetcode.models.graphql_query.GraphqlQuery(
        query=query_text,
        variables=query_variables,
        operation_name="problemsetQuestionList",
    )

    time.sleep(2)  # Leetcode has a rate limiter
    response = api_instance.graphql_post(body=request)
    return response.data.problemset_question_list.questions
def _parse_approach_titles(rawmd):
    """Return the cleaned title of every ``### Approach`` heading in ``rawmd``.

    For each heading, the text up to the first ``<`` or newline is captured;
    only the part after the last ``:`` is kept, stripped of surrounding
    whitespace and of double quotes.
    """
    return [
        heading.split(":")[-1].strip().replace('"', "")
        for heading in re.findall(r"### Approach ([^<\n]*)", rawmd)
    ]


def get_solution_tags(problem_slug: str):
    """Return the approach titles from a problem's official solution article.

    Args:
        problem_slug: Title slug of the problem whose solution is requested.

    Returns:
        A list with one cleaned title per ``### Approach`` heading found in
        the article content (possibly empty), or ``None`` when the request
        yields no response or the response carries no solution content.
    """
    query = """
query ugcArticleOfficialSolutionArticle($questionSlug: String!) {
ugcArticleOfficialSolutionArticle(questionSlug: $questionSlug) {
content
}
}
"""
    graphql_request = {
        "query": query,
        "variables": {"questionSlug": problem_slug},
        "operationName": "ugcArticleOfficialSolutionArticle",
    }
    response = api_instance.graphql_post(body=graphql_request, _preload_content=False)
    if not response:
        return None

    # Both "data" and the article itself may be null in the JSON body; treat
    # either as a missing solution instead of failing on a None subscript.
    data = response.json().get("data") or {}
    article = data.get("ugcArticleOfficialSolutionArticle")
    content = article.get("content") if article else None
    if content is None:
        print(f"{problem_slug=} missing soln")
        return None

    return _parse_approach_titles(content)
def get_all_soln_strats(list_id):
    """Collect topic-tag names and solution approach titles for a list.

    For every question returned by ``get_questions(list_id)``, the names of
    its topic tags are added first, followed by whatever approach titles
    ``get_solution_tags`` finds for the question's slug.

    Returns:
        A flat list of strings, in question order.
    """
    all_text = []
    for question in get_questions(list_id):
        all_text.extend(tag.name for tag in question.topic_tags)
        # get_solution_tags returns None when no solution is available;
        # `or ()` turns that (and an empty list) into "add nothing".
        all_text.extend(get_solution_tags(question.title_slug) or ())
        # Short pause after each per-question request (presumably to stay
        # under LeetCode's rate limiter).
        time.sleep(0.2)
    return all_text
def cluster_by_soln_strat(list_id):
    """Cluster a list's tag and approach strings by textual similarity.

    The strings gathered by ``get_all_soln_strats`` are TF-IDF vectorised,
    row-normalised and clustered with HDBSCAN (``min_cluster_size=2``,
    euclidean metric).

    Returns:
        A ``defaultdict(list)`` mapping each integer cluster label to the
        strings assigned to it (HDBSCAN uses ``-1`` for points it leaves
        unclustered). Empty when there is nothing to cluster.
    """
    clusters = defaultdict(list)
    all_text = get_all_soln_strats(list_id)
    if not all_text:
        # The vectorizer cannot fit on zero documents; report "no clusters"
        # instead of failing deep inside scikit-learn.
        return clusters

    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(all_text)
    # With unit-length rows, euclidean distance is a monotone function of
    # cosine similarity, which is the notion of closeness wanted for text.
    X_normalized = normalize(X)

    clusterer = hdbscan.HDBSCAN(min_cluster_size=2, metric="euclidean")
    cluster_labels = clusterer.fit_predict(X_normalized)

    for sentence, label in zip(all_text, cluster_labels):
        # Labels arrive as NumPy integer scalars; plain ints hash and compare
        # the same but print cleanly and serialise without surprises.
        clusters[int(label)].append(sentence)
    return clusters
# Script entry: cluster one favorite list and pretty-print the result.
# The same identifier is used both as the favorite slug and as the list filter.
fav_list_id = "2jvrtw0j"
clusters = cluster_by_soln_strat(list_id=fav_list_id)
pprint(clusters)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment