Skip to content

Instantly share code, notes, and snippets.

@ayoubzulfiqar
Created October 8, 2025 11:40
Show Gist options
  • Save ayoubzulfiqar/909959837b7578eb4770e9ae7b22380b to your computer and use it in GitHub Desktop.
Save ayoubzulfiqar/909959837b7578eb4770e9ae7b22380b to your computer and use it in GitHub Desktop.
LeetCode Data Scraper
import requests
import json
import time
import os
from typing import Dict, List, Any
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
class LeetCodeDataFetcher:
    """Download LeetCode problem metadata and statements over GraphQL.

    Every request goes through one shared ``requests.Session`` that carries a
    JSON content type and a browser-like User-Agent header.

    Args:
        max_workers: Number of threads used by
            ``fetch_all_problems_concurrent``.
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled connection cannot hang a worker forever.
        request_delay: Seconds a worker pauses after each network fetch to
            throttle the overall request rate.
    """

    _PROBLEM_LIST_QUERY = """
    query problemsetQuestionList {
      problemsetQuestionList: questionList(
        categorySlug: ""
        limit: -1
        skip: 0
        filters: {}
      ) {
        total: totalNum
        questions: data {
          acRate
          difficulty
          freqBar
          frontendQuestionId: questionFrontendId
          isFavor
          paidOnly: isPaidOnly
          status
          title
          titleSlug
          topicTags {
            name
            slug
          }
        }
      }
    }
    """

    _QUESTION_CONTENT_QUERY = """
    query questionContent($titleSlug: String!) {
      question(titleSlug: $titleSlug) {
        content
        codeDefinition
        codeSnippets {
          lang
          langSlug
          code
        }
        difficulty
        frontendQuestionId: questionFrontendId
        paidOnly: isPaidOnly
        title
        titleSlug
        topicTags {
          name
          slug
        }
        hints
        exampleTestcases
        metaData
        stats
        similarQuestions
      }
    }
    """

    def __init__(self, max_workers=3, timeout=30, request_delay=0.1):
        self.base_url = "https://leetcode.com/graphql"
        self.headers = {
            "Content-Type": "application/json",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        self.session = requests.Session()
        self.session.headers.update(self.headers)
        self.max_workers = max_workers
        self.timeout = timeout
        self.request_delay = request_delay
        # Kept for backward compatibility with code that reads this attribute;
        # results are collected on the calling thread, so it is not needed here.
        self.lock = threading.Lock()

    @staticmethod
    def _empty_problem_list() -> Dict[str, Any]:
        """Return a fresh, well-formed problem-list payload with no questions."""
        return {"data": {"problemsetQuestionList": {"total": 0, "questions": []}}}

    def fetch_problem_list(self) -> Dict[str, Any]:
        """Fetch the complete list of problems.

        Returns:
            The decoded GraphQL response. ``data.problemsetQuestionList`` is
            always a dict with a ``questions`` list; on a transport error, an
            undecodable body, or a response without that shape, an empty list
            with ``total`` 0 is returned and the problem is printed.
        """
        try:
            response = self.session.post(
                self.base_url,
                json={"query": self._PROBLEM_LIST_QUERY},
                timeout=self.timeout,
            )
            response.raise_for_status()
            payload = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"Error fetching problem list: {e}")
            return self._empty_problem_list()

        # GraphQL can answer HTTP 200 with "data": null plus an "errors" list;
        # validate the shape here so callers can index into it safely.
        data = payload.get("data") if isinstance(payload, dict) else None
        listing = data.get("problemsetQuestionList") if isinstance(data, dict) else None
        if not isinstance(listing, dict) or not isinstance(listing.get("questions"), list):
            errors = payload.get("errors") if isinstance(payload, dict) else payload
            print(f"Error fetching problem list: unexpected response ({errors})")
            return self._empty_problem_list()
        return payload

    def fetch_single_problem(self, question: Dict[str, Any]) -> Dict[str, Any]:
        """Fetch detailed content for a single problem.

        Args:
            question: One entry from the problem list; ``paidOnly`` and
                ``titleSlug`` are read from it.

        Returns:
            A dict with ``basic_info`` (the input), ``detailed_content`` (the
            ``question`` object from the response, or ``None``) and
            ``is_paid``. An ``error`` string is added for paid problems, for
            any failure, and when the server returns no question data. This
            method never raises for a dict input: it is the per-task boundary
            of the thread pool, so failures are reported in the result.
        """
        is_paid = False
        try:
            is_paid = bool(question.get("paidOnly"))
            if is_paid:
                # Paid problems are skipped without any network request.
                return {
                    "basic_info": question,
                    "detailed_content": None,
                    "is_paid": True,
                    "error": "Paid problem - no detailed content available"
                }

            variables = {"titleSlug": question["titleSlug"]}
            response = self.session.post(
                self.base_url,
                json={"query": self._QUESTION_CONTENT_QUERY, "variables": variables},
                timeout=self.timeout,
            )
            response.raise_for_status()
            payload = response.json()

            data = payload.get("data") if isinstance(payload, dict) else None
            details = data.get("question") if isinstance(data, dict) else None
            if details is None:
                # Without this, a null answer would be counted as a success.
                errors = payload.get("errors") if isinstance(payload, dict) else payload
                return {
                    "basic_info": question,
                    "detailed_content": None,
                    "is_paid": False,
                    "error": f"No question data returned ({errors})"
                }
            return {
                "basic_info": question,
                "detailed_content": details,
                "is_paid": False
            }
        except Exception as e:
            # Deliberately broad: one bad problem must not abort the whole run.
            return {
                "basic_info": question,
                "detailed_content": None,
                "is_paid": is_paid,
                "error": str(e)
            }

    def _fetch_throttled(self, question: Dict[str, Any]) -> Dict[str, Any]:
        """Fetch one problem, then pause the worker if a request was made."""
        result = self.fetch_single_problem(question)
        # The pause runs inside the worker thread so it actually spaces out
        # requests; paid problems made no request and need no pause.
        if not result.get("is_paid") and self.request_delay > 0:
            time.sleep(self.request_delay)
        return result

    def fetch_all_problems_concurrent(self) -> Dict[str, Any]:
        """Fetch all problems concurrently.

        Returns:
            A dict with ``metadata`` (reported total, fetch time, method) and
            ``problems``, the per-problem results in completion order.
        """
        print("Fetching problem list...")
        problem_list = self.fetch_problem_list()
        listing = problem_list["data"]["problemsetQuestionList"]
        questions = listing["questions"]
        total = len(questions)

        all_problems = {
            "metadata": {
                "total_problems": listing.get("total", total),
                "fetch_time": time.strftime("%Y-%m-%d %H:%M:%S"),
                "method": "concurrent_fetch"
            },
            "problems": []
        }
        print(f"Fetching detailed content for {total} problems concurrently (max {self.max_workers} workers)...")

        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [
                executor.submit(self._fetch_throttled, question)
                for question in questions
            ]
            # as_completed is consumed on this thread only, so appending to
            # the shared list needs no lock.
            for completed, future in enumerate(as_completed(futures), start=1):
                result = future.result()
                all_problems["problems"].append(result)

                question_title = result["basic_info"].get("title")
                if result.get("is_paid"):
                    status = "PAID"
                elif "error" in result:
                    status = "ERROR"
                else:
                    status = "SUCCESS"
                print(f"Progress: {completed}/{total} - {question_title} [{status}]")
        return all_problems
def _write_json(path, payload):
    """Write *payload* to *path* as indented UTF-8 JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)


def _build_summary(all_data, free_problems, paid_problems, failed_fetches):
    """Build the summary document: overall counts plus difficulty and tag tallies.

    Difficulty and tag counts cover *free_problems* only.
    """
    stats = {"easy_count": 0, "medium_count": 0, "hard_count": 0}
    difficulties = {}
    tags = {}
    for problem in free_problems:
        info = problem.get("basic_info")
        if not info:
            continue
        difficulty = info["difficulty"]
        difficulties[difficulty] = difficulties.get(difficulty, 0) + 1
        # Only easy/medium/hard have a dedicated counter; others are ignored.
        stat_key = f"{difficulty.lower()}_count"
        if stat_key in stats:
            stats[stat_key] += 1
        # topicTags may be missing or null; treat both as "no tags".
        for tag in info.get("topicTags") or []:
            tag_name = tag["name"]
            tags[tag_name] = tags.get(tag_name, 0) + 1

    return {
        "metadata": all_data["metadata"],
        "summary": {
            "total_problems": len(all_data["problems"]),
            "free_problems_with_content": len(free_problems),
            "paid_problems": len(paid_problems),
            "failed_free_problems": len(failed_fetches),
            "difficulties": difficulties,
            "tags": tags,
            "stats": stats
        }
    }


def main():
    """Fetch every LeetCode problem and write the data, summary and free-only JSON files.

    Writes ``leetcode_complete_data.json``, ``leetcode_summary.json`` and
    ``leetcode_free_problems.json`` to the current directory and prints the
    final statistics.
    """
    print("Starting LeetCode data fetch...")
    fetcher = LeetCodeDataFetcher(max_workers=3)  # Conservative rate limiting
    all_data = fetcher.fetch_all_problems_concurrent()

    # Save complete data
    output_file = "leetcode_complete_data.json"
    _write_json(output_file, all_data)
    print(f"\nComplete data saved to {output_file}")

    # Partition results: paid, free with content, and free that failed.
    problems = all_data["problems"]
    free_problems = [p for p in problems if not p.get("is_paid") and "error" not in p]
    paid_problems = [p for p in problems if p.get("is_paid")]
    failed_fetches = [p for p in problems if "error" in p and not p.get("is_paid")]

    summary = _build_summary(all_data, free_problems, paid_problems, failed_fetches)
    summary_file = "leetcode_summary.json"
    _write_json(summary_file, summary)
    print(f"Summary saved to {summary_file}")

    counts = summary["summary"]
    # The per-difficulty counters live under "stats", not directly under
    # "summary"; reading them from the wrong level raised KeyError.
    stats = counts["stats"]
    print("\nFinal Statistics:")
    print(f"- Total problems: {counts['total_problems']}")
    print(f"- Free problems with content: {counts['free_problems_with_content']}")
    print(f"- Paid problems: {counts['paid_problems']}")
    print(f"- Failed free problem fetches: {counts['failed_free_problems']}")
    print("- Difficulty breakdown:")
    print(f" - Easy: {stats['easy_count']}")
    print(f" - Medium: {stats['medium_count']}")
    print(f" - Hard: {stats['hard_count']}")

    # Save separate files for easier GitHub Pages access
    # Free problems only
    free_problems_data = {
        "metadata": all_data["metadata"],
        "problems": free_problems
    }
    _write_json("leetcode_free_problems.json", free_problems_data)
    print("Free problems data saved to leetcode_free_problems.json")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment