Skip to content

Instantly share code, notes, and snippets.

@sametcn99
Created June 11, 2025 08:42
Show Gist options
  • Select an option

  • Save sametcn99/660f2e9498009679744d96ce784bd451 to your computer and use it in GitHub Desktop.

Select an option

Save sametcn99/660f2e9498009679744d96ce784bd451 to your computer and use it in GitHub Desktop.
sql query builder
/**
* Database schema types for SQL Lab
*/
export type DatabaseColumn = {
column_name: string
data_type: string
is_nullable: boolean
column_default?: string
column_comment?: string
ordinal_position?: number
}
export type DatabaseConstraint = {
constraint_name: string
constraint_type: string
column_name?: string
foreign_table_name?: string
foreign_column_name?: string
}
export type DatabaseTable = {
table_name: string
schema_name: string
table_type: string
table_comment?: string
columns: DatabaseColumn[]
constraints?: DatabaseConstraint[]
estimated_row_count?: number | null
total_size_bytes?: number | null
table_size_bytes?: number | null
}
export type DatabaseSchema = {
schema_name: string
tables: DatabaseTable[]
}
export type DatabaseSchemaResponse = {
schemas: DatabaseSchema[]
total_tables: number
total_schemas: number
}
/**
* SQL Query Builder utility functions for generating common SQL patterns
*/
export class SqlQueryBuilder {
/**
* Generate a basic SELECT query for a table
*/
static generateSelectQuery(table: DatabaseTable, limit = 100): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
return `-- Select all columns from ${table.table_name}\nSELECT *\nFROM ${tableName}\nLIMIT ${limit};`
}
/**
* Generate SELECT query with specific columns
*/
static generateSelectSpecificColumnsQuery(table: DatabaseTable): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
const columns = table.columns
.slice(0, 5)
.map(col => col.column_name)
.join(',\n ')
return `-- Select specific columns from ${table.table_name}\nSELECT \n ${columns}\nFROM ${tableName}\nLIMIT 100;`
}
/**
* Generate SELECT DISTINCT query
*/
static generateSelectDistinctQuery(table: DatabaseTable): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
const textColumns = table.columns.filter(col =>
['character varying', 'varchar', 'text', 'char'].some(type =>
col.data_type.toLowerCase().includes(type.toLowerCase())
)
)
if (textColumns.length === 0) {
return `-- No text columns found in ${table.table_name}\n-- Cannot generate DISTINCT query`
}
const column = textColumns[0].column_name
return `-- Select distinct values from ${table.table_name}\nSELECT DISTINCT ${column},\n COUNT(*) as frequency\nFROM ${tableName}\nGROUP BY ${column}\nORDER BY frequency DESC\nLIMIT 50;`
}
/**
* Generate GROUP BY aggregation query
*/
static generateGroupByQuery(table: DatabaseTable): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
const textColumns = table.columns.filter(col =>
['character varying', 'varchar', 'text', 'char'].some(type =>
col.data_type.toLowerCase().includes(type.toLowerCase())
)
)
const numericColumns = table.columns.filter(col =>
['integer', 'bigint', 'decimal', 'numeric', 'real', 'double precision', 'smallint'].some(type =>
col.data_type.toLowerCase().includes(type.toLowerCase())
)
)
if (textColumns.length === 0) {
return `-- No text columns found in ${table.table_name}\n-- Cannot generate GROUP BY query`
}
const groupByColumn = textColumns[0].column_name
let selectClause = ` ${groupByColumn},\n COUNT(*) as record_count`
if (numericColumns.length > 0) {
const numericCol = numericColumns[0].column_name
selectClause += `,\n AVG(${numericCol}) as avg_${numericCol},\n SUM(${numericCol}) as sum_${numericCol}`
}
return `-- Group by analysis for ${table.table_name}\nSELECT \n${selectClause}\nFROM ${tableName}\nGROUP BY ${groupByColumn}\nORDER BY record_count DESC\nLIMIT 20;`
}
/**
* Generate JOIN query template
*/
static generateJoinQuery(table: DatabaseTable): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
const foreignKeyColumns = table.columns.filter(
col => col.column_name.toLowerCase().includes('_id') || col.column_name.toLowerCase().endsWith('id')
)
if (foreignKeyColumns.length === 0) {
return `-- No apparent foreign key columns found in ${table.table_name}\n-- Manual JOIN example:\nSELECT \n a.*,\n b.column_name\nFROM ${tableName} a\nINNER JOIN other_table b ON a.foreign_key_id = b.id\nLIMIT 100;`
}
const fkColumn = foreignKeyColumns[0].column_name
const relatedTable = fkColumn.replace('_id', '').replace('Id', '')
return `-- JOIN query example for ${table.table_name}\nSELECT \n a.*,\n b.* -- Replace with specific columns\nFROM ${tableName} a\nINNER JOIN ${relatedTable} b ON a.${fkColumn} = b.id\nLIMIT 100;\n\n-- Alternative LEFT JOIN:\n-- LEFT JOIN ${relatedTable} b ON a.${fkColumn} = b.id`
}
/**
* Generate window function query
*/
static generateWindowFunctionQuery(table: DatabaseTable): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
const numericColumns = table.columns.filter(col =>
['integer', 'bigint', 'decimal', 'numeric', 'real', 'double precision', 'smallint'].some(type =>
col.data_type.toLowerCase().includes(type.toLowerCase())
)
)
const timestampColumns = table.columns.filter(
col =>
['timestamp', 'timestamptz', 'date'].some(type => col.data_type.toLowerCase().includes(type.toLowerCase())) ||
col.column_name.toLowerCase().includes('time') ||
col.column_name.toLowerCase().includes('date') ||
col.column_name.toLowerCase().includes('created')
)
if (numericColumns.length === 0) {
return `-- No numeric columns found in ${table.table_name}\n-- Cannot generate window function query`
}
const numericCol = numericColumns[0].column_name
const orderByColumn = timestampColumns.length > 0 ? timestampColumns[0].column_name : numericCol
return `-- Window function analysis for ${table.table_name}\nSELECT \n *,\n ROW_NUMBER() OVER (ORDER BY ${orderByColumn}) as row_num,\n RANK() OVER (ORDER BY ${numericCol} DESC) as rank_by_${numericCol},\n LAG(${numericCol}) OVER (ORDER BY ${orderByColumn}) as previous_${numericCol},\n AVG(${numericCol}) OVER (ORDER BY ${orderByColumn} ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) as moving_avg\nFROM ${tableName}\nORDER BY ${orderByColumn}\nLIMIT 100;`
}
/**
* Generate subquery example
*/
static generateSubqueryExample(table: DatabaseTable): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
const numericColumns = table.columns.filter(col =>
['integer', 'bigint', 'decimal', 'numeric', 'real', 'double precision', 'smallint'].some(type =>
col.data_type.toLowerCase().includes(type.toLowerCase())
)
)
if (numericColumns.length === 0) {
return `-- No numeric columns found in ${table.table_name}\n-- Basic subquery example:\nSELECT *\nFROM ${tableName}\nWHERE id IN (\n SELECT id \n FROM ${tableName} \n LIMIT 10\n);`
}
const numericCol = numericColumns[0].column_name
return `-- Subquery examples for ${table.table_name}\n-- Records above average:\nSELECT *\nFROM ${tableName}\nWHERE ${numericCol} > (\n SELECT AVG(${numericCol}) \n FROM ${tableName}\n)\nORDER BY ${numericCol} DESC\nLIMIT 50;\n\n-- Top 10% by value:\n-- SELECT *\n-- FROM ${tableName}\n-- WHERE ${numericCol} >= (\n-- SELECT PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY ${numericCol})\n-- FROM ${tableName}\n-- );`
}
/**
* Generate date range filtering query
*/
static generateDateRangeQuery(table: DatabaseTable): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
const timestampColumns = table.columns.filter(
col =>
['timestamp', 'timestamptz', 'date'].some(type => col.data_type.toLowerCase().includes(type.toLowerCase())) ||
col.column_name.toLowerCase().includes('time') ||
col.column_name.toLowerCase().includes('date') ||
col.column_name.toLowerCase().includes('created') ||
col.column_name.toLowerCase().includes('updated')
)
if (timestampColumns.length === 0) {
return `-- No date/timestamp columns found in ${table.table_name}\n-- Cannot generate date range query`
}
const dateColumn = timestampColumns[0].column_name
return `-- Date range filtering for ${table.table_name}\n-- Last 7 days:\nSELECT *\nFROM ${tableName}\nWHERE ${dateColumn} >= NOW() - INTERVAL '7 days'\nORDER BY ${dateColumn} DESC\nLIMIT 1000;\n\n-- Specific date range:\n-- SELECT *\n-- FROM ${tableName}\n-- WHERE ${dateColumn} BETWEEN '2024-01-01' AND '2024-12-31'\n-- ORDER BY ${dateColumn};\n\n-- Today's records:\n-- SELECT *\n-- FROM ${tableName}\n-- WHERE DATE(${dateColumn}) = CURRENT_DATE;`
}
/**
* Generate NULL value analysis query
*/
static generateNullAnalysisQuery(table: DatabaseTable): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
const nullableColumns = table.columns.filter(col => col.is_nullable === true).slice(0, 10)
if (nullableColumns.length === 0) {
return `-- No nullable columns found in ${table.table_name}\n-- All columns appear to be NOT NULL`
}
const nullChecks = nullableColumns
.map(col => ` SUM(CASE WHEN ${col.column_name} IS NULL THEN 1 ELSE 0 END) as null_${col.column_name}`)
.join(',\n')
return `-- NULL value analysis for ${table.table_name}\nSELECT \n COUNT(*) as total_rows,\n${nullChecks}\nFROM ${tableName};\n\n-- Records with NULL values:\n-- SELECT *\n-- FROM ${tableName}\n-- WHERE ${nullableColumns[0].column_name} IS NULL\n-- LIMIT 100;`
}
/**
* Generate duplicate detection query
*/
static generateDuplicateDetectionQuery(table: DatabaseTable): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
const textColumns = table.columns
.filter(col =>
['character varying', 'varchar', 'text', 'char'].some(type =>
col.data_type.toLowerCase().includes(type.toLowerCase())
)
)
.slice(0, 3)
if (textColumns.length === 0) {
return `-- No text columns found in ${table.table_name}\n-- Cannot generate duplicate detection query`
}
const groupColumns = textColumns.map(col => col.column_name).join(', ')
const selectColumns = textColumns.map(col => col.column_name).join(',\n ')
return `-- Duplicate detection for ${table.table_name}\nSELECT \n ${selectColumns},\n COUNT(*) as duplicate_count\nFROM ${tableName}\nGROUP BY ${groupColumns}\nHAVING COUNT(*) > 1\nORDER BY duplicate_count DESC\nLIMIT 100;\n\n-- Show actual duplicate records:\n-- WITH duplicates AS (\n-- SELECT ${groupColumns}\n-- FROM ${tableName}\n-- GROUP BY ${groupColumns}\n-- HAVING COUNT(*) > 1\n-- )\n-- SELECT t.*\n-- FROM ${tableName} t\n-- INNER JOIN duplicates d ON (${textColumns.map(col => `t.${col.column_name} = d.${col.column_name}`).join(' AND ')});`
}
/**
* Generate pagination query
*/
static generatePaginationQuery(table: DatabaseTable): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
const primaryKeyColumns = table.columns.filter(
col => col.column_name.toLowerCase().includes('id') || col.column_name.toLowerCase() === 'pk'
)
const orderByColumn =
primaryKeyColumns.length > 0 ? primaryKeyColumns[0].column_name : table.columns[0]?.column_name || 'id'
return `-- Pagination examples for ${table.table_name}\n-- Page 1 (first 20 records):\nSELECT *\nFROM ${tableName}\nORDER BY ${orderByColumn}\nLIMIT 20 OFFSET 0;\n\n-- Page 2 (next 20 records):\n-- SELECT *\n-- FROM ${tableName}\n-- ORDER BY ${orderByColumn}\n-- LIMIT 20 OFFSET 20;\n\n-- More efficient cursor-based pagination:\n-- SELECT *\n-- FROM ${tableName}\n-- WHERE ${orderByColumn} > 'last_seen_id'\n-- ORDER BY ${orderByColumn}\n-- LIMIT 20;`
}
/**
* Generate data quality check query
*/
static generateDataQualityQuery(table: DatabaseTable): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
const textColumns = table.columns
.filter(col =>
['character varying', 'varchar', 'text', 'char'].some(type =>
col.data_type.toLowerCase().includes(type.toLowerCase())
)
)
.slice(0, 3)
const numericColumns = table.columns
.filter(col =>
['integer', 'bigint', 'decimal', 'numeric', 'real', 'double precision', 'smallint'].some(type =>
col.data_type.toLowerCase().includes(type.toLowerCase())
)
)
.slice(0, 2)
let qualityChecks = [' COUNT(*) as total_records']
textColumns.forEach(col => {
qualityChecks.push(
` SUM(CASE WHEN ${col.column_name} = '' OR ${col.column_name} IS NULL THEN 1 ELSE 0 END) as empty_${col.column_name}`
)
qualityChecks.push(` AVG(LENGTH(${col.column_name})) as avg_length_${col.column_name}`)
})
numericColumns.forEach(col => {
qualityChecks.push(` SUM(CASE WHEN ${col.column_name} < 0 THEN 1 ELSE 0 END) as negative_${col.column_name}`)
qualityChecks.push(` SUM(CASE WHEN ${col.column_name} IS NULL THEN 1 ELSE 0 END) as null_${col.column_name}`)
})
return `-- Data quality analysis for ${table.table_name}\nSELECT \n${qualityChecks.join(',\n')}\nFROM ${tableName};\n\n-- Detailed quality issues:\n-- SELECT *\n-- FROM ${tableName}\n-- WHERE ${textColumns.length > 0 ? `(${textColumns[0].column_name} = '' OR ${textColumns[0].column_name} IS NULL)` : '1=1'}\n-- LIMIT 100;`
}
/**
* Generate a query to show table structure
*/
static generateDescribeQuery(table: DatabaseTable): string {
return `-- Describe table structure for ${table.table_name}\nSELECT \n column_name,\n data_type,\n is_nullable,\n column_default\nFROM information_schema.columns\nWHERE table_schema = '${table.schema_name}' \n AND table_name = '${table.table_name}'\nORDER BY ordinal_position;`
}
/**
* Generate a query to count rows in a table
*/
static generateCountQuery(table: DatabaseTable): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
return `-- Count rows in ${table.table_name}\nSELECT COUNT(*) as total_rows\nFROM ${tableName};`
}
/**
* Generate a query to show sample data with data types
*/
static generateSampleWithTypesQuery(table: DatabaseTable): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
const columns = table.columns.slice(0, 10) // Limit to first 10 columns to avoid wide results
const columnSelections = columns
.map((col: DatabaseColumn) => ` ${col.column_name} -- ${col.data_type}${col.is_nullable ? ' (nullable)' : ''}`)
.join(',\n')
return `-- Sample data from ${table.table_name} with column types\nSELECT \n${columnSelections}\nFROM ${tableName}\nLIMIT 10;`
}
/**
* Generate aggregate queries for numeric columns
*/
static generateAggregateQuery(table: DatabaseTable): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
const numericColumns = table.columns.filter((col: DatabaseColumn) =>
['integer', 'bigint', 'decimal', 'numeric', 'real', 'double precision', 'smallint'].some(type =>
col.data_type.toLowerCase().includes(type.toLowerCase())
)
)
if (numericColumns.length === 0) {
return `-- No numeric columns found in ${table.table_name}\n-- Cannot generate aggregate query`
}
const aggregations = numericColumns
.slice(0, 5)
.map(
(col: DatabaseColumn) =>
` AVG(${col.column_name}) as avg_${col.column_name},\n MIN(${col.column_name}) as min_${col.column_name},\n MAX(${col.column_name}) as max_${col.column_name}`
)
.join(',\n')
return `-- Aggregate statistics for numeric columns in ${table.table_name}\nSELECT \n COUNT(*) as total_rows,\n${aggregations}\nFROM ${tableName};`
}
/**
* Generate a time-series query for tables with timestamp columns
*/
static generateTimeSeriesQuery(table: DatabaseTable): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
const timestampColumns = table.columns.filter(
(col: DatabaseColumn) =>
['timestamp', 'timestamptz', 'date', 'time'].some(type =>
col.data_type.toLowerCase().includes(type.toLowerCase())
) ||
col.column_name.toLowerCase().includes('time') ||
col.column_name.toLowerCase().includes('date') ||
col.column_name.toLowerCase().includes('created') ||
col.column_name.toLowerCase().includes('updated')
)
if (timestampColumns.length === 0) {
return `-- No timestamp columns found in ${table.table_name}\n-- Cannot generate time-series query`
}
const timestampCol = timestampColumns[0].column_name
const numericColumns = table.columns.filter((col: DatabaseColumn) =>
['integer', 'bigint', 'decimal', 'numeric', 'real', 'double precision', 'smallint'].some(type =>
col.data_type.toLowerCase().includes(type.toLowerCase())
)
)
let selectColumns = ` DATE_TRUNC('hour', ${timestampCol}) as time_bucket,\n COUNT(*) as record_count`
if (numericColumns.length > 0) {
const firstNumericCol = numericColumns[0].column_name
selectColumns += `,\n AVG(${firstNumericCol}) as avg_${firstNumericCol}`
}
return `-- Time-series analysis for ${table.table_name}\nSELECT \n${selectColumns}\nFROM ${tableName}\nWHERE ${timestampCol} >= NOW() - INTERVAL '24 hours'\nGROUP BY DATE_TRUNC('hour', ${timestampCol})\nORDER BY time_bucket DESC\nLIMIT 24;`
}
/**
* Insert a column name into query at cursor position
*/
static insertColumnName(table: DatabaseTable, column: DatabaseColumn): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
return `${tableName}.${column.column_name}`
}
/**
* Generate INSERT query template
*/
static generateInsertQuery(table: DatabaseTable): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
const columns = table.columns
.filter((col: DatabaseColumn) => !col.column_default?.includes('nextval')) // Skip auto-increment columns
.slice(0, 10) // Limit columns for readability
const columnNames = columns.map((col: DatabaseColumn) => col.column_name).join(',\n ')
const placeholderValues = columns
.map((col: DatabaseColumn) => {
switch (col.data_type.toLowerCase()) {
case 'character varying':
case 'varchar':
case 'text':
case 'char':
return "'sample_text'"
case 'integer':
case 'bigint':
case 'smallint':
return '0'
case 'boolean':
return 'true'
case 'timestamp':
case 'timestamptz':
return 'NOW()'
case 'date':
return 'CURRENT_DATE'
case 'decimal':
case 'numeric':
case 'real':
case 'double precision':
return '0.0'
default:
return 'NULL'
}
})
.join(',\n ')
return `-- Insert sample data into ${table.table_name}\nINSERT INTO ${tableName} (\n ${columnNames}\n) VALUES (\n ${placeholderValues}\n);`
}
/**
* Generate UPDATE query template
*/
static generateUpdateQuery(table: DatabaseTable): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
const primaryKeyColumns = table.columns.filter(
(col: DatabaseColumn) => col.column_name.toLowerCase().includes('id') || col.column_name.toLowerCase() === 'pk'
)
const updateableColumns = table.columns
.filter((col: DatabaseColumn) => !col.column_default?.includes('nextval') && !primaryKeyColumns.includes(col))
.slice(0, 3) // Limit for readability
const setClause = updateableColumns
.map((col: DatabaseColumn) => {
switch (col.data_type.toLowerCase()) {
case 'character varying':
case 'varchar':
case 'text':
case 'char':
return ` ${col.column_name} = 'new_value'`
case 'integer':
case 'bigint':
case 'smallint':
return ` ${col.column_name} = 1`
case 'boolean':
return ` ${col.column_name} = true`
case 'timestamp':
case 'timestamptz':
return ` ${col.column_name} = NOW()`
default:
return ` ${col.column_name} = NULL`
}
})
.join(',\n')
const whereColumn = primaryKeyColumns[0]?.column_name || table.columns[0]?.column_name || 'id'
return `-- Update data in ${table.table_name}\nUPDATE ${tableName}\nSET \n${setClause}\nWHERE ${whereColumn} = 1; -- Replace with actual condition`
}
/**
* Generate DELETE query template
*/
static generateDeleteQuery(table: DatabaseTable): string {
const tableName = table.schema_name === 'public' ? table.table_name : `${table.schema_name}.${table.table_name}`
const primaryKeyColumns = table.columns.filter(
(col: DatabaseColumn) => col.column_name.toLowerCase().includes('id') || col.column_name.toLowerCase() === 'pk'
)
const whereColumn = primaryKeyColumns[0]?.column_name || table.columns[0]?.column_name || 'id'
return `-- Delete data from ${table.table_name}\n-- WARNING: This will permanently delete data!\nDELETE FROM ${tableName}\nWHERE ${whereColumn} = 1; -- Replace with actual condition\n\n-- To see what would be deleted first, run:\n-- SELECT * FROM ${tableName} WHERE ${whereColumn} = 1;`
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment