Skip to content

Instantly share code, notes, and snippets.

@tomhermans
Created February 8, 2026 16:24
Show Gist options
  • Select an option

  • Save tomhermans/db1f36de9cfdf24a01eec21c1c083dee to your computer and use it in GitHub Desktop.

Select an option

Save tomhermans/db1f36de9cfdf24a01eec21c1c083dee to your computer and use it in GitHub Desktop.
Bookmark CSV Converter (for Supabase (Instapaper/Pocket Format))
#!/usr/bin/env node
/**
* CSV Converter for Bookmark Import to Supabase
*
* This script converts bookmark exports (like Instapaper, Pocket, etc.)
* to a standardized CSV format compatible with Supabase database import.
*
* USAGE:
* node convert-csv-instapaper.js [input-file] [output-file]
*
* EXAMPLES:
* node convert-csv-instapaper.js my-export.csv converted.csv
* node convert-csv-instapaper.js # Uses default filenames
*
* INPUT FORMAT (Instapaper-style):
* Columns: URL, Title, Selection, Folder, Timestamp, Tags
* - URL: The bookmarked link
* - Title: Article/page title
* - Selection: Highlighted text from the article (optional)
* - Folder: "Unread", "Archive", or other folder names
* - Timestamp: Unix timestamp (seconds since 1970-01-01)
* - Tags: JSON array string like ["ai", "design"]
*
* OUTPUT FORMAT (Supabase-compatible):
* Columns: id, url, title, description, tags, category, favicon_url,
* is_read, is_archived, created_at, image_url, author, reading_time
*
* FIELD MAPPING:
* URL → url (direct mapping)
* Title → title (trimmed, whitespace normalized)
* Selection → description (highlighted text becomes description)
* Folder → is_read, is_archived (Unread=false/false, Archive=false/true, Other=true/false)
* Timestamp → created_at (Unix timestamp → ISO 8601 format)
* Tags → tags, category (JSON array → tags, first tag → category)
* (new) → id (generated UUID v4)
* (null) → favicon_url, image_url, author, reading_time (empty fields for future use)
*/
const fs = require("fs");
const { parse } = require("csv-parse/sync");
const { stringify } = require("csv-stringify/sync");
const { v4: uuidv4 } = require("uuid");
// ============================================================================
// CONFIGURATION
// ============================================================================
/**
* Default filenames if not provided via command line arguments
*/
const DEFAULT_INPUT = "Instapaper-Export-2026-02-08_13_29_18.csv";
const DEFAULT_OUTPUT = "new_instapaper_bookmarks.csv";
/**
* Get input/output filenames from command line or use defaults
* Usage: node script.js [input.csv] [output.csv]
*/
const inputFile = process.argv[2] || DEFAULT_INPUT;
const outputFile = process.argv[3] || DEFAULT_OUTPUT;
console.log(`📖 Reading from: ${inputFile}`);
console.log(`💾 Writing to: ${outputFile}\n`);
// ============================================================================
// TAG DETECTION CONFIGURATION
// ============================================================================
/**
* Tag detection keyword mapping
*
* This object maps canonical tag names to keyword patterns that should trigger
* that tag. The detection is case-insensitive and searches within the title.
*
* HOW IT WORKS:
* 1. For each bookmark, the title is converted to lowercase
* 2. Each keyword list is checked against the title
* 3. If ANY keyword matches, that tag is added (unless it already exists)
* 4. This helps auto-categorize bookmarks that don't have tags
*
* CUSTOMIZATION:
* Add your own tags and keywords here:
* "mytag": ["keyword1", "keyword2", "phrase to match"],
*/
const TAG_KEYWORDS = {
accessibility: ["accessibility", "a11y", "accessible"],
AI: ["ai", "artificial intelligence", "machine learning", "ml"],
animation: ["animation", "animate", "animated"],
astro: ["astro"],
auto: ["auto", "automotive", "car"],
brand: ["brand", "branding"],
business: ["business"],
color: ["color", "colour"],
css: ["css", "stylesheet"],
design: ["design", "designer"],
"design system": ["design system", "design-system"],
"design tokens": ["design token", "design-token"],
diy: ["diy", "do it yourself"],
electronics: ["electronics", "electronic"],
fonts: ["font", "fonts"],
gfx: ["graphic", "graphics", "gfx"],
health: ["health", "fitness", "medical"],
ideas: ["idea", "ideas"],
internet: ["internet"],
jobs: ["job", "jobs", "career"],
js: ["javascript", "js", "ecmascript"],
leven: ["leven"],
life: ["life"],
logo: ["logo"],
markdown: ["markdown", "md"],
nuxt: ["nuxt"],
performance: ["performance", "optimization", "speed"],
philosophy: ["philosophy", "philosophical"],
productivity: ["productivity", "productive"],
supabase: ["supabase"],
svelte: ["svelte"],
svg: ["svg", "scalable vector"],
tutorial: ["tutorial", "guide", "how to", "how-to"],
typo: ["typography", "typographic", "typeface", "font", "fonts"],
ux: ["ux", "user experience", "usability"],
vue: ["vue", "vuejs", "vue.js"],
webdev: ["web development", "web dev", "frontend", "backend", "fullstack"],
wordpress: ["wordpress", "wp"],
work: ["work", "workflow"],
wp: ["wordpress", "wp"],
};
// ============================================================================
// FILE READING & PARSING
// ============================================================================
/**
* Read the CSV file from disk
* This will throw an error if the file doesn't exist
*/
let oldCsvContent;
try {
oldCsvContent = fs.readFileSync(inputFile, "utf-8");
} catch (error) {
console.error(`❌ Error reading file: ${error.message}`);
process.exit(1);
}
/**
* Parse CSV into JavaScript objects
*
* OPTIONS:
* - columns: true → First row becomes object keys
* - skip_empty_lines: true → Ignore blank rows
* - trim: true → Remove whitespace from fields
* - quote: '"' → Handle quoted fields (for commas in data)
* - relax_quotes: true → Be lenient with malformed quotes
*/
const oldRecords = parse(oldCsvContent, {
columns: true,
skip_empty_lines: true,
trim: true,
quote: '"',
relax_quotes: true,
});
console.log(`📊 Found ${oldRecords.length} records to convert\n`);
// ============================================================================
// HELPER FUNCTIONS
// ============================================================================
/**
* Detect additional tags from title text
*
* This function scans the title for keywords and automatically adds
* relevant tags that weren't explicitly set in the original export.
*
* @param {string} title - The bookmark title to analyze
* @param {string[]} existingTags - Tags already assigned to this bookmark
* @returns {string[]} - Combined array of existing + detected tags
*
* ALGORITHM:
* 1. Convert title to lowercase for case-insensitive matching
* 2. Use a Set to avoid duplicate tags
* 3. For each tag in TAG_KEYWORDS:
* a. Skip if tag already exists (no duplicates)
* b. Check if ANY keyword for that tag appears in the title
* c. If match found, add the tag and stop checking other keywords
* 4. Return the final array of unique tags
*/
function detectTagsFromTitle(title, existingTags) {
const titleLower = title.toLowerCase();
const detectedTags = new Set(existingTags);
for (const [tag, keywords] of Object.entries(TAG_KEYWORDS)) {
// Skip if tag already exists
if (detectedTags.has(tag)) continue;
// Check if any keyword matches
for (const keyword of keywords) {
if (titleLower.includes(keyword.toLowerCase())) {
detectedTags.add(tag);
break; // Found a match, no need to check other keywords for this tag
}
}
}
return Array.from(detectedTags);
}
// ============================================================================
// DATA TRANSFORMATION
// ============================================================================
/**
* Transform each old record into the new format
*
* FIELD-BY-FIELD MAPPING:
*
* 1. id (NEW)
* - Generated fresh for each record using UUID v4
* - Format: xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx
*
* 2. url (Title → url)
* - Direct mapping from URL column
* - No transformation needed
*
* 3. title (Title → title)
* - Source: record.Title
* - Transformations:
* a. Replace all whitespace sequences (spaces, tabs, newlines) with single space
* b. Trim leading/trailing whitespace
* - Example: " Hello\n World " → "Hello World"
*
* 4. description (Selection → description)
* - Source: record.Selection (text user highlighted in Instapaper)
* - If empty, set to null
* - This field didn't exist in old format
*
* 5. tags (Tags → tags)
* - Source: record.Tags (JSON array string like '["ai","design"]')
* - Transformations:
* a. Parse JSON string into array
* b. Convert all tags to lowercase for consistency
* c. Run auto-detection to add tags based on title
* d. Convert back to JSON string for CSV storage
* - Example: '["AI","Design"]' → '["ai","design","webdev"]'
*
* 6. category (derived from tags)
* - Source: First tag in the tags array
* - Used as a primary category for the bookmark
* - If no tags, set to null
*
* 7. favicon_url (NEW)
* - Always null (to be populated later by your app)
*
* 8. is_read (Folder → is_read)
* - Source: record.Folder
* - Logic: If Folder === "Unread" → false, otherwise → true
* - Safety: Uses || false to guarantee non-null boolean
*
* 9. is_archived (Folder → is_archived)
* - Source: record.Folder
* - Logic: If Folder === "Archive" → true, otherwise → false
* - Safety: Uses || false to guarantee non-null boolean
*
* 10. created_at (Timestamp → created_at)
* - Source: record.Timestamp (Unix timestamp in seconds)
* - Transformations:
* a. Multiply by 1000 to convert seconds → milliseconds
* b. Create JavaScript Date object
* c. Convert to ISO 8601 string format
* - Example: 1636460629 → "2021-11-09T12:37:09.000Z"
*
* 11. image_url, author, reading_time (NEW)
* - All set to null
* - These fields don't exist in Instapaper export
* - Can be populated later by your app or API enrichment
*/
const newRecords = oldRecords.map((record) => {
// ----------------
// TITLE PROCESSING
// ----------------
// Clean up title by normalizing whitespace
const title = record.Title.replace(/\s+/g, " ").trim();
// ----------------
// DATE CONVERSION
// ----------------
// Convert Unix timestamp (seconds) to ISO 8601 datetime string
// Example: 1636460629 → "2021-11-09T12:37:09.000Z"
const createdAt = new Date(parseInt(record.Timestamp) * 1000).toISOString();
// ----------------
// TAG PROCESSING
// ----------------
let tagsArray = [];
if (record.Tags) {
try {
// Instapaper exports tags as JSON array string like ["AI","Design"]
tagsArray = JSON.parse(record.Tags);
// Normalize all tags to lowercase for consistency
tagsArray = tagsArray.map((tag) => tag.toLowerCase());
} catch (e) {
console.warn(`⚠️ Failed to parse tags for: ${title}`);
tagsArray = [];
}
}
// Run auto-detection to add tags based on title keywords
tagsArray = detectTagsFromTitle(title, tagsArray);
// ----------------
// STATUS MAPPING
// ----------------
// Map Instapaper folders to boolean read/archived states
// "Unread" → is_read=false, is_archived=false
// "Archive" → is_read=false, is_archived=true
// Anything else → is_read=true, is_archived=false
//
// The || false ensures we never get null/undefined
const isRead = record.Folder !== "Unread" || false;
const isArchived = record.Folder === "Archive" || false;
// ----------------
// BUILD NEW RECORD
// ----------------
return {
id: uuidv4(), // Generate new UUID
url: record.URL, // Direct mapping
title: title, // Cleaned title
description: record.Selection || null, // Highlighted text or null
tags: JSON.stringify(tagsArray), // Array → JSON string for CSV
category: tagsArray.length > 0 ? tagsArray[0] : null, // First tag
favicon_url: null, // To be populated later
is_read: isRead, // Folder-based boolean
is_archived: isArchived, // Folder-based boolean
created_at: createdAt, // Unix timestamp → ISO string
image_url: null, // To be populated later
author: null, // To be populated later
reading_time: null, // To be populated later
};
});
// ============================================================================
// CSV GENERATION
// ============================================================================
/**
* Convert JavaScript objects back to CSV format
*
* The 'columns' option ensures fields appear in this exact order
* in the output CSV, matching the Supabase table schema.
*/
const newCsvContent = stringify(newRecords, {
header: true,
columns: [
"id",
"url",
"title",
"description",
"tags",
"category",
"favicon_url",
"is_read",
"is_archived",
"created_at",
"image_url",
"author",
"reading_time",
],
});
// ============================================================================
// FILE WRITING & VALIDATION
// ============================================================================
/**
* Write the converted CSV to disk
*/
try {
fs.writeFileSync(outputFile, newCsvContent);
} catch (error) {
console.error(`❌ Error writing file: ${error.message}`);
process.exit(1);
}
// ============================================================================
// STATISTICS & VALIDATION
// ============================================================================
console.log(`✅ Converted ${newRecords.length} records`);
console.log(`💾 New CSV file created: ${outputFile}\n`);
// Count records that have at least one tag
const recordsWithDetectedTags = newRecords.filter((r) => {
const tags = JSON.parse(r.tags);
return tags.length > 0;
}).length;
console.log(`🏷️ Records with tags: ${recordsWithDetectedTags}`);
// Validate that all boolean fields are properly set (not null/undefined)
const nullBooleans = newRecords.filter(
(r) =>
r.is_read === null ||
r.is_read === undefined ||
r.is_archived === null ||
r.is_archived === undefined,
);
if (nullBooleans.length > 0) {
console.warn(
`⚠️ WARNING: ${nullBooleans.length} records have null boolean values!`,
);
} else {
console.log("✅ All boolean fields are non-null\n");
}
console.log("🎉 Conversion complete!\n");
console.log("📝 Next steps:");
console.log(" 1. Review the output file for accuracy");
console.log(" 2. Import to Supabase via Dashboard → Table Editor → Import CSV");
console.log(" 3. Verify data in your database");
console.log(" 4. Update is_read/is_archived defaults if needed:\n");
console.log(" UPDATE links SET is_archived = false WHERE is_archived IS NULL;");
console.log(" UPDATE links SET is_read = false WHERE is_read IS NULL;\n");
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment