Created
February 8, 2026 16:24
-
-
Save tomhermans/db1f36de9cfdf24a01eec21c1c083dee to your computer and use it in GitHub Desktop.
Bookmark CSV Converter (for Supabase (Instapaper/Pocket Format))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env node | |
| /** | |
| * CSV Converter for Bookmark Import to Supabase | |
| * | |
| * This script converts bookmark exports (like Instapaper, Pocket, etc.) | |
| * to a standardized CSV format compatible with Supabase database import. | |
| * | |
| * USAGE: | |
| * node convert-csv-instapaper.js [input-file] [output-file] | |
| * | |
| * EXAMPLES: | |
| * node convert-csv-instapaper.js my-export.csv converted.csv | |
| * node convert-csv-instapaper.js # Uses default filenames | |
| * | |
| * INPUT FORMAT (Instapaper-style): | |
| * Columns: URL, Title, Selection, Folder, Timestamp, Tags | |
| * - URL: The bookmarked link | |
| * - Title: Article/page title | |
| * - Selection: Highlighted text from the article (optional) | |
| * - Folder: "Unread", "Archive", or other folder names | |
| * - Timestamp: Unix timestamp (seconds since 1970-01-01) | |
| * - Tags: JSON array string like ["ai", "design"] | |
| * | |
| * OUTPUT FORMAT (Supabase-compatible): | |
| * Columns: id, url, title, description, tags, category, favicon_url, | |
| * is_read, is_archived, created_at, image_url, author, reading_time | |
| * | |
| * FIELD MAPPING: | |
| * URL → url (direct mapping) | |
| * Title → title (trimmed, whitespace normalized) | |
| * Selection → description (highlighted text becomes description) | |
| * Folder → is_read, is_archived (Unread=false/false, Archive=false/true, Other=true/false) | |
| * Timestamp → created_at (Unix timestamp → ISO 8601 format) | |
| * Tags → tags, category (JSON array → tags, first tag → category) | |
| * (new) → id (generated UUID v4) | |
| * (null) → favicon_url, image_url, author, reading_time (empty fields for future use) | |
| */ | |
| const fs = require("fs"); | |
| const { parse } = require("csv-parse/sync"); | |
| const { stringify } = require("csv-stringify/sync"); | |
| const { v4: uuidv4 } = require("uuid"); | |
| // ============================================================================ | |
| // CONFIGURATION | |
| // ============================================================================ | |
| /** | |
| * Default filenames if not provided via command line arguments | |
| */ | |
| const DEFAULT_INPUT = "Instapaper-Export-2026-02-08_13_29_18.csv"; | |
| const DEFAULT_OUTPUT = "new_instapaper_bookmarks.csv"; | |
| /** | |
| * Get input/output filenames from command line or use defaults | |
| * Usage: node script.js [input.csv] [output.csv] | |
| */ | |
| const inputFile = process.argv[2] || DEFAULT_INPUT; | |
| const outputFile = process.argv[3] || DEFAULT_OUTPUT; | |
| console.log(`📖 Reading from: ${inputFile}`); | |
| console.log(`💾 Writing to: ${outputFile}\n`); | |
| // ============================================================================ | |
| // TAG DETECTION CONFIGURATION | |
| // ============================================================================ | |
| /** | |
| * Tag detection keyword mapping | |
| * | |
| * This object maps canonical tag names to keyword patterns that should trigger | |
| * that tag. The detection is case-insensitive and searches within the title. | |
| * | |
| * HOW IT WORKS: | |
| * 1. For each bookmark, the title is converted to lowercase | |
| * 2. Each keyword list is checked against the title | |
| * 3. If ANY keyword matches, that tag is added (unless it already exists) | |
| * 4. This helps auto-categorize bookmarks that don't have tags | |
| * | |
| * CUSTOMIZATION: | |
| * Add your own tags and keywords here: | |
| * "mytag": ["keyword1", "keyword2", "phrase to match"], | |
| */ | |
| const TAG_KEYWORDS = { | |
| accessibility: ["accessibility", "a11y", "accessible"], | |
| AI: ["ai", "artificial intelligence", "machine learning", "ml"], | |
| animation: ["animation", "animate", "animated"], | |
| astro: ["astro"], | |
| auto: ["auto", "automotive", "car"], | |
| brand: ["brand", "branding"], | |
| business: ["business"], | |
| color: ["color", "colour"], | |
| css: ["css", "stylesheet"], | |
| design: ["design", "designer"], | |
| "design system": ["design system", "design-system"], | |
| "design tokens": ["design token", "design-token"], | |
| diy: ["diy", "do it yourself"], | |
| electronics: ["electronics", "electronic"], | |
| fonts: ["font", "fonts"], | |
| gfx: ["graphic", "graphics", "gfx"], | |
| health: ["health", "fitness", "medical"], | |
| ideas: ["idea", "ideas"], | |
| internet: ["internet"], | |
| jobs: ["job", "jobs", "career"], | |
| js: ["javascript", "js", "ecmascript"], | |
| leven: ["leven"], | |
| life: ["life"], | |
| logo: ["logo"], | |
| markdown: ["markdown", "md"], | |
| nuxt: ["nuxt"], | |
| performance: ["performance", "optimization", "speed"], | |
| philosophy: ["philosophy", "philosophical"], | |
| productivity: ["productivity", "productive"], | |
| supabase: ["supabase"], | |
| svelte: ["svelte"], | |
| svg: ["svg", "scalable vector"], | |
| tutorial: ["tutorial", "guide", "how to", "how-to"], | |
| typo: ["typography", "typographic", "typeface", "font", "fonts"], | |
| ux: ["ux", "user experience", "usability"], | |
| vue: ["vue", "vuejs", "vue.js"], | |
| webdev: ["web development", "web dev", "frontend", "backend", "fullstack"], | |
| wordpress: ["wordpress", "wp"], | |
| work: ["work", "workflow"], | |
| wp: ["wordpress", "wp"], | |
| }; | |
| // ============================================================================ | |
| // FILE READING & PARSING | |
| // ============================================================================ | |
| /** | |
| * Read the CSV file from disk | |
| * This will throw an error if the file doesn't exist | |
| */ | |
| let oldCsvContent; | |
| try { | |
| oldCsvContent = fs.readFileSync(inputFile, "utf-8"); | |
| } catch (error) { | |
| console.error(`❌ Error reading file: ${error.message}`); | |
| process.exit(1); | |
| } | |
| /** | |
| * Parse CSV into JavaScript objects | |
| * | |
| * OPTIONS: | |
| * - columns: true → First row becomes object keys | |
| * - skip_empty_lines: true → Ignore blank rows | |
| * - trim: true → Remove whitespace from fields | |
| * - quote: '"' → Handle quoted fields (for commas in data) | |
| * - relax_quotes: true → Be lenient with malformed quotes | |
| */ | |
| const oldRecords = parse(oldCsvContent, { | |
| columns: true, | |
| skip_empty_lines: true, | |
| trim: true, | |
| quote: '"', | |
| relax_quotes: true, | |
| }); | |
| console.log(`📊 Found ${oldRecords.length} records to convert\n`); | |
| // ============================================================================ | |
| // HELPER FUNCTIONS | |
| // ============================================================================ | |
| /** | |
| * Detect additional tags from title text | |
| * | |
| * This function scans the title for keywords and automatically adds | |
| * relevant tags that weren't explicitly set in the original export. | |
| * | |
| * @param {string} title - The bookmark title to analyze | |
| * @param {string[]} existingTags - Tags already assigned to this bookmark | |
| * @returns {string[]} - Combined array of existing + detected tags | |
| * | |
| * ALGORITHM: | |
| * 1. Convert title to lowercase for case-insensitive matching | |
| * 2. Use a Set to avoid duplicate tags | |
| * 3. For each tag in TAG_KEYWORDS: | |
| * a. Skip if tag already exists (no duplicates) | |
| * b. Check if ANY keyword for that tag appears in the title | |
| * c. If match found, add the tag and stop checking other keywords | |
| * 4. Return the final array of unique tags | |
| */ | |
| function detectTagsFromTitle(title, existingTags) { | |
| const titleLower = title.toLowerCase(); | |
| const detectedTags = new Set(existingTags); | |
| for (const [tag, keywords] of Object.entries(TAG_KEYWORDS)) { | |
| // Skip if tag already exists | |
| if (detectedTags.has(tag)) continue; | |
| // Check if any keyword matches | |
| for (const keyword of keywords) { | |
| if (titleLower.includes(keyword.toLowerCase())) { | |
| detectedTags.add(tag); | |
| break; // Found a match, no need to check other keywords for this tag | |
| } | |
| } | |
| } | |
| return Array.from(detectedTags); | |
| } | |
| // ============================================================================ | |
| // DATA TRANSFORMATION | |
| // ============================================================================ | |
| /** | |
| * Transform each old record into the new format | |
| * | |
| * FIELD-BY-FIELD MAPPING: | |
| * | |
| * 1. id (NEW) | |
| * - Generated fresh for each record using UUID v4 | |
| * - Format: xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx | |
| * | |
| * 2. url (Title → url) | |
| * - Direct mapping from URL column | |
| * - No transformation needed | |
| * | |
| * 3. title (Title → title) | |
| * - Source: record.Title | |
| * - Transformations: | |
| * a. Replace all whitespace sequences (spaces, tabs, newlines) with single space | |
| * b. Trim leading/trailing whitespace | |
| * - Example: " Hello\n World " → "Hello World" | |
| * | |
| * 4. description (Selection → description) | |
| * - Source: record.Selection (text user highlighted in Instapaper) | |
| * - If empty, set to null | |
| * - This field didn't exist in old format | |
| * | |
| * 5. tags (Tags → tags) | |
| * - Source: record.Tags (JSON array string like '["ai","design"]') | |
| * - Transformations: | |
| * a. Parse JSON string into array | |
| * b. Convert all tags to lowercase for consistency | |
| * c. Run auto-detection to add tags based on title | |
| * d. Convert back to JSON string for CSV storage | |
| * - Example: '["AI","Design"]' → '["ai","design","webdev"]' | |
| * | |
| * 6. category (derived from tags) | |
| * - Source: First tag in the tags array | |
| * - Used as a primary category for the bookmark | |
| * - If no tags, set to null | |
| * | |
| * 7. favicon_url (NEW) | |
| * - Always null (to be populated later by your app) | |
| * | |
| * 8. is_read (Folder → is_read) | |
| * - Source: record.Folder | |
| * - Logic: If Folder === "Unread" → false, otherwise → true | |
| * - Safety: Uses || false to guarantee non-null boolean | |
| * | |
| * 9. is_archived (Folder → is_archived) | |
| * - Source: record.Folder | |
| * - Logic: If Folder === "Archive" → true, otherwise → false | |
| * - Safety: Uses || false to guarantee non-null boolean | |
| * | |
| * 10. created_at (Timestamp → created_at) | |
| * - Source: record.Timestamp (Unix timestamp in seconds) | |
| * - Transformations: | |
| * a. Multiply by 1000 to convert seconds → milliseconds | |
| * b. Create JavaScript Date object | |
| * c. Convert to ISO 8601 string format | |
| * - Example: 1636460629 → "2021-11-09T12:37:09.000Z" | |
| * | |
| * 11. image_url, author, reading_time (NEW) | |
| * - All set to null | |
| * - These fields don't exist in Instapaper export | |
| * - Can be populated later by your app or API enrichment | |
| */ | |
| const newRecords = oldRecords.map((record) => { | |
| // ---------------- | |
| // TITLE PROCESSING | |
| // ---------------- | |
| // Clean up title by normalizing whitespace | |
| const title = record.Title.replace(/\s+/g, " ").trim(); | |
| // ---------------- | |
| // DATE CONVERSION | |
| // ---------------- | |
| // Convert Unix timestamp (seconds) to ISO 8601 datetime string | |
| // Example: 1636460629 → "2021-11-09T12:37:09.000Z" | |
| const createdAt = new Date(parseInt(record.Timestamp) * 1000).toISOString(); | |
| // ---------------- | |
| // TAG PROCESSING | |
| // ---------------- | |
| let tagsArray = []; | |
| if (record.Tags) { | |
| try { | |
| // Instapaper exports tags as JSON array string like ["AI","Design"] | |
| tagsArray = JSON.parse(record.Tags); | |
| // Normalize all tags to lowercase for consistency | |
| tagsArray = tagsArray.map((tag) => tag.toLowerCase()); | |
| } catch (e) { | |
| console.warn(`⚠️ Failed to parse tags for: ${title}`); | |
| tagsArray = []; | |
| } | |
| } | |
| // Run auto-detection to add tags based on title keywords | |
| tagsArray = detectTagsFromTitle(title, tagsArray); | |
| // ---------------- | |
| // STATUS MAPPING | |
| // ---------------- | |
| // Map Instapaper folders to boolean read/archived states | |
| // "Unread" → is_read=false, is_archived=false | |
| // "Archive" → is_read=false, is_archived=true | |
| // Anything else → is_read=true, is_archived=false | |
| // | |
| // The || false ensures we never get null/undefined | |
| const isRead = record.Folder !== "Unread" || false; | |
| const isArchived = record.Folder === "Archive" || false; | |
| // ---------------- | |
| // BUILD NEW RECORD | |
| // ---------------- | |
| return { | |
| id: uuidv4(), // Generate new UUID | |
| url: record.URL, // Direct mapping | |
| title: title, // Cleaned title | |
| description: record.Selection || null, // Highlighted text or null | |
| tags: JSON.stringify(tagsArray), // Array → JSON string for CSV | |
| category: tagsArray.length > 0 ? tagsArray[0] : null, // First tag | |
| favicon_url: null, // To be populated later | |
| is_read: isRead, // Folder-based boolean | |
| is_archived: isArchived, // Folder-based boolean | |
| created_at: createdAt, // Unix timestamp → ISO string | |
| image_url: null, // To be populated later | |
| author: null, // To be populated later | |
| reading_time: null, // To be populated later | |
| }; | |
| }); | |
| // ============================================================================ | |
| // CSV GENERATION | |
| // ============================================================================ | |
| /** | |
| * Convert JavaScript objects back to CSV format | |
| * | |
| * The 'columns' option ensures fields appear in this exact order | |
| * in the output CSV, matching the Supabase table schema. | |
| */ | |
| const newCsvContent = stringify(newRecords, { | |
| header: true, | |
| columns: [ | |
| "id", | |
| "url", | |
| "title", | |
| "description", | |
| "tags", | |
| "category", | |
| "favicon_url", | |
| "is_read", | |
| "is_archived", | |
| "created_at", | |
| "image_url", | |
| "author", | |
| "reading_time", | |
| ], | |
| }); | |
| // ============================================================================ | |
| // FILE WRITING & VALIDATION | |
| // ============================================================================ | |
| /** | |
| * Write the converted CSV to disk | |
| */ | |
| try { | |
| fs.writeFileSync(outputFile, newCsvContent); | |
| } catch (error) { | |
| console.error(`❌ Error writing file: ${error.message}`); | |
| process.exit(1); | |
| } | |
| // ============================================================================ | |
| // STATISTICS & VALIDATION | |
| // ============================================================================ | |
| console.log(`✅ Converted ${newRecords.length} records`); | |
| console.log(`💾 New CSV file created: ${outputFile}\n`); | |
| // Count records that have at least one tag | |
| const recordsWithDetectedTags = newRecords.filter((r) => { | |
| const tags = JSON.parse(r.tags); | |
| return tags.length > 0; | |
| }).length; | |
| console.log(`🏷️ Records with tags: ${recordsWithDetectedTags}`); | |
| // Validate that all boolean fields are properly set (not null/undefined) | |
| const nullBooleans = newRecords.filter( | |
| (r) => | |
| r.is_read === null || | |
| r.is_read === undefined || | |
| r.is_archived === null || | |
| r.is_archived === undefined, | |
| ); | |
| if (nullBooleans.length > 0) { | |
| console.warn( | |
| `⚠️ WARNING: ${nullBooleans.length} records have null boolean values!`, | |
| ); | |
| } else { | |
| console.log("✅ All boolean fields are non-null\n"); | |
| } | |
| console.log("🎉 Conversion complete!\n"); | |
| console.log("📝 Next steps:"); | |
| console.log(" 1. Review the output file for accuracy"); | |
| console.log(" 2. Import to Supabase via Dashboard → Table Editor → Import CSV"); | |
| console.log(" 3. Verify data in your database"); | |
| console.log(" 4. Update is_read/is_archived defaults if needed:\n"); | |
| console.log(" UPDATE links SET is_archived = false WHERE is_archived IS NULL;"); | |
| console.log(" UPDATE links SET is_read = false WHERE is_read IS NULL;\n"); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment