Last active
September 11, 2025 16:18
-
-
Save 13rac1/2a8637d025d406bc6ddc5117a4400539 to your computer and use it in GitHub Desktop.
ChatGPT to Markdown with Tables and Lists. Usage: (1) open a conversation in ChatGPT; (2) open DevTools → Console; (3) paste the contents of exporter-markdown.js; (4) press Enter — the file will download. A quick fork of https://github.com/rashidazarang/chatgpt-chat-exporter that adds support for tables and lists.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| (() => { | |
/**
 * Format a date as an ISO-8601 calendar date (`YYYY-MM-DD`).
 *
 * The value is cut from `Date.prototype.toISOString()`, so it is the UTC
 * calendar date; near midnight it can differ from the local date.
 *
 * @param {Date} [date=new Date()] - Date to format; defaults to "now".
 * @returns {string} The part of the ISO timestamp that precedes the `T`.
 * @throws {RangeError} When `date` is an invalid Date (raised by `toISOString`).
 */
function formatDate(date = new Date()) {
  const [isoDay] = date.toISOString().split('T');
  return isoDay;
}
/**
 * Final clean-up pass over already-converted markdown text.
 *
 * Steps, in order:
 *  1. Double every backslash that is not already escaping `*`, `_`, a
 *     backtick or `|`. The pipe is exempt because table cells escape pipes as
 *     `\|`; doubling that backslash would turn the escape back into a bare
 *     column separator.
 *  2. Collapse runs of four or more newlines down to a single blank line
 *     (runs of up to three newlines are left untouched).
 *  3. Decode the `&lt;`, `&gt;` and `&amp;` entities that leaked through.
 *     `&amp;` is decoded last so `&amp;lt;` becomes `&lt;`, not `<`.
 *
 * @param {string} text - Markdown text to clean.
 * @returns {string} The cleaned markdown.
 */
function cleanMarkdown(text) {
  return text
    .replace(/\\(?![*_`|])/g, '\\\\')
    .replace(/\n{4,}/g, '\n\n')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&amp;/g, '&');
}
/**
 * Render an HTML table as a GitHub-flavoured markdown table.
 *
 * Every `<tr>` that has at least one `<td>`/`<th>` becomes one markdown row.
 * The first emitted row is always followed by a `---` delimiter row with the
 * same number of columns, because a markdown table is only recognised when a
 * delimiter row follows the header (this also covers tables without `<th>`
 * cells and single-row tables).
 *
 * Cell text is trimmed, internal line breaks are folded into single spaces
 * (a newline would end the markdown row) and literal pipes are escaped as
 * `\|`.
 *
 * @param {Element} table - Table element (anything exposing
 *   `querySelectorAll('tr')` whose rows expose `querySelectorAll('td, th')`).
 * @returns {string} The markdown table wrapped in blank lines, or `''` when
 *   the table has no rows with cells.
 */
function convertTableToMarkdown(table) {
  const toCellText = (cell) =>
    (cell.innerText ?? cell.textContent ?? '')
      .trim()
      .replace(/\s*\n\s*/g, ' ')
      .replace(/\|/g, '\\|');

  const renderRow = (cells) => `| ${cells.join(' | ')} |`;

  const markdownRows = [];
  for (const row of Array.from(table.querySelectorAll('tr'))) {
    const cells = Array.from(row.querySelectorAll('td, th'), (cell) => toCellText(cell));
    if (cells.length === 0) continue;

    markdownRows.push(renderRow(cells));

    // The row just pushed is the header: follow it with the delimiter row.
    if (markdownRows.length === 1) {
      markdownRows.push(renderRow(cells.map(() => '---')));
    }
  }

  if (markdownRows.length === 0) return '';

  // Blank lines on both sides so the table renders as its own block.
  return `\n\n${markdownRows.join('\n')}\n\n`;
}
/**
 * Flatten an element's `innerHTML` into plain text, turning block-level
 * structure into newlines.
 *
 * Order matters:
 *  1. `</div>` and `</p>` (plus trailing whitespace) become a blank line.
 *  2. `<br>` (plus trailing whitespace) becomes one newline.
 *  3. `</h1>`..`</h6>` and `</li>` (plus trailing whitespace) become one newline.
 *  4. Every remaining tag is dropped.
 *  5. The entities `&lt;`, `&gt;`, `&nbsp;` and `&amp;` are decoded. `&amp;`
 *     goes last so that text which literally contained `&lt;` (serialised as
 *     `&amp;lt;`) is decoded exactly once.
 *
 * @param {{innerHTML: string}} element - Element whose markup is flattened.
 * @returns {string} The flattened text with surrounding whitespace trimmed.
 */
function htmlToMarkdownText(element) {
  return element.innerHTML
    // Block endings -> paragraph breaks.
    .replace(/<\/div>\s*/gi, '\n\n')
    .replace(/<\/p>\s*/gi, '\n\n')
    // Explicit line breaks.
    .replace(/<br\s*\/?>(\s*)/gi, '\n')
    // Headings that were not converted earlier, and list items.
    .replace(/<\/h[1-6]>\s*/gi, '\n')
    .replace(/<\/li>\s*/gi, '\n')
    // Anything still looking like a tag is markup, not content.
    .replace(/<[^>]+>/g, '')
    // Entity decoding; `&amp;` must stay last.
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&nbsp;/g, ' ')
    .replace(/&amp;/g, '&')
    .trim();
}
/**
 * Convert one chat-message element into markdown text.
 *
 * Works on a deep clone, so the live page is never modified. Rich elements
 * are replaced, one kind at a time, by text nodes holding their markdown
 * equivalent; the remaining markup is then flattened by `htmlToMarkdownText`
 * and tidied by `cleanMarkdown`.
 *
 * The replacement order is significant: headings, tables and `<pre>` blocks
 * are converted first (while their inner markup is still intact), inline
 * formatting next, then lists, blockquotes, images and finally links.
 *
 * NOTE(review): list conversion enumerates every descendant `<li>`, so items
 * of a nested list appear both inside their parent item's text and as
 * separate items — confirm whether nested lists need dedicated handling.
 *
 * @param {Element} element - The message element to convert.
 * @returns {string} Markdown for the message.
 */
function processMessageContent(element) {
  const clone = element.cloneNode(true);

  // Swap every match of `selector` for a text node produced by `toMarkdown`.
  const replaceEach = (selector, toMarkdown) => {
    clone.querySelectorAll(selector).forEach((node) => {
      const replacement = document.createTextNode(toMarkdown(node));
      node.parentNode.replaceChild(replacement, node);
    });
  };

  // Drop UI chrome that must not end up in the export.
  clone
    .querySelectorAll('button, svg, [class*="copy"], [class*="edit"], [class*="regenerate"]')
    .forEach((el) => el.remove());

  // Headings first: `#` repeated by heading level, wrapped in single newlines.
  replaceEach('h1, h2, h3, h4, h5, h6', (header) => {
    const level = Number.parseInt(header.tagName.substring(1), 10);
    return `\n${'#'.repeat(level)} ${header.innerText.trim()}\n`;
  });

  // Tables must be converted before the markup is flattened to text.
  replaceEach('table', (table) => convertTableToMarkdown(table));

  // <pre> blocks become fenced code; the language comes from a
  // `language-xxx` class on the nested <code>, when present.
  replaceEach('pre', (pre) => {
    const code = pre.innerText.trim();
    const langMatch = pre.querySelector('code')?.className?.match(/language-([a-zA-Z0-9]+)/);
    const lang = langMatch ? langMatch[1] : '';
    return `\n\`\`\`${lang}\n${code}\n\`\`\`\n\n`;
  });

  // Inline code spans (anything still inside a <pre> is excluded).
  replaceEach('code:not(pre code)', (code) => `\`${code.innerText}\``);

  // Bold and italic.
  replaceEach('strong, b', (bold) => `**${bold.innerText}**`);
  replaceEach('em, i', (italic) => `*${italic.innerText}*`);

  // Unordered and ordered lists.
  replaceEach('ul', (ul) => {
    const items = Array.from(ul.querySelectorAll('li'), (li) => `- ${li.innerText.trim()}`);
    return `\n${items.join('\n')}\n\n`;
  });
  replaceEach('ol', (ol) => {
    const items = Array.from(
      ol.querySelectorAll('li'),
      (li, position) => `${position + 1}. ${li.innerText.trim()}`,
    );
    return `\n${items.join('\n')}\n\n`;
  });

  // Blockquotes: prefix every line with `> `.
  replaceEach('blockquote', (quote) => {
    const quoted = quote.innerText
      .trim()
      .split('\n')
      .map((line) => `> ${line}`)
      .join('\n');
    return `\n${quoted}\n\n`;
  });

  // Images and canvases are reduced to a textual placeholder.
  replaceEach('img, canvas', (el) => `![${el.alt || 'Image'}]`);

  // Links keep their target in markdown form.
  replaceEach('a[href]', (link) => `[${link.innerText.trim()}](${link.href})`);

  // Flatten what is left, strip trailing blanks on each line, and collapse
  // runs of four or more newlines into a single blank line.
  const flattened = htmlToMarkdownText(clone)
    .replace(/[ \t]+$/gm, '')
    .replace(/\n{4,}/g, '\n\n');

  return cleanMarkdown(flattened);
}
/**
 * Locate the DOM elements that hold the conversation's messages.
 *
 * Strategy:
 *  1. Try a list of selectors, most specific first, and use the first one
 *     that matches anything.
 *  2. If none match, fall back to the direct `div`/`article` children of the
 *     first element that looks like the conversation container.
 *  3. Discard candidates that are too short, too long, contain a text input,
 *     or are marked as typing/loading.
 *  4. Discard every candidate that sits inside another surviving candidate,
 *     so only the outermost element of each message is returned.
 *
 * @returns {Element[]} Outermost message elements, in the order the queries
 *   returned them; empty when nothing was found.
 */
function findMessages() {
  // Ordered from most to least specific to avoid matching nested elements.
  const selectors = [
    'div[data-message-author-role]', // Modern ChatGPT with clear author role
    'article[data-testid*="conversation-turn"]', // Conversation turns
    'div[data-testid="conversation-turn"]', // Specific conversation turn
    'div[class*="group"]:not([class*="group"] [class*="group"])', // Top-level groups only
  ];

  let candidates = [];
  for (const selector of selectors) {
    candidates = document.querySelectorAll(selector);
    if (candidates.length > 0) {
      console.log(`Using selector: ${selector}, found ${candidates.length} messages`);
      break;
    }
  }

  if (candidates.length === 0) {
    // Fallback: take the direct children of the conversation container.
    const conversationContainer = document.querySelector(
      '[role="main"], main, .conversation, [class*="conversation"]',
    );
    if (conversationContainer) {
      candidates = conversationContainer.querySelectorAll(':scope > div, :scope > article');
      console.log(`Fallback: found ${candidates.length} potential messages in conversation container`);
    }
  }

  // Heuristic filter separating real messages from UI fragments.
  const isLikelyMessage = (msg) => {
    const text = msg.textContent.trim();
    if (text.length < 30 || text.length > 100000) return false;
    if (msg.querySelector('input[type="text"], textarea')) return false;
    if (msg.classList.contains('typing') || msg.classList.contains('loading')) return false;
    // Require at least five whitespace-separated words.
    return text.replace(/\s+/g, ' ').trim().split(' ').length >= 5;
  };

  const validMessages = Array.from(candidates).filter(isLikelyMessage);

  // Keep only outermost elements: a candidate contained in another valid
  // candidate would export the same text twice.
  return validMessages.filter(
    (msg) => !validMessages.some((other) => other !== msg && other.contains(msg)),
  );
}
/**
 * Guess who wrote a message: `'You'` (the user) or `'ChatGPT'`.
 *
 * Heuristics are tried in order and the first decisive one wins:
 *  1. The `data-message-author-role` attribute (`'user'` -> `'You'`, any
 *     other non-empty value -> `'ChatGPT'`).
 *  2. Avatar `<img>` elements whose alt text, src or class mention the user
 *     or the assistant.
 *  3. Typical opening phrases. The text is trimmed before matching because
 *     the patterns are anchored at the start and `textContent` may begin with
 *     whitespace.
 *  4. Structure: long text that has lists plus code or tables -> `'ChatGPT'`.
 *  5. Length contrast with the previous message (short -> long means the
 *     assistant answered; long -> short means the user replied).
 *  6. Plain alternation by index (even -> `'You'`, odd -> `'ChatGPT'`).
 *
 * @param {Element} messageElement - The message to classify.
 * @param {number} index - Position of the message in `allMessages`.
 * @param {ArrayLike<Element>} allMessages - All messages, used for heuristic 5.
 * @returns {'You'|'ChatGPT'} The guessed sender label.
 */
function identifySender(messageElement, index, allMessages) {
  // 1. Explicit author role (most reliable).
  const authorRole = messageElement.getAttribute('data-message-author-role');
  if (authorRole) {
    return authorRole === 'user' ? 'You' : 'ChatGPT';
  }

  // 2. Avatar images.
  for (const avatar of messageElement.querySelectorAll('img')) {
    const alt = avatar.alt?.toLowerCase() || '';
    const src = avatar.src?.toLowerCase() || '';
    const classes = avatar.className?.toLowerCase() || '';

    if ([alt, src, classes].some((value) => value.includes('user'))) {
      return 'You';
    }

    const looksLikeAssistant =
      alt.includes('chatgpt') ||
      alt.includes('assistant') ||
      alt.includes('gpt') ||
      src.includes('assistant') ||
      src.includes('chatgpt') ||
      classes.includes('assistant');
    if (looksLikeAssistant) {
      return 'ChatGPT';
    }
  }

  // 3. Opening phrases, matched against the first 200 characters.
  const rawText = messageElement.textContent;
  const textStart = rawText.trim().toLowerCase().substring(0, 200);
  if (/^(i understand|i can help|here's|i'll|let me|i'd be happy|certainly|of course)/.test(textStart)) {
    return 'ChatGPT';
  }
  if (/^(can you|please help|how do i|i need|i want|help me|could you)/.test(textStart)) {
    return 'You';
  }

  // 4. Structure: assistant replies tend to be longer and more structured.
  const hasAny = (selector) => messageElement.querySelectorAll(selector).length > 0;
  const hasCodeBlocks = hasAny('pre, code');
  const hasLists = hasAny('ul, ol, li');
  const hasTables = hasAny('table');
  const hasLongText = rawText.length > 200;
  if ((hasCodeBlocks || hasTables) && hasLongText && hasLists) {
    return 'ChatGPT';
  }

  // 5. Length contrast with the previous message.
  const previous = index > 0 ? allMessages[index - 1] : undefined;
  if (previous) {
    const previousLength = previous.textContent.length;
    const currentLength = rawText.length;
    if (previousLength < 100 && currentLength > 300) {
      return 'ChatGPT';
    }
    if (previousLength > 300 && currentLength < 100) {
      return 'You';
    }
  }

  // 6. Last resort: assume strict alternation starting with the user.
  return index % 2 === 0 ? 'You' : 'ChatGPT';
}
/**
 * Find a human-readable title for the conversation.
 *
 * Candidate selectors are tried in order; the first element whose trimmed
 * text is non-empty and is not one of the generic placeholders (compared
 * case-insensitively) provides the title.
 *
 * @returns {string} The trimmed title text, or `'Conversation with ChatGPT'`
 *   when no candidate yields a usable title.
 */
function extractConversationTitle() {
  // Placeholder titles that say nothing about the conversation.
  const genericTitles = new Set(['chatgpt', 'new chat', 'untitled', 'chat']);

  const titleSelectors = [
    'h1:not([class*="hidden"])',
    '[class*="conversation-title"]',
    '[data-testid*="conversation-title"]',
    'title',
  ];

  for (const selector of titleSelectors) {
    const title = document.querySelector(selector)?.textContent.trim();
    if (title && !genericTitles.has(title.toLowerCase())) {
      return title;
    }
  }

  return 'Conversation with ChatGPT';
}
| // Main export logic | |
| const messages = findMessages(); | |
| if (messages.length === 0) { | |
| alert('No messages found. The page structure may have changed.'); | |
| return; | |
| } | |
| console.log(`Processing ${messages.length} messages...`); | |
| const lines = []; | |
| const title = extractConversationTitle(); | |
| const date = formatDate(); | |
| const url = window.location.href; | |
| lines.push(`# ${title}\n`); | |
| lines.push(`**Date:** ${date}`); | |
| lines.push(`**Source:** [chat.openai.com](${url})\n`); | |
| lines.push(`---\n`); | |
| // Process messages with better duplicate detection | |
| const processedMessages = []; | |
| const seenContent = new Set(); | |
| messages.forEach((messageElement, index) => { | |
| const sender = identifySender(messageElement, index, messages); | |
| const content = processMessageContent(messageElement); | |
| // Skip if empty or too short | |
| if (!content || content.trim().length < 30) { | |
| console.log(`Skipping message ${index}: too short or empty`); | |
| return; | |
| } | |
| // Create a content hash for duplicate detection | |
| const contentHash = content.substring(0, 100).replace(/\s+/g, ' ').trim(); | |
| if (seenContent.has(contentHash)) { | |
| console.log(`Skipping message ${index}: duplicate content`); | |
| return; | |
| } | |
| seenContent.add(contentHash); | |
| processedMessages.push({ | |
| sender, | |
| content, | |
| originalIndex: index | |
| }); | |
| }); | |
| // Apply sender sequence correction | |
| for (let i = 1; i < processedMessages.length; i++) { | |
| const current = processedMessages[i]; | |
| const previous = processedMessages[i - 1]; | |
| // If we have two consecutive messages from the same sender, try to fix it | |
| if (current.sender === previous.sender) { | |
| // Use content analysis to determine which should be flipped | |
| const currentLength = current.content.length; | |
| const previousLength = previous.content.length; | |
| // If current message is much longer, it's likely ChatGPT | |
| if (currentLength > previousLength * 2 && currentLength > 500) { | |
| current.sender = 'ChatGPT'; | |
| } else if (previousLength > currentLength * 2 && previousLength > 500) { | |
| previous.sender = 'ChatGPT'; | |
| current.sender = 'You'; | |
| } else { | |
| // Default alternating fix | |
| current.sender = current.sender === 'You' ? 'ChatGPT' : 'You'; | |
| } | |
| console.log(`Fixed consecutive ${previous.sender} messages at positions ${i-1} and ${i}`); | |
| } | |
| } | |
| // Generate final output | |
| processedMessages.forEach(({ sender, content }) => { | |
| lines.push(`### **${sender}**\n`); | |
| lines.push(content); | |
| lines.push('\n---\n'); | |
| }); | |
| // Create and download file | |
| const markdownContent = lines.join('\n'); | |
| const blob = new Blob([markdownContent], { type: 'text/markdown' }); | |
| const url2 = URL.createObjectURL(blob); | |
| const a = document.createElement('a'); | |
| a.href = url2; | |
| a.download = `ChatGPT_Conversation_${date}.md`; | |
| document.body.appendChild(a); | |
| a.click(); | |
| document.body.removeChild(a); | |
| URL.revokeObjectURL(url2); | |
| console.log(`Export completed: ${processedMessages.length} messages exported`); | |
| })(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment