Skip to content

Instantly share code, notes, and snippets.

@cmod
Last active February 26, 2025 23:25
Show Gist options
  • Save cmod/be2e8faaeeae06bbf3841417a316ff70 to your computer and use it in GitHub Desktop.
Save cmod/be2e8faaeeae06bbf3841417a316ff70 to your computer and use it in GitHub Desktop.
Convert HTML (in clipboard) to Markdown (in clipboard) Alfred Workflow

Simple Alfred workflow using Node to convert rich-text HTML (i.e., just select and copy from a website) in your clipboard to Markdown, replacing the HTML in the clipboard.

Requirements

  • Node.js
    • brew install node in macOS terminal
    • turndown package (npm install turndown in terminal)
  • Alfred launcher

Workflow Configuration

  1. Create a new Alfred Workflow that looks like this, with three components: keyword, run script, copy to clipboard
Screenshot 2025-02-15 at 16 38 43
  2. In run script, set the variables and directories as appropriate to your environment:
Screenshot 2025-02-15 at 16 43 25
  3. Save the following code in some dir as html2md.js and update the workflow run script directory as appropriate:
#!/usr/bin/env node

const { spawn } = require('child_process');
const TurndownService = require('turndown');

// Shared Turndown converter. The options select ATX-style headings,
// fenced code blocks, `*` for emphasis and `**` for strong text.
const turndownOptions = {
  headingStyle: 'atx',
  codeBlockStyle: 'fenced',
  emDelimiter: '*',
  strongDelimiter: '**'
};
const turndownService = new TurndownService(turndownOptions);

/**
 * Normalise Markdown text: replace typographic punctuation with ASCII
 * equivalents and tidy up hard line breaks, blank lines and stray whitespace.
 *
 * The rules are applied strictly in the order listed; later rules rely on the
 * output of earlier ones.
 *
 * @param {string} str - Markdown text to clean.
 * @returns {string} The cleaned Markdown.
 */
function cleanMarkdown(str) {
  const rules = [
    // Curly single quotes and acute accent -> straight apostrophe.
    [/[\u2018\u2019\u00b4]/g, "'"],
    // Curly double quotes and double prime -> straight double quote.
    [/[\u201c\u201d\u2033]/g, '"'],
    // Minus sign, bullet, middle dot, small black square -> hyphen.
    [/[\u2212\u2022\u00b7\u25aa]/g, '-'],
    // En dash and horizontal bar -> double hyphen.
    [/[\u2013\u2015]/g, '--'],
    // Em dash -> triple hyphen.
    [/\u2014/g, '---'],
    // Ellipsis character -> three dots.
    [/\u2026/g, '...'],
    // Drop spaces that sit directly before a newline.
    [/[ ]+\n/g, '\n'],
    // Drop whitespace in front of a backslash hard break.
    [/\s*\\\n/g, '\\\n'],
    // Two consecutive hard breaks become a paragraph break.
    [/\s*\\\n\s*\\\n/g, '\n\n'],
    // A hard break followed by a blank line becomes a paragraph break.
    [/\s*\\\n\n/g, '\n\n'],
    // Remove lines that contain only a hyphen.
    [/\n-\n/g, '\n'],
    // A paragraph break followed by a lone hard break stays a paragraph break.
    [/\n\n\s*\\\n/g, '\n\n'],
    // Collapse runs of two or more newlines to exactly two.
    [/\n\n\n*/g, '\n\n'],
    // Strip trailing spaces on every line.
    [/[ ]+$/gm, ''],
    // Trim leading whitespace and trailing whitespace/backslashes of the whole text.
    [/^\s+|[\s\\]+$/g, '']
  ];

  return rules.reduce(
    (text, [pattern, replacement]) => text.replace(pattern, replacement),
    str
  );
}

/**
 * Decode a hex-encoded `«data HTML…»` clipboard payload into an HTML string.
 *
 * Only input that carries the `«data HTML` prefix is hex-decoded. Anything
 * else (for example plain text obtained when the clipboard holds no HTML) is
 * returned unchanged. `Buffer.from(str, 'hex')` does not throw on non-hex
 * input; it silently truncates at the first invalid character, so decoding
 * arbitrary text would yield an empty or garbage string rather than an error.
 *
 * @param {string} str - Raw clipboard text, optionally wrapped as `«data HTML<hex>»`.
 * @returns {string} The decoded UTF-8 HTML, or `str` itself when it is not a wrapped payload.
 */
function decodeHexHTML(str) {
  // Capture the run of hex digits after the prefix; the closing `»` and any
  // trailing newline are deliberately left out of the capture.
  const wrapped = /^\s*«data HTML([0-9a-f]*)/i.exec(str);
  if (!wrapped) {
    return str;
  }

  return Buffer.from(wrapped[1], 'hex').toString('utf8');
}

/**
 * Run an AppleScript through `osascript`, feeding the script on stdin.
 *
 * @param {string} script - AppleScript source to execute.
 * @returns {Promise<{code: number|null, stdout: string, stderr: string}>}
 *   Exit code and collected output. Rejects if the process cannot be spawned
 *   or its stdin cannot be written.
 */
function runOsascript(script) {
  return new Promise((resolve, reject) => {
    const child = spawn('osascript');
    let stdout = '';
    let stderr = '';

    // Decode as UTF-8 at the stream level so a multi-byte character split
    // across two chunks is not corrupted by per-chunk string conversion.
    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');

    child.stdout.on('data', chunk => {
      stdout += chunk;
    });
    child.stderr.on('data', chunk => {
      stderr += chunk;
    });

    // Without these listeners a spawn failure (e.g. osascript missing) or a
    // broken pipe would surface as an uncaught 'error' event.
    child.on('error', reject);
    child.stdin.on('error', reject);

    child.on('close', code => {
      resolve({ code, stdout, stderr });
    });

    child.stdin.end(script);
  });
}

/**
 * Read the clipboard via `osascript`.
 *
 * First asks for the clipboard as `«class HTML»`; the raw osascript output is
 * returned as-is (the caller is expected to decode it). If that fails or
 * produces no output, falls back to the clipboard as a plain string.
 *
 * @returns {Promise<string>} Raw osascript output for the HTML or plain-text clipboard.
 * @throws {Error} If neither attempt yields content, or osascript cannot be run.
 */
async function getHTMLFromClipboard() {
  const htmlScript = `
    tell application "System Events"
      the clipboard as «class HTML»
    end tell
  `;

  const html = await runOsascript(htmlScript);
  if (html.code === 0 && html.stdout.trim()) {
    return html.stdout;
  }

  // If HTML fails, try getting plain text
  const plainScript = `
    set the_clip to the clipboard as string
    return the_clip
  `;

  const plain = await runOsascript(plainScript);
  if (plain.code === 0 && plain.stdout.trim()) {
    return plain.stdout;
  }

  // Report stderr from the HTML attempt, matching the original error text.
  throw new Error(`Failed to get clipboard content: ${html.stderr || 'No content found'}`);
}

/**
 * Replace the clipboard contents by piping `data` into `pbcopy`.
 *
 * @param {string|Buffer} data - Content to place on the clipboard.
 * @returns {Promise<void>} Resolves when pbcopy exits with code 0.
 *   Rejects with 'Failed to copy to clipboard' on a non-zero exit, or with the
 *   underlying error if pbcopy cannot be spawned or its stdin cannot be written.
 */
function setClipboard(data) {
  return new Promise((resolve, reject) => {
    const pbcopy = spawn('pbcopy');

    // Without these listeners a spawn failure or broken pipe would be thrown
    // as an uncaught 'error' event instead of rejecting this promise.
    pbcopy.on('error', reject);
    pbcopy.stdin.on('error', reject);

    pbcopy.on('close', code => {
      if (code === 0) {
        resolve();
      } else {
        reject(new Error('Failed to copy to clipboard'));
      }
    });

    pbcopy.stdin.end(data);
  });
}

/**
 * Write a labelled, ruler-delimited debug section to stderr.
 *
 * @param {string} step - Label printed as the section heading (followed by ':').
 * @param {*} content - Value to print; values whose `typeof` is 'object' are
 *   pretty-printed as JSON with 2-space indentation, everything else is
 *   passed to `console.error` unchanged.
 */
function debugLog(step, content) {
  const heavyRule = '='.repeat(50);
  const lightRule = '-'.repeat(50);
  const body = typeof content === 'object'
    ? JSON.stringify(content, null, 2)
    : content;

  const sections = [`\n${heavyRule}`, `${step}:`, lightRule, body, `${heavyRule}\n`];
  for (const section of sections) {
    console.error(section);
  }
}

/**
 * Entry point: read the clipboard, decode it, convert it to Markdown, write
 * the Markdown back to the clipboard and print it on stdout.
 *
 * Each intermediate value is dumped via debugLog. On any failure an
 * `{ alfredworkflow: { error } }` JSON object is written to stderr and the
 * process exits with status 1.
 */
async function main() {
  try {
    // Raw clipboard payload as returned by osascript.
    const rawClipboard = await getHTMLFromClipboard();
    debugLog('Original Hex Content', rawClipboard);

    const decoded = decodeHexHTML(rawClipboard);
    debugLog('Decoded HTML', decoded);

    const isBlank = decoded.trim().length === 0;
    if (isBlank) {
      throw new Error('No content found in clipboard');
    }

    // HTML -> Markdown, then normalise punctuation and whitespace.
    const converted = turndownService.turndown(decoded);
    const result = cleanMarkdown(converted);
    debugLog('Converted Markdown', result);

    // Replace the clipboard contents, then echo the Markdown on stdout.
    await setClipboard(result);
    console.log(result);
  } catch (error) {
    const { message } = error;
    const payload = { alfredworkflow: { error: message } };
    debugLog('Error Output', payload);
    console.error(JSON.stringify(payload));
    process.exit(1);
  }
}

main();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment