theabbie · October 13, 2025 08:18
diff --git a/README.md b/README.md
diff --git a/index.js b/index.js
 #!/usr/bin/env node

 import { Server } from '@modelcontextprotocol/sdk/server/index.js';
 import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
 import {
  CallToolRequestSchema,
  ListToolsRequestSchema,
 } from '@modelcontextprotocol/sdk/types.js';
 import puppeteer from 'puppeteer';
 import * as cheerio from 'cheerio';
 import TurndownService from 'turndown';
 import path from 'path';
 import { fileURLToPath } from 'url';

 // ES modules do not define __filename/__dirname, so derive both from import.meta.url.
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);

 // Shared HTML-to-Markdown converter: ATX headings and fenced code blocks.
 const turndownService = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });

 // Puppeteer handles; both stay null until launchBrowser() assigns them.
 let page = null;
 let browser = null;

 /**
  * Ensure a usable Chrome window and tab exist, launching Chrome when needed.
  *
  * Assigns the module-level `browser` and `page` handles. Chrome is started
  * non-headless with its profile stored in `chrome-session` next to this file.
  * A browser that has disconnected, or a tab that is missing or closed, is
  * replaced instead of being reused.
  *
  * @returns {Promise<void>}
  */
 async function launchBrowser() {
  // A non-null handle may belong to a browser that is no longer connected.
  if (browser && !browser.connected) {
    browser = null;
    page = null;
  }

  let launched = false;
  if (!browser) {
    console.error('\n=== MCP Authenticated Browser ===');
    console.error('Launching Chrome browser...');
    console.error('IMPORTANT: A visible Chrome window will open.');
    console.error('Please authenticate (VPN, SSO, login) in the browser window.');
    console.error('The session will be saved and reused for future requests.\n');

    browser = await puppeteer.launch({
      headless: false,
      defaultViewport: null,
      userDataDir: path.join(__dirname, 'chrome-session'),
      args: [
        '--start-maximized',
        '--disable-blink-features=AutomationControlled',
        // NOTE(review): this switches off Chrome's sandbox; confirm it is really required.
        '--no-sandbox'
      ]
    });
    page = null;
    launched = true;
  }

  // Callers invoke this when `page` is missing, so repair the tab even if the browser is fine.
  if (!page || page.isClosed()) {
    const pages = await browser.pages();
    page = pages[0] || await browser.newPage();

    await page.setUserAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
  }

  if (launched) {
    console.error('Browser launched successfully.');
    console.error('You can now authenticate in the browser window.\n');
  }
 }

 /**
  * Extract the main readable content of a page from the shared browser tab.
  *
  * Scripts, styles and common navigation containers are stripped, then the
  * first selector in `mainSelectors` that yields non-empty text is used. When
  * none does, the whole `<body>` is used instead.
  *
  * @param {string} [url] - Page to navigate to first; when falsy the tab's current page is read.
  * @param {string} [format='text'] - Pass 'markdown' to also convert the selected HTML to Markdown.
  * @returns {Promise<{url: string, title: string, content: string, markdown: (string|null)}>}
  */
 async function getPageContent(url, format = 'text') {
  if (!browser || !page) {
    await launchBrowser();
  }

  if (url) {
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
  }

  const $ = cheerio.load(await page.content());

  $('script, style, nav, header, footer, .sidebar, #sidebar, .navigation, .menu').remove();

  const mainSelectors = [
    'main',
    '[role="main"]',
    '#main-content',
    '.main-content',
    '#content',
    '.content',
    'article',
    '.article',
    '#wiki-content',
    '.wiki-content',
    '.page-content'
  ];

  let content = '';
  let mainHtml = '';

  for (const sel of mainSelectors) {
    const $matches = $(sel);
    const text = $matches.text().trim();
    // Skip selectors that match only empty placeholders so later ones get a chance.
    if (text) {
      content = text;
      // .html() alone returns just the first match; join all matches so the
      // Markdown output covers the same elements as the text output.
      mainHtml = $matches.toArray().map((el) => $(el).html()).join('\n');
      break;
    }
  }

  if (!content) {
    mainHtml = $('body').html() || '';
    content = $('body').text().trim();
  }

  const title = await page.title();
  const currentUrl = page.url();

  const markdown = format === 'markdown' && mainHtml
    ? turndownService.turndown(mainHtml)
    : null;

  return {
    url: currentUrl,
    title,
    content,
    markdown
  };
 }

 /**
  * Collect the links found in the main content area of a page.
  *
  * The first selector in `contentSelectors` that matches limits the search;
  * when none matches, the whole document is searched. Anchors without an
  * href or without visible text are skipped.
  *
  * @param {string} [url] - Page to navigate to first; when falsy the tab's current page is read.
  * @param {?string} [filter=null] - Case-insensitive substring matched against link text and href.
  * @returns {Promise<{url: string, links: Array<{text: string, href: string, title: string}>, totalLinks: number}>}
  *   `totalLinks` counts the links before filtering.
  */
 async function getPageLinks(url, filter = null) {
  if (!browser || !page) {
    await launchBrowser();
  }

  if (url) {
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
  }

  const $ = cheerio.load(await page.content());
  const currentUrl = page.url();

  const contentSelectors = ['main', '[role="main"]', '#main-content', '.main-content', 'article', '.page-content'];

  // The loader function `$` has no .find(); $.root() is the searchable whole-document selection.
  let $scope = $.root();
  for (const sel of contentSelectors) {
    const $candidate = $(sel);
    if ($candidate.length > 0) {
      $scope = $candidate;
      break;
    }
  }

  const links = [];
  $scope.find('a').each((_, elem) => {
    const $a = $(elem);
    const href = $a.attr('href');
    const text = $a.text().trim();

    if (!href || !text) {
      return;
    }

    // Let the URL parser resolve every form (absolute, root-relative,
    // protocol-relative, path-relative); keep the raw href if it cannot.
    let fullUrl = href;
    try {
      fullUrl = new URL(href, currentUrl).href;
    } catch {
      fullUrl = href;
    }

    links.push({
      text,
      href: fullUrl,
      title: $a.attr('title') || ''
    });
  });

  let filteredLinks = links;
  if (filter) {
    const filterLower = filter.toLowerCase();
    filteredLinks = links.filter((link) =>
      link.text.toLowerCase().includes(filterLower) ||
      link.href.toLowerCase().includes(filterLower)
    );
  }

  return {
    url: currentUrl,
    links: filteredLinks,
    totalLinks: links.length
  };
 }

 /**
  * Find occurrences of a string in the visible text of a page.
  *
  * Scripts, styles and common navigation containers are removed before the
  * `<body>` text is searched. Every occurrence is counted, but context
  * snippets (up to 100 characters either side) are returned only for the
  * first 10.
  *
  * @param {string} [url] - Page to navigate to first; when falsy the tab's current page is read.
  * @param {string} query - Non-empty text to look for.
  * @param {boolean} [caseSensitive=false] - Compare with exact case when true.
  * @returns {Promise<{url: string, query: string, found: boolean, matchCount: number, matches: Array<{position: number, context: string}>}>}
  * @throws {Error} When `query` is not a non-empty string.
  */
 async function searchPage(url, query, caseSensitive = false) {
  // An empty needle matches at every index and never advances, so reject it up front.
  if (typeof query !== 'string' || query.length === 0) {
    throw new Error('Search query must be a non-empty string');
  }

  if (!browser || !page) {
    await launchBrowser();
  }

  if (url) {
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
  }

  const $ = cheerio.load(await page.content());

  $('script, style, nav, header, footer, .sidebar, #sidebar').remove();

  const text = $('body').text();
  const needle = caseSensitive ? query : query.toLowerCase();
  const haystack = caseSensitive ? text : text.toLowerCase();
  // NOTE(review): lower-casing can change string length for a few characters,
  // in which case positions refer to the lower-cased text.

  const maxReturned = 10;
  const matches = [];
  let matchCount = 0;

  let index = haystack.indexOf(needle);
  while (index !== -1) {
    matchCount += 1;

    // Only the first few matches are returned, so only build context for those.
    if (matches.length < maxReturned) {
      const start = Math.max(0, index - 100);
      const end = Math.min(text.length, index + needle.length + 100);
      matches.push({
        position: index,
        context: text.substring(start, end).trim()
      });
    }

    index = haystack.indexOf(needle, index + needle.length);
  }

  return {
    url: page.url(),
    query,
    found: matchCount > 0,
    matchCount,
    matches
  };
 }

 // MCP server instance: first argument is its identity, second declares the tools capability.
 const server = new Server(
  { name: 'mcp-authenticated-browser', version: '1.0.0' },
  { capabilities: { tools: {} } }
 );

 // Answers tool-listing requests with the three tools implemented in this file.
 server.setRequestHandler(ListToolsRequestSchema, async () => {
  // Schema fragment for a plain string argument.
  const stringProp = (description) => ({ type: 'string', description });
  // Object schema wrapper listing which argument names are mandatory.
  const objectSchema = (properties, required) => ({ type: 'object', properties, required });

  const fetchContentTool = {
    name: 'fetch_page_content',
    description: 'Fetch and extract clean content from internal company documentation, VPN-protected pages, or authenticated websites (Confluence, internal wikis, etc.). Use this when the user mentions internal docs, company documentation, or any URL behind authentication. Automatically removes navigation, headers, footers. Returns text or markdown format.',
    inputSchema: objectSchema(
      {
        url: stringProp('The URL to fetch content from'),
        format: {
          type: 'string',
          enum: ['text', 'markdown'],
          description: 'Output format (default: text)',
          default: 'text',
        },
      },
      ['url'],
    ),
  };

  const pageLinksTool = {
    name: 'get_page_links',
    description: 'Extract all links from internal documentation pages or authenticated websites. Use this to discover related internal docs or navigate company wikis. Focused on main content area, can filter links by text or URL.',
    inputSchema: objectSchema(
      {
        url: stringProp('The URL to extract links from'),
        filter: stringProp('Optional: filter links by text or URL containing this string'),
      },
      ['url'],
    ),
  };

  const searchTool = {
    name: 'search_page',
    description: 'Search for specific text within internal documentation or authenticated pages. Use this to find information within company docs, Confluence pages, or internal wikis. Returns matches with surrounding context.',
    inputSchema: objectSchema(
      {
        url: stringProp('The URL to search within'),
        query: stringProp('The text to search for'),
        caseSensitive: {
          type: 'boolean',
          description: 'Whether search should be case-sensitive (default: false)',
          default: false,
        },
      },
      ['url', 'query'],
    ),
  };

  return { tools: [fetchContentTool, pageLinksTool, searchTool] };
 });

 // Dispatches a tool call to the matching page helper. Any failure is returned
 // as a text result flagged with `isError` instead of being thrown.
 server.setRequestHandler(CallToolRequestSchema, async (request) => {
  // Wraps a string as the single text item of a tool result.
  const textResult = (text) => ({ content: [{ type: 'text', text }] });

  try {
    // A call may omit `arguments`; default to {} so property reads below do
    // not fail with an opaque TypeError.
    const { name, arguments: args = {} } = request.params;

    switch (name) {
      case 'fetch_page_content': {
        const result = await getPageContent(args.url, args.format || 'text');
        // Fall back to plain text when Markdown was not requested or came back empty.
        return textResult(
          args.format === 'markdown' && result.markdown
            ? result.markdown
            : result.content
        );
      }

      case 'get_page_links': {
        const result = await getPageLinks(args.url, args.filter);
        return textResult(JSON.stringify(result, null, 2));
      }

      case 'search_page': {
        const result = await searchPage(args.url, args.query, args.caseSensitive || false);
        return textResult(JSON.stringify(result, null, 2));
      }

      default:
        throw new Error(`Unknown tool: ${name}`);
    }
  } catch (error) {
    // Thrown values are not guaranteed to be Error instances.
    const message = error instanceof Error ? error.message : String(error);
    return {
      content: [
        {
          type: 'text',
          text: `Error: ${message}`,
        },
      ],
      isError: true,
    };
  }
 });

 /**
  * Connect the MCP server to a stdio transport, then log two status lines to stderr.
  *
  * @returns {Promise<void>}
  */
 async function runServer() {
  await server.connect(new StdioServerTransport());

  const statusLines = [
    'MCP VPN Browser Server running on stdio',
    'Browser will launch on first request',
  ];
  for (const line of statusLines) {
    console.error(line);
  }
 }

 // Start the server; a startup failure is logged to stderr and ends the process with status 1.
 runServer().catch((startupError) => {
  console.error('Server error:', startupError);
  process.exit(1);
 });

 // Close Chrome (when one was launched) before exiting. SIGTERM is handled as
 // well as SIGINT so a plain `kill` does not leave the browser running.
 for (const signal of ['SIGINT', 'SIGTERM']) {
  process.on(signal, async () => {
    try {
      if (browser) {
        await browser.close();
      }
    } catch (error) {
      console.error('Failed to close browser:', error);
    } finally {
      // Exit even when closing the browser failed; otherwise the process would linger.
      process.exit(0);
    }
  });
 }
diff --git a/package.json b/package.json
 {
  "name": "mcp-authenticated-browser",
  "version": "1.0.0",
  "description": "MCP server for accessing VPN-protected and authenticated documentation",
  "type": "module",
  "bin": {
    "mcp-authenticated-browser": "./index.js"
  },
  "scripts": {
    "start": "node index.js"
  },
  "dependencies": {
    "@modelcontextprotocol/sdk": "^1.0.4",
    "puppeteer": "^24.15.0",
    "cheerio": "^1.0.0-rc.12",
    "turndown": "^7.1.2"
  }
 }
	#!/usr/bin/env node

	import { Server } from '@modelcontextprotocol/sdk/server/index.js';
	import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
	import {
	CallToolRequestSchema,
	ListToolsRequestSchema,
	} from '@modelcontextprotocol/sdk/types.js';
	import puppeteer from 'puppeteer';
	import * as cheerio from 'cheerio';
	import TurndownService from 'turndown';
	import path from 'path';
	import { fileURLToPath } from 'url';

	// ES modules do not define __filename/__dirname, so derive both from import.meta.url.
	const __filename = fileURLToPath(import.meta.url);
	const __dirname = path.dirname(__filename);

	// Shared HTML-to-Markdown converter: ATX headings and fenced code blocks.
	const turndownService = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });

	// Puppeteer handles; both stay null until launchBrowser() assigns them.
	let page = null;
	let browser = null;

	/**
	 * Ensure a usable Chrome window and tab exist, launching Chrome when needed.
	 *
	 * Assigns the module-level `browser` and `page` handles. Chrome is started
	 * non-headless with its profile stored in `chrome-session` next to this file.
	 * A browser that has disconnected, or a tab that is missing or closed, is
	 * replaced instead of being reused.
	 *
	 * @returns {Promise<void>}
	 */
	async function launchBrowser() {
	  // A non-null handle may belong to a browser that is no longer connected.
	  if (browser && !browser.connected) {
	    browser = null;
	    page = null;
	  }

	  let launched = false;
	  if (!browser) {
	    console.error('\n=== MCP Authenticated Browser ===');
	    console.error('Launching Chrome browser...');
	    console.error('IMPORTANT: A visible Chrome window will open.');
	    console.error('Please authenticate (VPN, SSO, login) in the browser window.');
	    console.error('The session will be saved and reused for future requests.\n');

	    browser = await puppeteer.launch({
	      headless: false,
	      defaultViewport: null,
	      userDataDir: path.join(__dirname, 'chrome-session'),
	      args: [
	        '--start-maximized',
	        '--disable-blink-features=AutomationControlled',
	        // NOTE(review): this switches off Chrome's sandbox; confirm it is really required.
	        '--no-sandbox'
	      ]
	    });
	    page = null;
	    launched = true;
	  }

	  // Callers invoke this when `page` is missing, so repair the tab even if the browser is fine.
	  if (!page || page.isClosed()) {
	    const pages = await browser.pages();
	    page = pages[0] || await browser.newPage();

	    await page.setUserAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
	  }

	  if (launched) {
	    console.error('Browser launched successfully.');
	    console.error('You can now authenticate in the browser window.\n');
	  }
	}

	/**
	 * Extract the main readable content of a page from the shared browser tab.
	 *
	 * Scripts, styles and common navigation containers are stripped, then the
	 * first selector in `mainSelectors` that yields non-empty text is used. When
	 * none does, the whole `<body>` is used instead.
	 *
	 * @param {string} [url] - Page to navigate to first; when falsy the tab's current page is read.
	 * @param {string} [format='text'] - Pass 'markdown' to also convert the selected HTML to Markdown.
	 * @returns {Promise<{url: string, title: string, content: string, markdown: (string|null)}>}
	 */
	async function getPageContent(url, format = 'text') {
	  if (!browser || !page) {
	    await launchBrowser();
	  }

	  if (url) {
	    await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
	  }

	  const $ = cheerio.load(await page.content());

	  $('script, style, nav, header, footer, .sidebar, #sidebar, .navigation, .menu').remove();

	  const mainSelectors = [
	    'main',
	    '[role="main"]',
	    '#main-content',
	    '.main-content',
	    '#content',
	    '.content',
	    'article',
	    '.article',
	    '#wiki-content',
	    '.wiki-content',
	    '.page-content'
	  ];

	  let content = '';
	  let mainHtml = '';

	  for (const sel of mainSelectors) {
	    const $matches = $(sel);
	    const text = $matches.text().trim();
	    // Skip selectors that match only empty placeholders so later ones get a chance.
	    if (text) {
	      content = text;
	      // .html() alone returns just the first match; join all matches so the
	      // Markdown output covers the same elements as the text output.
	      mainHtml = $matches.toArray().map((el) => $(el).html()).join('\n');
	      break;
	    }
	  }

	  if (!content) {
	    mainHtml = $('body').html() || '';
	    content = $('body').text().trim();
	  }

	  const title = await page.title();
	  const currentUrl = page.url();

	  const markdown = format === 'markdown' && mainHtml
	    ? turndownService.turndown(mainHtml)
	    : null;

	  return {
	    url: currentUrl,
	    title,
	    content,
	    markdown
	  };
	}

	/**
	 * Collect the links found in the main content area of a page.
	 *
	 * The first selector in `contentSelectors` that matches limits the search;
	 * when none matches, the whole document is searched. Anchors without an
	 * href or without visible text are skipped.
	 *
	 * @param {string} [url] - Page to navigate to first; when falsy the tab's current page is read.
	 * @param {?string} [filter=null] - Case-insensitive substring matched against link text and href.
	 * @returns {Promise<{url: string, links: Array<{text: string, href: string, title: string}>, totalLinks: number}>}
	 *   `totalLinks` counts the links before filtering.
	 */
	async function getPageLinks(url, filter = null) {
	  if (!browser || !page) {
	    await launchBrowser();
	  }

	  if (url) {
	    await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
	  }

	  const $ = cheerio.load(await page.content());
	  const currentUrl = page.url();

	  const contentSelectors = ['main', '[role="main"]', '#main-content', '.main-content', 'article', '.page-content'];

	  // The loader function `$` has no .find(); $.root() is the searchable whole-document selection.
	  let $scope = $.root();
	  for (const sel of contentSelectors) {
	    const $candidate = $(sel);
	    if ($candidate.length > 0) {
	      $scope = $candidate;
	      break;
	    }
	  }

	  const links = [];
	  $scope.find('a').each((_, elem) => {
	    const $a = $(elem);
	    const href = $a.attr('href');
	    const text = $a.text().trim();

	    if (!href || !text) {
	      return;
	    }

	    // Let the URL parser resolve every form (absolute, root-relative,
	    // protocol-relative, path-relative); keep the raw href if it cannot.
	    let fullUrl = href;
	    try {
	      fullUrl = new URL(href, currentUrl).href;
	    } catch {
	      fullUrl = href;
	    }

	    links.push({
	      text,
	      href: fullUrl,
	      title: $a.attr('title') || ''
	    });
	  });

	  let filteredLinks = links;
	  if (filter) {
	    const filterLower = filter.toLowerCase();
	    filteredLinks = links.filter((link) =>
	      link.text.toLowerCase().includes(filterLower) ||
	      link.href.toLowerCase().includes(filterLower)
	    );
	  }

	  return {
	    url: currentUrl,
	    links: filteredLinks,
	    totalLinks: links.length
	  };
	}

	/**
	 * Find occurrences of a string in the visible text of a page.
	 *
	 * Scripts, styles and common navigation containers are removed before the
	 * `<body>` text is searched. Every occurrence is counted, but context
	 * snippets (up to 100 characters either side) are returned only for the
	 * first 10.
	 *
	 * @param {string} [url] - Page to navigate to first; when falsy the tab's current page is read.
	 * @param {string} query - Non-empty text to look for.
	 * @param {boolean} [caseSensitive=false] - Compare with exact case when true.
	 * @returns {Promise<{url: string, query: string, found: boolean, matchCount: number, matches: Array<{position: number, context: string}>}>}
	 * @throws {Error} When `query` is not a non-empty string.
	 */
	async function searchPage(url, query, caseSensitive = false) {
	  // An empty needle matches at every index and never advances, so reject it up front.
	  if (typeof query !== 'string' || query.length === 0) {
	    throw new Error('Search query must be a non-empty string');
	  }

	  if (!browser || !page) {
	    await launchBrowser();
	  }

	  if (url) {
	    await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
	  }

	  const $ = cheerio.load(await page.content());

	  $('script, style, nav, header, footer, .sidebar, #sidebar').remove();

	  const text = $('body').text();
	  const needle = caseSensitive ? query : query.toLowerCase();
	  const haystack = caseSensitive ? text : text.toLowerCase();
	  // NOTE(review): lower-casing can change string length for a few characters,
	  // in which case positions refer to the lower-cased text.

	  const maxReturned = 10;
	  const matches = [];
	  let matchCount = 0;

	  let index = haystack.indexOf(needle);
	  while (index !== -1) {
	    matchCount += 1;

	    // Only the first few matches are returned, so only build context for those.
	    if (matches.length < maxReturned) {
	      const start = Math.max(0, index - 100);
	      const end = Math.min(text.length, index + needle.length + 100);
	      matches.push({
	        position: index,
	        context: text.substring(start, end).trim()
	      });
	    }

	    index = haystack.indexOf(needle, index + needle.length);
	  }

	  return {
	    url: page.url(),
	    query,
	    found: matchCount > 0,
	    matchCount,
	    matches
	  };
	}

	// MCP server instance: first argument is its identity, second declares the tools capability.
	const server = new Server(
	  { name: 'mcp-authenticated-browser', version: '1.0.0' },
	  { capabilities: { tools: {} } }
	);

	// Answers tool-listing requests with the three tools implemented in this file.
	server.setRequestHandler(ListToolsRequestSchema, async () => {
	  // Schema fragment for a plain string argument.
	  const stringProp = (description) => ({ type: 'string', description });
	  // Object schema wrapper listing which argument names are mandatory.
	  const objectSchema = (properties, required) => ({ type: 'object', properties, required });

	  const fetchContentTool = {
	    name: 'fetch_page_content',
	    description: 'Fetch and extract clean content from internal company documentation, VPN-protected pages, or authenticated websites (Confluence, internal wikis, etc.). Use this when the user mentions internal docs, company documentation, or any URL behind authentication. Automatically removes navigation, headers, footers. Returns text or markdown format.',
	    inputSchema: objectSchema(
	      {
	        url: stringProp('The URL to fetch content from'),
	        format: {
	          type: 'string',
	          enum: ['text', 'markdown'],
	          description: 'Output format (default: text)',
	          default: 'text',
	        },
	      },
	      ['url'],
	    ),
	  };

	  const pageLinksTool = {
	    name: 'get_page_links',
	    description: 'Extract all links from internal documentation pages or authenticated websites. Use this to discover related internal docs or navigate company wikis. Focused on main content area, can filter links by text or URL.',
	    inputSchema: objectSchema(
	      {
	        url: stringProp('The URL to extract links from'),
	        filter: stringProp('Optional: filter links by text or URL containing this string'),
	      },
	      ['url'],
	    ),
	  };

	  const searchTool = {
	    name: 'search_page',
	    description: 'Search for specific text within internal documentation or authenticated pages. Use this to find information within company docs, Confluence pages, or internal wikis. Returns matches with surrounding context.',
	    inputSchema: objectSchema(
	      {
	        url: stringProp('The URL to search within'),
	        query: stringProp('The text to search for'),
	        caseSensitive: {
	          type: 'boolean',
	          description: 'Whether search should be case-sensitive (default: false)',
	          default: false,
	        },
	      },
	      ['url', 'query'],
	    ),
	  };

	  return { tools: [fetchContentTool, pageLinksTool, searchTool] };
	});

	// Dispatches a tool call to the matching page helper. Any failure is returned
	// as a text result flagged with `isError` instead of being thrown.
	server.setRequestHandler(CallToolRequestSchema, async (request) => {
	  // Wraps a string as the single text item of a tool result.
	  const textResult = (text) => ({ content: [{ type: 'text', text }] });

	  try {
	    // A call may omit `arguments`; default to {} so property reads below do
	    // not fail with an opaque TypeError.
	    const { name, arguments: args = {} } = request.params;

	    switch (name) {
	      case 'fetch_page_content': {
	        const result = await getPageContent(args.url, args.format || 'text');
	        // Fall back to plain text when Markdown was not requested or came back empty.
	        return textResult(
	          args.format === 'markdown' && result.markdown
	            ? result.markdown
	            : result.content
	        );
	      }

	      case 'get_page_links': {
	        const result = await getPageLinks(args.url, args.filter);
	        return textResult(JSON.stringify(result, null, 2));
	      }

	      case 'search_page': {
	        const result = await searchPage(args.url, args.query, args.caseSensitive || false);
	        return textResult(JSON.stringify(result, null, 2));
	      }

	      default:
	        throw new Error(`Unknown tool: ${name}`);
	    }
	  } catch (error) {
	    // Thrown values are not guaranteed to be Error instances.
	    const message = error instanceof Error ? error.message : String(error);
	    return {
	      content: [
	        {
	          type: 'text',
	          text: `Error: ${message}`,
	        },
	      ],
	      isError: true,
	    };
	  }
	});

	/**
	 * Connect the MCP server to a stdio transport, then log two status lines to stderr.
	 *
	 * @returns {Promise<void>}
	 */
	async function runServer() {
	  await server.connect(new StdioServerTransport());

	  const statusLines = [
	    'MCP VPN Browser Server running on stdio',
	    'Browser will launch on first request',
	  ];
	  for (const line of statusLines) {
	    console.error(line);
	  }
	}

	// Start the server; a startup failure is logged to stderr and ends the process with status 1.
	runServer().catch((startupError) => {
	  console.error('Server error:', startupError);
	  process.exit(1);
	});

	// Close Chrome (when one was launched) before exiting. SIGTERM is handled as
	// well as SIGINT so a plain `kill` does not leave the browser running.
	for (const signal of ['SIGINT', 'SIGTERM']) {
	  process.on(signal, async () => {
	    try {
	      if (browser) {
	        await browser.close();
	      }
	    } catch (error) {
	      console.error('Failed to close browser:', error);
	    } finally {
	      // Exit even when closing the browser failed; otherwise the process would linger.
	      process.exit(0);
	    }
	  });
	}
	{
	"name": "mcp-authenticated-browser",
	"version": "1.0.0",
	"description": "MCP server for accessing VPN-protected and authenticated documentation",
	"type": "module",
	"bin": {
	"mcp-authenticated-browser": "./index.js"
	},
	"scripts": {
	"start": "node index.js"
	},
	"dependencies": {
	"@modelcontextprotocol/sdk": "^1.0.4",
	"puppeteer": "^24.15.0",
	"cheerio": "^1.0.0-rc.12",
	"turndown": "^7.1.2"
	}
	}