Created
June 12, 2025 20:03
-
-
Save dfosco/0195e3c9f65abe1e2cad140fafcf8b37 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Node built-ins, shelljs for console output, and the Azure AI inference SDK
// used to call GitHub Models.
const fs = require('fs');
const shell = require('shelljs');
const ModelClient = require("@azure-rest/ai-inference").default;
const { isUnexpected } = require("@azure-rest/ai-inference");
const { AzureKeyCredential } = require("@azure/core-auth");
/**
 * Process text using Azure inference for GitHub Models.
 *
 * Input larger than `maxChunkSize` is split into chunks (preferring paragraph
 * boundaries) and each chunk is processed with a recursive call; results are
 * re-joined with blank lines. On any failure the ORIGINAL text is returned so
 * no data is lost.
 *
 * @param {string} text - Text to process
 * @param {string} prompt - Prompt to use (sent as the system message)
 * @param {string|null} modelId - Optional model ID (defaults to openai/gpt-4o-mini)
 * @param {Object} options - Additional options
 * @param {boolean} options.debug - Whether to save debug information
 * @param {number} options.maxChunkSize - Maximum size of chunks to process
 * @returns {Promise<string>} - Processed text (or the input text on error)
 */
async function processWithGitHubModels(text, prompt, modelId, options = {}) {
  const debug = options.debug || false;
  const maxChunkSize = options.maxChunkSize || 50000; // Default max chunk size of ~50K chars
  // Create a timestamped directory for debug files if debug mode is enabled
  let debugDir;
  if (debug) {
    const projectRoot = require('path').dirname(__dirname);
    debugDir = `${projectRoot}/debug/gh_models_debug_${Date.now()}`;
    try {
      fs.mkdirSync(debugDir, { recursive: true });
    } catch (err) {
      shell.echo(`Warning: Could not create debug directory: ${err.message}`);
    }
  }
  try {
    if (!text || text.trim() === '') {
      shell.echo('Warning: Empty text provided for processing with GitHub Models.');
      return text;
    }
    // Check if GITHUB_TOKEN is available
    const token = process.env["GITHUB_TOKEN"];
    if (!token) {
      shell.echo('Warning: GITHUB_TOKEN environment variable is not set.');
      return text;
    }
    // Check if we need to split the text into smaller chunks
    if (text.length > maxChunkSize) {
      shell.echo(`Text is too large (${text.length} characters). Processing in chunks...`);
      const chunks = splitIntoChunks(text, maxChunkSize);
      shell.echo(`Split into ${chunks.length} chunks.`);
      // Process each chunk separately. Every chunk is guaranteed to be at most
      // maxChunkSize characters, and maxChunkSize is passed through, so the
      // recursive call can never re-enter this branch (the previous
      // implementation could recurse forever on a single oversized paragraph).
      let processedText = '';
      for (let i = 0; i < chunks.length; i++) {
        shell.echo(`Processing chunk ${i + 1}/${chunks.length}...`);
        const chunkResult = await processWithGitHubModels(chunks[i], prompt, modelId, { debug: false, maxChunkSize });
        processedText += chunkResult + '\n\n';
      }
      return processedText;
    }
    const endpoint = "https://models.github.ai/inference";
    const model = modelId || 'openai/gpt-4o-mini'; // Default to gpt-4o-mini if no model specified
    // Save debug information
    if (debug && debugDir) {
      try {
        fs.writeFileSync(`${debugDir}/input.txt`, text, 'utf8');
        fs.writeFileSync(`${debugDir}/prompt.txt`, prompt, 'utf8');
        fs.writeFileSync(`${debugDir}/model.txt`, model, 'utf8');
      } catch (err) {
        shell.echo(`Warning: Could not save debug input files: ${err.message}`);
      }
    }
    // Create Azure inference client
    const client = ModelClient(
      endpoint,
      new AzureKeyCredential(token),
    );
    shell.echo(`Processing text with GitHub Models using model ${model}...`);
    // Make the API call with the required API version
    const response = await client.path("/chat/completions").post({
      headers: {
        "api-version": "2024-12-01-preview"
      },
      body: {
        messages: [
          { role: "system", content: prompt },
          { role: "user", content: text }
        ],
        temperature: 1.0,
        top_p: 1.0,
        model: model
      }
    });
    if (isUnexpected(response)) {
      const error = response.body?.error || response.body || 'Unknown error';
      shell.echo(`Error from GitHub Models API: ${typeof error === 'object' ? JSON.stringify(error) : error}`);
      // Save error for debugging
      if (debug && debugDir) {
        try {
          fs.writeFileSync(`${debugDir}/error.txt`, JSON.stringify(error, null, 2), 'utf8');
          fs.writeFileSync(`${debugDir}/response.txt`, JSON.stringify(response, null, 2), 'utf8');
        } catch (err) {
          shell.echo(`Warning: Could not save debug error file: ${err.message}`);
        }
      }
      return text; // Return original text if there was an error
    }
    // Guard the whole chain: an empty `choices` array falls through to the
    // empty-output warning below instead of throwing a TypeError.
    const processedText = response.body.choices?.[0]?.message?.content;
    // Save the output for debugging
    if (debug && debugDir) {
      try {
        fs.writeFileSync(`${debugDir}/output.txt`, processedText || 'No output', 'utf8');
        fs.writeFileSync(`${debugDir}/response.json`, JSON.stringify(response.body, null, 2), 'utf8');
      } catch (err) {
        shell.echo(`Warning: Could not save debug output files: ${err.message}`);
      }
    }
    if (!processedText || processedText.trim() === '') {
      shell.echo('Warning: GitHub Models returned empty output. Using original text.');
      return text;
    }
    return processedText;
  } catch (error) {
    shell.echo(`Error processing with GitHub Models: ${error.message}`);
    shell.echo('Using original text to avoid data loss.');
    // Save error for debugging
    if (debug && debugDir) {
      try {
        fs.writeFileSync(`${debugDir}/error.txt`, error.stack || error.message, 'utf8');
      } catch (err) {
        shell.echo(`Warning: Could not save debug error file: ${err.message}`);
      }
    }
    return text; // Return original text if there was an error
  }
}
/**
 * Split `text` into chunks of at most `maxChunkSize` characters, preferring
 * paragraph (blank-line) boundaries. A single paragraph longer than
 * `maxChunkSize` is hard-split by character count — without this, re-chunking
 * the same oversized paragraph would recurse forever.
 *
 * @param {string} text - Text to split
 * @param {number} maxChunkSize - Maximum chunk length in characters
 * @returns {string[]} - Chunks, each at most maxChunkSize characters
 */
function splitIntoChunks(text, maxChunkSize) {
  const paragraphs = text.split(/\n\s*\n/);
  const chunks = [];
  let currentChunk = '';
  for (const paragraph of paragraphs) {
    if (paragraph.length > maxChunkSize) {
      // Flush the accumulator, then hard-split the oversized paragraph.
      if (currentChunk) {
        chunks.push(currentChunk);
        currentChunk = '';
      }
      for (let start = 0; start < paragraph.length; start += maxChunkSize) {
        chunks.push(paragraph.slice(start, start + maxChunkSize));
      }
    } else {
      // Re-join with the blank-line separator only BETWEEN paragraphs, so a
      // chunk never exceeds maxChunkSize (a trailing separator previously
      // pushed chunks over the limit).
      const candidate = currentChunk ? `${currentChunk}\n\n${paragraph}` : paragraph;
      if (candidate.length <= maxChunkSize) {
        currentChunk = candidate;
      } else {
        chunks.push(currentChunk);
        currentChunk = paragraph;
      }
    }
  }
  if (currentChunk) {
    chunks.push(currentChunk);
  }
  return chunks;
}
/**
 * Fetch the list of models available in the GitHub Models catalog.
 * Requires GITHUB_TOKEN; any failure (missing token, API error, thrown
 * exception) yields an empty array rather than propagating.
 * @returns {Promise<Array>} - List of available models (empty on failure)
 */
async function getAvailableModels() {
  const token = process.env["GITHUB_TOKEN"];
  if (!token) {
    shell.echo('Warning: GITHUB_TOKEN environment variable is not set.');
    return [];
  }
  try {
    const client = ModelClient(
      "https://models.github.ai/inference",
      new AzureKeyCredential(token),
    );
    const response = await client.path("/catalog/models").get();
    if (isUnexpected(response)) {
      const error = response.body?.error || response.body || 'Unknown error';
      const message = typeof error === 'object' ? JSON.stringify(error) : error;
      shell.echo(`Error getting models: ${message}`);
      return [];
    }
    return response.body.data || [];
  } catch (error) {
    shell.echo(`Error getting available models: ${error.message}`);
    return [];
  }
}
// Public API: text processing via GitHub Models and catalog listing.
module.exports = {
  processWithGitHubModels,
  getAvailableModels
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment