Created
June 12, 2025 20:03
-
-
Save dfosco/0195e3c9f65abe1e2cad140fafcf8b37 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Node built-ins, shelljs for console output, and the Azure AI inference SDK
// used to call GitHub Models.
const fs = require('fs');
const shell = require('shelljs');
const ModelClient = require("@azure-rest/ai-inference").default;
const { isUnexpected } = require("@azure-rest/ai-inference");
const { AzureKeyCredential } = require("@azure/core-auth");
/**
 * Process text using Azure inference for GitHub Models.
 *
 * Input larger than `maxChunkSize` is split into chunks (preferring paragraph
 * boundaries) and each chunk is processed with a recursive call; results are
 * re-joined with blank lines. On any failure the ORIGINAL text is returned so
 * no data is lost.
 *
 * @param {string} text - Text to process
 * @param {string} prompt - Prompt to use (sent as the system message)
 * @param {string|null} modelId - Optional model ID (defaults to openai/gpt-4o-mini)
 * @param {Object} options - Additional options
 * @param {boolean} options.debug - Whether to save debug information
 * @param {number} options.maxChunkSize - Maximum size of chunks to process
 * @returns {Promise<string>} - Processed text (or the input text on error)
 */
async function processWithGitHubModels(text, prompt, modelId, options = {}) {
  const debug = options.debug || false;
  const maxChunkSize = options.maxChunkSize || 50000; // Default max chunk size of ~50K chars
  // Create a timestamped directory for debug files if debug mode is enabled
  let debugDir;
  if (debug) {
    const projectRoot = require('path').dirname(__dirname);
    debugDir = `${projectRoot}/debug/gh_models_debug_${Date.now()}`;
    try {
      fs.mkdirSync(debugDir, { recursive: true });
    } catch (err) {
      shell.echo(`Warning: Could not create debug directory: ${err.message}`);
    }
  }
  try {
    if (!text || text.trim() === '') {
      shell.echo('Warning: Empty text provided for processing with GitHub Models.');
      return text;
    }
    // Check if GITHUB_TOKEN is available
    const token = process.env["GITHUB_TOKEN"];
    if (!token) {
      shell.echo('Warning: GITHUB_TOKEN environment variable is not set.');
      return text;
    }
    // Check if we need to split the text into smaller chunks
    if (text.length > maxChunkSize) {
      shell.echo(`Text is too large (${text.length} characters). Processing in chunks...`);
      const chunks = splitIntoChunks(text, maxChunkSize);
      shell.echo(`Split into ${chunks.length} chunks.`);
      // Process each chunk separately. Every chunk is guaranteed to be at most
      // maxChunkSize characters, and maxChunkSize is passed through, so the
      // recursive call can never re-enter this branch (the previous
      // implementation could recurse forever on a single oversized paragraph).
      let processedText = '';
      for (let i = 0; i < chunks.length; i++) {
        shell.echo(`Processing chunk ${i + 1}/${chunks.length}...`);
        const chunkResult = await processWithGitHubModels(chunks[i], prompt, modelId, { debug: false, maxChunkSize });
        processedText += chunkResult + '\n\n';
      }
      return processedText;
    }
    const endpoint = "https://models.github.ai/inference";
    const model = modelId || 'openai/gpt-4o-mini'; // Default to gpt-4o-mini if no model specified
    // Save debug information
    if (debug && debugDir) {
      try {
        fs.writeFileSync(`${debugDir}/input.txt`, text, 'utf8');
        fs.writeFileSync(`${debugDir}/prompt.txt`, prompt, 'utf8');
        fs.writeFileSync(`${debugDir}/model.txt`, model, 'utf8');
      } catch (err) {
        shell.echo(`Warning: Could not save debug input files: ${err.message}`);
      }
    }
    // Create Azure inference client
    const client = ModelClient(
      endpoint,
      new AzureKeyCredential(token),
    );
    shell.echo(`Processing text with GitHub Models using model ${model}...`);
    // Make the API call with the required API version
    const response = await client.path("/chat/completions").post({
      headers: {
        "api-version": "2024-12-01-preview"
      },
      body: {
        messages: [
          { role: "system", content: prompt },
          { role: "user", content: text }
        ],
        temperature: 1.0,
        top_p: 1.0,
        model: model
      }
    });
    if (isUnexpected(response)) {
      const error = response.body?.error || response.body || 'Unknown error';
      shell.echo(`Error from GitHub Models API: ${typeof error === 'object' ? JSON.stringify(error) : error}`);
      // Save error for debugging
      if (debug && debugDir) {
        try {
          fs.writeFileSync(`${debugDir}/error.txt`, JSON.stringify(error, null, 2), 'utf8');
          fs.writeFileSync(`${debugDir}/response.txt`, JSON.stringify(response, null, 2), 'utf8');
        } catch (err) {
          shell.echo(`Warning: Could not save debug error file: ${err.message}`);
        }
      }
      return text; // Return original text if there was an error
    }
    // Guard the whole chain: an empty `choices` array falls through to the
    // empty-output warning below instead of throwing a TypeError.
    const processedText = response.body.choices?.[0]?.message?.content;
    // Save the output for debugging
    if (debug && debugDir) {
      try {
        fs.writeFileSync(`${debugDir}/output.txt`, processedText || 'No output', 'utf8');
        fs.writeFileSync(`${debugDir}/response.json`, JSON.stringify(response.body, null, 2), 'utf8');
      } catch (err) {
        shell.echo(`Warning: Could not save debug output files: ${err.message}`);
      }
    }
    if (!processedText || processedText.trim() === '') {
      shell.echo('Warning: GitHub Models returned empty output. Using original text.');
      return text;
    }
    return processedText;
  } catch (error) {
    shell.echo(`Error processing with GitHub Models: ${error.message}`);
    shell.echo('Using original text to avoid data loss.');
    // Save error for debugging
    if (debug && debugDir) {
      try {
        fs.writeFileSync(`${debugDir}/error.txt`, error.stack || error.message, 'utf8');
      } catch (err) {
        shell.echo(`Warning: Could not save debug error file: ${err.message}`);
      }
    }
    return text; // Return original text if there was an error
  }
}
/**
 * Split `text` into chunks of at most `maxChunkSize` characters, preferring
 * paragraph (blank-line) boundaries. A single paragraph longer than
 * `maxChunkSize` is hard-split by character count — without this, re-chunking
 * the same oversized paragraph would recurse forever.
 *
 * @param {string} text - Text to split
 * @param {number} maxChunkSize - Maximum chunk length in characters
 * @returns {string[]} - Chunks, each at most maxChunkSize characters
 */
function splitIntoChunks(text, maxChunkSize) {
  const paragraphs = text.split(/\n\s*\n/);
  const chunks = [];
  let currentChunk = '';
  for (const paragraph of paragraphs) {
    if (paragraph.length > maxChunkSize) {
      // Flush the accumulator, then hard-split the oversized paragraph.
      if (currentChunk) {
        chunks.push(currentChunk);
        currentChunk = '';
      }
      for (let start = 0; start < paragraph.length; start += maxChunkSize) {
        chunks.push(paragraph.slice(start, start + maxChunkSize));
      }
    } else {
      // Re-join with the blank-line separator only BETWEEN paragraphs, so a
      // chunk never exceeds maxChunkSize (a trailing separator previously
      // pushed chunks over the limit).
      const candidate = currentChunk ? `${currentChunk}\n\n${paragraph}` : paragraph;
      if (candidate.length <= maxChunkSize) {
        currentChunk = candidate;
      } else {
        chunks.push(currentChunk);
        currentChunk = paragraph;
      }
    }
  }
  if (currentChunk) {
    chunks.push(currentChunk);
  }
  return chunks;
}
/**
 * Fetch the list of models available in the GitHub Models catalog.
 * Requires GITHUB_TOKEN; any failure (missing token, API error, thrown
 * exception) yields an empty array rather than propagating.
 * @returns {Promise<Array>} - List of available models (empty on failure)
 */
async function getAvailableModels() {
  const token = process.env["GITHUB_TOKEN"];
  if (!token) {
    shell.echo('Warning: GITHUB_TOKEN environment variable is not set.');
    return [];
  }
  try {
    const client = ModelClient(
      "https://models.github.ai/inference",
      new AzureKeyCredential(token),
    );
    const response = await client.path("/catalog/models").get();
    if (isUnexpected(response)) {
      const error = response.body?.error || response.body || 'Unknown error';
      const message = typeof error === 'object' ? JSON.stringify(error) : error;
      shell.echo(`Error getting models: ${message}`);
      return [];
    }
    return response.body.data || [];
  } catch (error) {
    shell.echo(`Error getting available models: ${error.message}`);
    return [];
  }
}
// Public API: text processing via GitHub Models and catalog listing.
module.exports = {
  processWithGitHubModels,
  getAvailableModels
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment