Skip to content

Instantly share code, notes, and snippets.

@zazaulola
Created November 14, 2024 09:16
Show Gist options
  • Save zazaulola/e93246c1c7e1b94aa74ece02ead8f7ae to your computer and use it in GitHub Desktop.
Save zazaulola/e93246c1c7e1b94aa74ece02ead8f7ae to your computer and use it in GitHub Desktop.
/** @format */
const path = require('path');
const fs = require('fs');
const https = require('https');
const http = require('http');
const cheerio = require('cheerio');
const os = require('os');
// Configuration
// Central settings for the mirror synchronization script.
const CONFIG = {
// Page that is fetched and scraped for the list of available mirrors.
mirrorListUrl: 'https://repo.manjaro.org/',
// Local directory (next to this script) under which branch/repo/arch folders are created.
rootPath: path.resolve(__dirname, 'PortableMirror'),
// Every branch/repo/architecture combination listed below is synced in turn.
branches: ['stable', 'testing', 'unstable'],
repos: ['core', 'extra'],
architectures: ['x86_64'], // Could also add 'aarch64', 'arm'
// Value handed to request.setTimeout() for file downloads (milliseconds).
downloadTimeout: 30000,
// Upper bound on simultaneous downloads: twice the CPU count reported by os.cpus().
maxConcurrentDownloads: os.cpus().length * 2,
// Mirrors to ignore; checked against the mirror's link text when the list is scraped.
blackListMirrors: [
'mirrors.ucr.ac.cr',
'mirror.funami.tec',
'mirror.is.co.za',
'kartolo.sby.datautama.net.id',
'muug.ca/mirror',
'ftp.nluug.nl',
]
};
// Progress display
/**
 * Console output helpers.
 *
 * - progress(info): redraws the current terminal line with `info`.
 * - line(text): prints `text` followed by a newline.
 * - debug(text): prints `text` prefixed with "DEBUG: ".
 */
const print = {
  progress: (info) => {
    const out = process.stdout;
    // clearLine/cursorTo only exist when stdout is a TTY. When output is piped
    // or redirected they are undefined and calling them would throw, so the
    // in-place progress line is simply skipped in that case.
    if (typeof out.clearLine !== 'function' || typeof out.cursorTo !== 'function') {
      return;
    }
    out.clearLine(0);
    out.cursorTo(0);
    out.write(info);
  },
  line: (text) => console.log(text),
  debug: (text) => console.log(`DEBUG: ${text}`)
};
// Utility functions
/**
 * Small formatting, parsing and filesystem helpers.
 */
const utils = {
  /**
   * Formats a byte count with a binary unit (B, KB, MB, GB, TB) and two decimals.
   * Non-finite, zero or negative input is reported as '0 B'.
   * @param {number} bytes
   * @returns {string}
   */
  formatSize: (bytes) => {
    if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';
    const units = ['B', 'KB', 'MB', 'GB', 'TB'];
    let value = bytes;
    let unitIndex = 0;
    // Repeated division keeps the unit index inside the table and avoids the
    // rounding errors of a floating-point logarithm ratio.
    while (value >= 1024 && unitIndex < units.length - 1) {
      value /= 1024;
      unitIndex += 1;
    }
    return `${value.toFixed(2)} ${units[unitIndex]}`;
  },
  /**
   * Formats a transfer rate as "<size>/s".
   * @param {number} bytesPerSecond
   * @returns {string}
   */
  formatSpeed: (bytesPerSecond) => {
    return `${utils.formatSize(bytesPerSecond)}/s`;
  },
  /**
   * Resolves after `ms` milliseconds.
   * @param {number} ms
   * @returns {Promise<void>}
   */
  delay: (ms) => new Promise(resolve => setTimeout(resolve, ms)),
  /**
   * Parses a human-readable size such as "10", "1.5KB", "2M" or "3 GiB" into
   * a byte count (units are powers of 1024). Returns 0 when the text is empty,
   * 'unknown' or not recognised.
   * @param {string} sizeStr
   * @returns {number}
   */
  parseSize: (sizeStr) => {
    if (!sizeStr || sizeStr === 'unknown') return 0;
    const text = String(sizeStr).trim();
    // Optional K/M/G/T prefix, optionally followed by "B" or "iB".
    const matches = text.match(/^([\d.]+)\s*([KMGT])?(?:i?B)?$/i);
    if (!matches) return 0;
    const size = parseFloat(matches[1]);
    if (!Number.isFinite(size)) return 0;
    const multipliers = { '': 1, K: 1024, M: 1024 ** 2, G: 1024 ** 3, T: 1024 ** 4 };
    const prefix = (matches[2] || '').toUpperCase();
    return Math.round(size * multipliers[prefix]);
  },
  /**
   * Stats a file without throwing.
   * @param {string} filePath
   * @returns {Promise<{exists: boolean, size?: number, mtime?: Date}>}
   *   `{ exists: false }` when the file cannot be stat'ed for any reason.
   */
  getFileStats: async (filePath) => {
    try {
      const stats = await fs.promises.stat(filePath);
      return {
        exists: true,
        size: stats.size,
        mtime: stats.mtime
      };
    } catch {
      // Best effort: any stat failure is treated as "file not present".
      return { exists: false };
    }
  }
};
/** @format */
// ... (previous imports)
// Helpers for validating, normalizing and assembling URLs.
const urlUtils = {
  /**
   * Reports whether `string` can be parsed by the URL constructor.
   * @param {string} string
   * @returns {boolean}
   */
  isValidUrl(string) {
    try {
      new URL(string);
      return true;
    } catch {
      return false;
    }
  },
  /**
   * Normalizes a mirror URL: adds "https://" when no http(s) scheme is present,
   * collapses repeated slashes (except the ones after the scheme) and removes a
   * trailing slash.
   * @param {string} urlString
   * @returns {string}
   * @throws {Error} "Invalid URL: …" when the input is not a non-empty string
   *   or the normalized result cannot be parsed as a URL.
   */
  normalize(urlString) {
    if (typeof urlString !== 'string' || urlString.trim() === '') {
      throw new Error(`Invalid URL: ${urlString}`);
    }
    let normalized = urlString.trim();
    // Add the scheme when it is missing.
    if (!/^https?:\/\//i.test(normalized)) {
      normalized = `https://${normalized}`;
    }
    // Collapse runs of slashes; "[^:]" keeps the "//" after the scheme intact.
    normalized = normalized.replace(/([^:]\/)\/+/g, '$1');
    // Drop the trailing slash.
    normalized = normalized.replace(/\/$/, '');
    if (!urlUtils.isValidUrl(normalized)) {
      throw new Error(`Invalid URL: ${urlString}`);
    }
    return normalized;
  },
  /**
   * Joins URL segments with single slashes. Non-string, empty and slash-only
   * segments are skipped so they cannot introduce "//" into the result.
   * @param {...string} parts
   * @returns {string}
   */
  join(...parts) {
    return parts
      .filter((part) => typeof part === 'string')
      .map((part) => part.replace(/^\/+|\/+$/g, ''))
      .filter((part) => part !== '')
      .join('/');
  },
  /**
   * Returns the decoded last path segment of `url`; falls back to a plain
   * split on "/" when the URL cannot be parsed or decoded.
   * @param {string} url
   * @returns {string}
   */
  getFileName(url) {
    try {
      return decodeURIComponent(new URL(url).pathname.split('/').pop());
    } catch {
      return url.split('/').pop();
    }
  },
  /**
   * Builds "<mirror.url>/<branch>/<repo>/<arch>/<filename>" with single slashes.
   * A leading "./" or "/" on the file name is removed first.
   * @param {{url: string}} mirror
   * @param {string} branch
   * @param {string} repo
   * @param {string} arch
   * @param {string} filename
   * @returns {string}
   * @throws {Error} when the mirror has no URL.
   */
  buildDownloadUrl(mirror, branch, repo, arch, filename) {
    if (!mirror || !mirror.url) {
      throw new Error('Invalid mirror configuration');
    }
    const cleanFilename = filename.replace(/^\.\//, '').replace(/^\//, '');
    return urlUtils.join(mirror.url, branch, repo, arch, cleanFilename);
  },
  /**
   * Reports whether `url` starts with an http:// or https:// scheme.
   * @param {string} url
   * @returns {boolean}
   */
  isAbsoluteUrl(url) {
    return /^https?:\/\//i.test(url);
  }
};
/**
 * Performs an HTTP(S) GET and buffers the whole response body.
 *
 * NOTE(review): this file declares a second function named `makeRequest`
 * further down; with CommonJS function hoisting the later declaration is the
 * one that actually runs.
 *
 * @param {string} url - Absolute http:// or https:// URL.
 * @param {object} [options] - Passed to `get()`; `options.timeout` (ms) also
 *   arms an inactivity timeout that aborts the request.
 * @returns {Promise<{ok: boolean, status: number, headers: object, text: function(): Promise<string>}>}
 * @throws (rejects) on network errors, timeouts, more than 5 redirects, or
 *   any final status other than 200.
 */
function makeRequest(url, options = {}) {
  const MAX_REDIRECTS = 5;
  const REDIRECT_CODES = [301, 302, 303, 307, 308];

  const attempt = (currentUrl, redirectsLeft) => new Promise((resolve, reject) => {
    const protocol = currentUrl.startsWith('https:') ? https : http;
    const request = protocol.get(currentUrl, options, (response) => {
      const { statusCode, headers } = response;

      // Follow redirects, bounded so a redirect loop cannot run forever.
      if (REDIRECT_CODES.includes(statusCode) && headers.location) {
        response.resume(); // drain the unused body so the socket is released
        if (redirectsLeft <= 0) {
          reject(new Error('Too many redirects'));
          return;
        }
        let nextUrl;
        try {
          // Location may be relative; resolve it against the current URL.
          nextUrl = new URL(headers.location, currentUrl).toString();
        } catch (error) {
          reject(error);
          return;
        }
        attempt(nextUrl, redirectsLeft - 1).then(resolve, reject);
        return;
      }

      if (statusCode !== 200) {
        response.resume();
        reject(new Error(`HTTP error! status: ${statusCode}`));
        return;
      }

      const data = [];
      response.on('data', (chunk) => data.push(chunk));
      response.on('error', reject);
      response.on('end', () => resolve({
        ok: true,
        status: statusCode,
        headers,
        text: () => Promise.resolve(Buffer.concat(data).toString()),
      }));
    });

    request.on('error', reject);
    if (options.timeout) {
      request.setTimeout(options.timeout, () => {
        // Destroying with an error routes the failure through the 'error' handler.
        request.destroy(new Error('Request timeout'));
      });
    }
  });

  return attempt(url, MAX_REDIRECTS);
}
/**
 * Downloads `url` into `destPath`, updating `downloadInfo` as data arrives.
 *
 * The destination file is only created once a 200 response is received, and
 * it is removed again if the transfer fails part-way. When the server sends a
 * valid Last-Modified header, the file's mtime is set to it.
 *
 * @param {string} url - Absolute http:// or https:// URL.
 * @param {string} destPath - Local file to write.
 * @param {{downloaded: number, total: number, started: number, speed?: number}} downloadInfo
 *   Mutable progress record: `total` is replaced by Content-Length when
 *   present, `downloaded` is incremented per chunk and `speed` (bytes/s) is
 *   recomputed from `started`.
 * @returns {Promise<void>}
 * @throws (rejects) on network/file errors, timeout, more than 5 redirects,
 *   a redirect without Location, or a final status other than 200.
 */
async function downloadFile(url, destPath, downloadInfo) {
  const MAX_REDIRECTS = 5;
  const REDIRECT_CODES = [301, 302, 303, 307, 308];

  const attempt = (currentUrl, redirectsLeft) => new Promise((resolve, reject) => {
    const protocol = currentUrl.startsWith('https:') ? https : http;
    let file = null;
    let settled = false;

    // Single failure path: settle once, and remove the partial file only if
    // this attempt actually created it.
    const fail = (error) => {
      if (settled) return;
      settled = true;
      if (file === null) {
        reject(error);
        return;
      }
      file.destroy();
      fs.unlink(destPath, () => reject(error));
    };

    const request = protocol.get(currentUrl, (response) => {
      const { statusCode, headers } = response;

      if (REDIRECT_CODES.includes(statusCode)) {
        response.resume(); // drain the unused body so the socket is released
        if (!headers.location) {
          fail(new Error('Redirect without location header'));
          return;
        }
        if (redirectsLeft <= 0) {
          fail(new Error('Too many redirects'));
          return;
        }
        let nextUrl;
        try {
          // Location may be relative; resolve it against the current URL.
          nextUrl = new URL(headers.location, currentUrl).toString();
        } catch (error) {
          fail(error);
          return;
        }
        settled = true;
        attempt(nextUrl, redirectsLeft - 1).then(resolve, reject);
        return;
      }

      if (statusCode !== 200) {
        response.resume();
        fail(new Error(`HTTP ${statusCode}`));
        return;
      }

      downloadInfo.total = parseInt(headers['content-length'], 10) || downloadInfo.total;
      if (typeof downloadInfo.downloaded !== 'number' || Number.isNaN(downloadInfo.downloaded)) {
        downloadInfo.downloaded = 0;
      }
      const startedAt = typeof downloadInfo.started === 'number' ? downloadInfo.started : Date.now();
      const lastModified = new Date(headers['last-modified']);

      file = fs.createWriteStream(destPath);
      file.on('error', (error) => {
        request.destroy();
        fail(error);
      });
      response.on('error', fail);
      response.on('data', (chunk) => {
        downloadInfo.downloaded += chunk.length;
        const elapsed = (Date.now() - startedAt) / 1000;
        // Guard the very first chunk, where no measurable time has passed yet.
        downloadInfo.speed = elapsed > 0 ? downloadInfo.downloaded / elapsed : 0;
      });
      file.on('finish', () => {
        file.close(async () => {
          if (settled) return;
          settled = true;
          try {
            if (!Number.isNaN(lastModified.getTime())) {
              await fs.promises.utimes(destPath, new Date(), lastModified);
            }
            resolve();
          } catch (error) {
            reject(error);
          }
        });
      });
      response.pipe(file);
    });

    request.on('error', fail);
    if (CONFIG.downloadTimeout) {
      request.setTimeout(CONFIG.downloadTimeout, () => {
        request.destroy();
        fail(new Error('Download timeout'));
      });
    }
  });

  return attempt(url, MAX_REDIRECTS);
}
/**
 * Downloads a queue of repository files into a local directory, falling back
 * to the next mirror when a download fails.
 */
class DownloadManager {
  /**
   * @param {Array<{name: string, url: string}>} mirrors - Candidate mirrors;
   *   entries without a `url` are ignored.
   * @param {string} repoPath - Local directory that receives the files.
   * @param {string} branch
   * @param {string} repo
   * @param {string} arch
   */
  constructor(mirrors, repoPath, branch, repo, arch) {
    this.mirrors = mirrors.filter(m => m && m.url);
    this.repoPath = repoPath;
    this.branch = branch;
    this.repo = repo;
    this.arch = arch;
    this.queue = [];
    this.activeDownloads = new Map();
    this.completed = new Set();
    this.failed = new Map();
    this.skipped = new Set();
    this.startTime = Date.now();
    this.totalBytes = 0;
    this.downloadedBytes = 0;
    this.currentMirrorIndex = 0;
    print.debug(`Initialized with ${this.mirrors.length} mirrors`);
  }

  /**
   * Renders the single-line progress indicator for one file.
   * @param {string} filename
   * @param {{downloaded: number, total: number, speed: number}} progress
   */
  updateProgress(filename, progress) {
    // The total is unknown (0) when the server sends no Content-Length.
    const percent = progress.total > 0
      ? ((progress.downloaded / progress.total) * 100).toFixed(1)
      : '?';
    const speed = utils.formatSpeed(progress.speed);
    const downloaded = utils.formatSize(progress.downloaded);
    const total = utils.formatSize(progress.total);
    const progressText = `[${percent}%] ${filename} ${downloaded}/${total} at ${speed}`;
    print.progress(progressText);
  }

  /**
   * Downloads one file from the current mirror, moving on to the next mirror
   * on failure until every mirror has been given one attempt.
   * @param {{name: string, size?: number}} file
   * @param {number} [retryCount=0] - Number of attempts already made.
   * @returns {Promise<void>}
   * @throws (rejects) with the last error once all attempts are used up, or
   *   immediately for an invalid file / missing mirror.
   */
  async downloadFile(file, retryCount = 0) {
    if (!file || !file.name) {
      throw new Error('Invalid file object');
    }
    const mirror = this.mirrors[this.currentMirrorIndex];
    if (!mirror || !mirror.url) {
      throw new Error('Invalid mirror selected');
    }
    const maxAttempts = this.mirrors.length;
    const downloadUrl = this.buildDownloadUrl(mirror, file.name);
    const localPath = path.join(this.repoPath, file.name);
    print.debug(`Downloading ${file.name} from ${mirror.name}`);
    print.debug(`URL: ${downloadUrl}`);
    print.debug(`Local path: ${localPath}`);
    try {
      // The await is essential: without it a rejected transfer would bypass
      // this try/catch and the mirror fallback below would never run.
      await this.fetchToFile(downloadUrl, localPath, file);
    } catch (error) {
      print.debug(`Download failed from mirror ${mirror.name}: ${error.message}`);
      if (retryCount + 1 >= maxAttempts) {
        throw error;
      }
      // Only advance when no concurrent download has already moved the
      // shared index away from the mirror that just failed.
      if (this.mirrors[this.currentMirrorIndex] === mirror) {
        this.getNextMirror();
      }
      const nextMirror = this.mirrors[this.currentMirrorIndex];
      print.line(`Retrying download from ${nextMirror.name} (attempt ${retryCount + 2}/${maxAttempts})`);
      return this.downloadFile(file, retryCount + 1);
    }
  }

  /**
   * Streams one URL into `localPath`, reporting progress. The file is created
   * only after a 200 response and removed again if the transfer fails.
   * @param {string} downloadUrl
   * @param {string} localPath
   * @param {{name: string, size?: number}} file
   * @param {number} [redirectsLeft=5] - Remaining redirects to follow.
   * @returns {Promise<void>}
   */
  fetchToFile(downloadUrl, localPath, file, redirectsLeft = 5) {
    return new Promise((resolve, reject) => {
      const protocol = downloadUrl.startsWith('https') ? https : http;
      let fileStream = null;
      let received = 0;
      let settled = false;

      const fail = (error) => {
        if (settled) return;
        settled = true;
        // Bytes of an abandoned attempt must not count as downloaded data.
        this.downloadedBytes -= received;
        if (fileStream === null) {
          reject(error);
          return;
        }
        fileStream.destroy();
        fs.unlink(localPath, () => reject(error));
      };

      const request = protocol.get(downloadUrl, (response) => {
        const { statusCode, headers } = response;

        if ([301, 302, 303, 307, 308].includes(statusCode) && headers.location) {
          response.resume(); // drain the unused body so the socket is released
          if (redirectsLeft <= 0) {
            fail(new Error('Too many redirects'));
            return;
          }
          let nextUrl;
          try {
            nextUrl = new URL(headers.location, downloadUrl).toString();
          } catch (error) {
            fail(error);
            return;
          }
          settled = true;
          this.fetchToFile(nextUrl, localPath, file, redirectsLeft - 1).then(resolve, reject);
          return;
        }

        if (statusCode !== 200) {
          response.resume();
          fail(new Error(`HTTP ${statusCode}`));
          return;
        }

        const startTime = Date.now();
        const totalSize = parseInt(headers['content-length'], 10) || file.size || 0;

        fileStream = fs.createWriteStream(localPath);
        fileStream.on('error', (error) => {
          request.destroy();
          fail(error);
        });
        response.on('error', fail);
        response.on('data', (chunk) => {
          received += chunk.length;
          this.downloadedBytes += chunk.length;
          const elapsed = (Date.now() - startTime) / 1000;
          this.updateProgress(file.name, {
            downloaded: received,
            total: totalSize,
            // No measurable time has passed for the very first chunk.
            speed: elapsed > 0 ? received / elapsed : 0,
            elapsed
          });
        });
        fileStream.on('finish', () => {
          if (settled) return;
          settled = true;
          print.progress(''); // clear the progress line
          print.line(`Completed: ${file.name}`);
          fileStream.close(() => resolve());
        });
        response.pipe(fileStream);
      });

      request.on('error', fail);
      request.setTimeout(CONFIG.downloadTimeout, () => {
        request.destroy();
        fail(new Error('Download timeout'));
      });
    });
  }

  /**
   * Drains the queue with at most CONFIG.maxConcurrentDownloads transfers in
   * flight and returns summary counters.
   * @returns {Promise<{completed: number, failed: number, skipped: number, totalBytes: number, downloadedBytes: number}>}
   * @throws {Error} when there are no usable mirrors.
   */
  async start() {
    if (this.mirrors.length === 0) {
      throw new Error('No valid mirrors available for download');
    }
    print.line(`\nStarting download of ${this.queue.length} files using ${this.mirrors.length} mirrors`);
    print.line('Progress will be shown below:\n');
    const maxConcurrent = Math.min(CONFIG.maxConcurrentDownloads, this.queue.length);
    const activeDownloads = new Set();
    while (this.queue.length > 0 || activeDownloads.size > 0) {
      // Top up the pool of in-flight downloads.
      while (this.queue.length > 0 && activeDownloads.size < maxConcurrent) {
        const file = this.queue.shift();
        const downloadPromise = this.downloadFile(file)
          .then(() => {
            this.completed.add(file.name);
            activeDownloads.delete(downloadPromise);
          })
          .catch(error => {
            this.failed.set(file.name, error);
            activeDownloads.delete(downloadPromise);
            print.line(`Failed: ${file.name} (${error.message})`);
          });
        activeDownloads.add(downloadPromise);
      }
      // Wait until any slot frees up before scheduling more.
      if (activeDownloads.size > 0) {
        await Promise.race(Array.from(activeDownloads));
      }
    }
    return {
      completed: this.completed.size,
      failed: this.failed.size,
      skipped: this.skipped.size,
      totalBytes: this.totalBytes,
      downloadedBytes: this.downloadedBytes
    };
  }

  /**
   * Advances to the next mirror (wrapping around) and returns it.
   * @returns {{name: string, url: string}}
   * @throws {Error} when there are no mirrors.
   */
  getNextMirror() {
    if (this.mirrors.length === 0) {
      throw new Error('No valid mirrors available');
    }
    this.currentMirrorIndex = (this.currentMirrorIndex + 1) % this.mirrors.length;
    return this.mirrors[this.currentMirrorIndex];
  }

  /**
   * Builds "<mirror.url>/<branch>/<repo>/<arch>/<filename>".
   * @param {{url: string}} mirror
   * @param {string} filename - A leading "./" is removed.
   * @returns {string}
   * @throws {Error} when the mirror has no URL.
   */
  buildDownloadUrl(mirror, filename) {
    if (!mirror || !mirror.url) {
      throw new Error('Invalid mirror configuration');
    }
    // Strip trailing slashes from the mirror URL.
    const baseUrl = mirror.url.replace(/\/+$/, '');
    const parts = [
      baseUrl,
      this.branch,
      this.repo,
      this.arch,
      filename.replace(/^\.\//, '')
    ].filter(Boolean); // drop empty segments
    return parts.join('/');
  }

  /**
   * Queues a file and adds its known size to the expected total.
   * @param {{name: string, size?: number}} file
   */
  addFile(file) {
    this.queue.push(file);
    this.totalBytes += file.size || 0;
  }
}
/**
 * Probes a mirror by requesting its stable/core/x86_64 directory listing.
 *
 * @param {{name: string, url: string}} mirror
 * @returns {Promise<object|null>} A copy of `mirror` with an added `speed`
 *   field (elapsed milliseconds for request plus body read), or null when the
 *   request fails, the status is not ok, or the body mentions none of
 *   'core', 'db' or 'files'.
 */
async function testMirror(mirror) {
  const requestOptions = {
    timeout: 10000,
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
  };
  const expectedMarkers = ['core', 'db', 'files'];

  try {
    const probeUrl = urlUtils.join(mirror.url, 'stable', 'core', 'x86_64');
    print.debug(`Testing mirror ${mirror.name} (${probeUrl})`);

    const began = Date.now();
    const reply = await makeRequest(probeUrl, requestOptions);
    if (reply.ok) {
      const body = await reply.text();
      const looksLikeRepo = expectedMarkers.some((marker) => body.includes(marker));
      if (looksLikeRepo) {
        const speed = Date.now() - began;
        print.debug(`Mirror ${mirror.name} responded in ${speed}ms`);
        return Object.assign({}, mirror, { speed });
      }
      print.debug(`Mirror ${mirror.name} returned invalid content`);
      return null;
    }
    print.debug(`Mirror ${mirror.name} returned status ${reply.status}`);
    return null;
  } catch (error) {
    print.debug(`Mirror ${mirror.name} test failed: ${error.message}`);
    return null;
  }
}
/**
 * Walks the mirrors in order and returns the first one that yields a
 * non-empty file list.
 *
 * @param {Array<{name: string, url: string}>} mirrors
 * @returns {Promise<{mirror: object, files: Array}>} The mirror (with its URL
 *   normalized) together with the files it listed.
 * @throws {Error} 'No working mirrors found' when no mirror produces files.
 */
async function tryMirrors(mirrors) {
  // Normalize every URL up front, before any mirror is contacted.
  const candidates = mirrors.map((entry) => Object.assign({}, entry, { url: normalizeUrl(entry.url) }));

  for (const candidate of candidates) {
    const { name, url } = candidate;
    try {
      print.line(`Trying mirror: ${name} (${url})`);
      const listing = await getFileList(url);
      const hasFiles = Boolean(listing) && listing.length > 0;
      if (hasFiles) {
        print.line(`Successfully got file list from ${name}`);
        return { mirror: candidate, files: listing };
      }
    } catch (error) {
      print.debug(`Mirror ${name} failed: ${error.message}`);
    }
  }

  throw new Error('No working mirrors found');
}
/**
 * URL normalization used by tryMirrors.
 *
 * Removes trailing slashes, prepends "https://" when the URL has no http(s)
 * scheme (checked case-insensitively, so "HTTP://host" is left alone), and
 * collapses repeated slashes everywhere except right after the scheme.
 *
 * @param {string} url
 * @returns {string}
 */
function normalizeUrl(url) {
  // Remove trailing slashes, if any.
  let normalized = url.replace(/\/+$/, '');
  // Add the scheme when it is missing.
  if (!/^https?:\/\//i.test(normalized)) {
    normalized = `https://${normalized}`;
  }
  // Collapse runs of slashes; "[^:]" keeps the "//" after the scheme intact.
  return normalized.replace(/([^:]\/)\/+/g, '$1');
}
/**
 * Holds synchronization statistics for one branch/repo/arch combination.
 */
class SyncStats {
  /**
   * @param {string} branch
   * @param {string} repo
   * @param {string} arch
   */
  constructor(branch, repo, arch) {
    this.branch = branch;
    this.repo = repo;
    this.arch = arch;
    this.totalFiles = 0;
    this.completedFiles = 0;
    this.failedFiles = 0;
    this.skippedFiles = 0;
    this.totalBytes = 0;
    this.downloadedBytes = 0;
    this.startTime = Date.now();
  }

  /** Seconds elapsed since this object was created. */
  get duration() {
    return (Date.now() - this.startTime) / 1000;
  }

  /**
   * Average download rate in bytes per second; 0 while no measurable time
   * has elapsed (avoids a NaN/Infinity result from dividing by zero).
   */
  get averageSpeed() {
    const seconds = this.duration;
    return seconds > 0 ? this.downloadedBytes / seconds : 0;
  }

  /** Multi-line, human-readable summary of the counters. */
  toString() {
    return [
      `\n${this.branch}/${this.repo}/${this.arch}:`,
      `Files: ${this.completedFiles}/${this.totalFiles} (${this.skippedFiles} skipped, ${this.failedFiles} failed)`,
      `Data: ${utils.formatSize(this.downloadedBytes)}/${utils.formatSize(this.totalBytes)}`,
      `Speed: ${utils.formatSpeed(this.averageSpeed)}`,
      `Time: ${this.duration.toFixed(1)}s`
    ].join('\n');
  }
}
/**
 * Collects per-combination SyncStats objects and prints the final report.
 */
class RepositoryManager {
  constructor() {
    // Keyed by "branch/repo/arch".
    this.stats = new Map();
    this.startTime = Date.now();
  }

  /**
   * Returns the SyncStats for the combination, creating it on first use.
   * @param {string} branch
   * @param {string} repo
   * @param {string} arch
   * @returns {SyncStats}
   */
  getStats(branch, repo, arch) {
    const key = `${branch}/${repo}/${arch}`;
    let entry = this.stats.get(key);
    if (!this.stats.has(key)) {
      entry = new SyncStats(branch, repo, arch);
      this.stats.set(key, entry);
    }
    return entry;
  }

  /**
   * Adds the counters of a download run to the combination's statistics.
   * @param {string} branch
   * @param {string} repo
   * @param {string} arch
   * @param {{completed: number, failed: number, skipped: number, totalBytes: number, downloadedBytes: number}} result
   */
  updateStats(branch, repo, arch, result) {
    const target = this.getStats(branch, repo, arch);
    target.completedFiles += result.completed;
    target.failedFiles += result.failed;
    target.skippedFiles += result.skipped;
    target.totalBytes += result.totalBytes;
    target.downloadedBytes += result.downloadedBytes;
  }

  /**
   * Sums the counters over every tracked combination.
   * @returns {{totalFiles: number, completedFiles: number, failedFiles: number, skippedFiles: number, totalBytes: number, downloadedBytes: number}}
   */
  getTotalStats() {
    const sums = {
      totalFiles: 0,
      completedFiles: 0,
      failedFiles: 0,
      skippedFiles: 0,
      totalBytes: 0,
      downloadedBytes: 0
    };
    for (const entry of this.stats.values()) {
      sums.totalFiles += entry.totalFiles;
      sums.completedFiles += entry.completedFiles;
      sums.failedFiles += entry.failedFiles;
      sums.skippedFiles += entry.skippedFiles;
      sums.totalBytes += entry.totalBytes;
      sums.downloadedBytes += entry.downloadedBytes;
    }
    return sums;
  }

  /** Prints each combination's summary followed by the overall totals. */
  printSummary() {
    print.line('\nSync Summary:');
    // One section per combination.
    this.stats.forEach((entry) => {
      print.line(entry.toString());
    });

    // Overall figures.
    const totals = this.getTotalStats();
    const elapsedSeconds = (Date.now() - this.startTime) / 1000;
    const bytesPerSecond = totals.downloadedBytes / elapsedSeconds;
    const report = [
      '\nOverall Statistics:',
      `Total repositories processed: ${this.stats.size}`,
      `Total files: ${totals.totalFiles}`,
      `Successfully downloaded: ${totals.completedFiles}`,
      `Failed: ${totals.failedFiles}`,
      `Skipped (up-to-date): ${totals.skippedFiles}`,
      `Total data: ${utils.formatSize(totals.downloadedBytes)} / ${utils.formatSize(totals.totalBytes)}`,
      `Average speed: ${utils.formatSpeed(bytesPerSecond)}`,
      `Total time: ${elapsedSeconds.toFixed(1)} seconds`
    ];
    for (const text of report) {
      print.line(text);
    }
  }
}
/**
 * Top-level driver: picks working mirrors, syncs every configured
 * branch/repo/architecture combination one after another, then prints the
 * summary. Any failure is logged with console.error and re-thrown.
 *
 * @returns {Promise<void>}
 */
async function syncRepository() {
  try {
    const repoManager = new RepositoryManager();

    // Fetch and test the mirrors once; they are reused for every combination.
    const goodMirrors = await initializeMirrors();

    // Enumerate branch × repo × arch in the same nesting order as the config.
    const targets = [];
    for (const branch of CONFIG.branches) {
      for (const repo of CONFIG.repos) {
        for (const arch of CONFIG.architectures) {
          targets.push({ branch, repo, arch });
        }
      }
    }

    // Process strictly sequentially.
    for (const { branch, repo, arch } of targets) {
      await syncRepositoryComponent(branch, repo, arch, goodMirrors, repoManager);
    }

    // Final report.
    repoManager.printSummary();
  } catch (error) {
    console.error('Sync failed:', error);
    throw error;
  }
}
/**
 * Syncs one branch/repo/arch combination: creates the local directory,
 * obtains the file list from the first mirror that returns a non-empty one,
 * downloads every listed file and records the result.
 *
 * @param {string} branch
 * @param {string} repo
 * @param {string} arch
 * @param {Array<{name: string, url: string}>} mirrors - Entries without a URL are ignored.
 * @param {RepositoryManager} repoManager - Receives the statistics.
 * @returns {Promise<void>} Resolves without downloading when no mirror lists any files.
 * @throws {Error} 'No valid mirrors available' when no mirror has a URL.
 */
async function syncRepositoryComponent(branch, repo, arch, mirrors, repoManager) {
  const label = `${branch}/${repo}/${arch}`;
  print.line(`\nProcessing ${label}...`);

  // Discard unusable mirror entries.
  const validMirrors = mirrors.filter((candidate) => candidate && candidate.url);
  if (validMirrors.length === 0) {
    throw new Error('No valid mirrors available');
  }

  // Make sure the target directory tree exists.
  const repoPath = path.join(CONFIG.rootPath, branch, repo, arch);
  await fs.promises.mkdir(repoPath, { recursive: true });

  // Ask the mirrors one by one until a non-empty listing comes back.
  const fetchListing = async () => {
    for (const candidate of validMirrors) {
      try {
        print.line(`Trying mirror ${candidate.name} for ${label}`);
        const listing = await getFileList(candidate.url, branch, repo, arch);
        if (listing && listing.length > 0) {
          return listing;
        }
      } catch (error) {
        print.debug(`Failed to get files from ${candidate.name}: ${error.message}`);
      }
    }
    return null;
  };

  const files = await fetchListing();
  if (files === null) {
    print.line(`No files found for ${label}, skipping...`);
    return;
  }

  repoManager.getStats(branch, repo, arch).totalFiles = files.length;

  const manager = new DownloadManager(validMirrors, repoPath, branch, repo, arch);
  files.forEach((entry) => manager.addFile(entry));

  print.line(`Starting download for ${label}`);
  const outcome = await manager.start();
  repoManager.updateStats(branch, repo, arch, outcome);
}
/**
 * Fetches the mirror list page, extracts the mirrors from its `tr.green`
 * rows, tests them all in parallel and returns up to five working mirrors
 * ordered by response time (fastest first).
 *
 * @returns {Promise<Array<{name: string, url: string, speed: number}>>}
 * @throws {Error} when the list cannot be fetched, contains no mirrors, or
 *   no mirror passes testing.
 */
async function initializeMirrors() {
  print.line('Getting mirror list...');
  const response = await fetch(CONFIG.mirrorListUrl);
  // Without this check an error page would be parsed and reported as an
  // empty mirror list, hiding the real cause.
  if (!response.ok) {
    throw new Error(`Failed to fetch mirror list: HTTP ${response.status}`);
  }
  const html = await response.text();
  const $ = cheerio.load(html);

  // Blacklist entries are host fragments (e.g. 'muug.ca/mirror'), so they are
  // matched as substrings of both the link text and the link target instead
  // of requiring the link text to equal the entry exactly.
  const isBlacklisted = (name, url) =>
    CONFIG.blackListMirrors.some((entry) => name.includes(entry) || url.includes(entry));

  const mirrors = [];
  $('tr.green').each((_, element) => {
    const link = $(element).find('a');
    const url = link.attr('href');
    const name = (link.text() || '').trim();
    if (!url || isBlacklisted(name, url)) {
      return;
    }
    try {
      mirrors.push({ name, url: urlUtils.normalize(url) });
    } catch (error) {
      print.debug(`Skipping invalid mirror URL ${url}: ${error.message}`);
    }
  });
  print.line(`Found ${mirrors.length} potential mirrors`);
  if (mirrors.length === 0) {
    throw new Error('No mirrors found in the list');
  }

  // Test every mirror concurrently; testMirror yields null for a failure.
  print.line('Testing mirrors (this may take a while)...');
  const testedMirrors = await Promise.all(
    mirrors.map(async (mirror) => {
      const result = await testMirror(mirror);
      if (result) {
        print.line(`✓ Mirror ${mirror.name} is working (${result.speed}ms)`);
      } else {
        print.line(`✗ Mirror ${mirror.name} failed testing`);
      }
      return result;
    })
  );
  const workingMirrors = testedMirrors
    .filter((mirror) => mirror !== null)
    .sort((a, b) => a.speed - b.speed);

  if (workingMirrors.length === 0) {
    // Every mirror failed: give the first one in the list a second chance.
    print.line('No mirrors passed testing, trying first mirror from list...');
    const firstMirror = await testMirror(mirrors[0]);
    if (firstMirror) {
      workingMirrors.push(firstMirror);
    }
  }
  if (workingMirrors.length === 0) {
    throw new Error('No working mirrors found. Please check your internet connection or try again later.');
  }

  const selectedMirrors = workingMirrors.slice(0, 5);
  print.line(`\nSelected ${selectedMirrors.length} mirrors for downloading:`);
  for (const mirror of selectedMirrors) {
    print.line(`- ${mirror.name} (${mirror.speed}ms)`);
  }
  return selectedMirrors;
}
/**
 * Performs an HTTP(S) GET and buffers the whole response body.
 *
 * Unlike a failed request, a non-2xx final status does NOT reject: the
 * result's `ok` flag reports it, mirroring the shape of a fetch response.
 *
 * @param {string} url - Absolute http:// or https:// URL.
 * @param {{timeout?: number, headers?: object}} [options] - `timeout` is the
 *   socket inactivity limit in milliseconds (default 30000).
 * @returns {Promise<{ok: boolean, status: number, headers: object, text: function(): Promise<string>}>}
 * @throws (rejects) on an unparsable URL, network errors, timeout, or more
 *   than 5 redirects.
 */
function makeRequest(url, options = {}) {
  const MAX_REDIRECTS = 5;
  const REDIRECT_CODES = [301, 302, 303, 307, 308];
  const timeoutMs = options.timeout || 30000;

  const attempt = (currentUrl, redirectsLeft) => new Promise((resolve, reject) => {
    const parsedUrl = new URL(currentUrl);
    const protocol = parsedUrl.protocol === 'https:' ? https : http;
    const request = protocol.get(currentUrl, {
      timeout: timeoutMs,
      headers: options.headers || {}
    }, (response) => {
      const { statusCode, headers } = response;

      // Follow redirects, bounded so a redirect loop cannot run forever.
      if (REDIRECT_CODES.includes(statusCode) && headers.location) {
        response.resume(); // drain the unused body so the socket is released
        if (redirectsLeft <= 0) {
          reject(new Error('Too many redirects'));
          return;
        }
        let nextUrl;
        try {
          // Location may be relative; resolve it against the current URL.
          nextUrl = new URL(headers.location, currentUrl).toString();
        } catch (error) {
          reject(error);
          return;
        }
        attempt(nextUrl, redirectsLeft - 1).then(resolve, reject);
        return;
      }

      const chunks = [];
      response.on('data', (chunk) => chunks.push(chunk));
      response.on('error', reject);
      response.on('end', () => {
        resolve({
          ok: statusCode >= 200 && statusCode < 300,
          status: statusCode,
          headers,
          text: () => Promise.resolve(Buffer.concat(chunks).toString())
        });
      });
    });

    request.on('error', reject);
    // The `timeout` option only makes the request emit 'timeout'; the request
    // has to be destroyed explicitly, otherwise it would hang indefinitely.
    request.on('timeout', () => {
      request.destroy(new Error('Request timeout'));
    });
  });

  return attempt(url, MAX_REDIRECTS);
}
/**
 * Fetches the directory listing of `<mirrorUrl>/<branch>/<repo>/<arch>` and
 * extracts the repository files it links to.
 *
 * Only links ending in .pkg.tar.zst, .pkg.tar.zst.sig, .db or .files are
 * kept; parent-directory links, query links, absolute paths and absolute
 * URLs are ignored. Each file name is reported once.
 *
 * @param {string} mirrorUrl
 * @param {string} [branch]
 * @param {string} [repo]
 * @param {string} [arch]
 * @returns {Promise<Array<{name: string, size: number, date: Date}>|null>}
 *   The files found (possibly empty), or null when the request or parsing
 *   failed; failures are logged at debug level rather than thrown.
 */
async function getFileList(mirrorUrl, branch, repo, arch) {
  const WANTED_SUFFIXES = ['.pkg.tar.zst', '.pkg.tar.zst.sig', '.db', '.files'];
  try {
    // Build the repository URL.
    const repoUrl = urlUtils.join(mirrorUrl, branch, repo, arch);
    print.line(`Fetching file list from ${repoUrl}`);
    const response = await makeRequest(repoUrl, {
      timeout: 10000,
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
      }
    });
    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }
    const html = await response.text();
    print.debug(`Received HTML content from ${repoUrl}`);
    const $ = cheerio.load(html);

    // Keyed by file name: a Set of freshly created objects would never
    // de-duplicate, because every object is a distinct value.
    const filesByName = new Map();
    $('a, tr td a').each((_, element) => {
      const fileName = $(element).attr('href');
      if (!fileName) return;
      // Skip navigation links and anything that is not a plain file name.
      if (fileName.includes('..') || fileName.includes('?') ||
          fileName.startsWith('/') || fileName.includes('://')) {
        return;
      }
      if (!WANTED_SUFFIXES.some((suffix) => fileName.endsWith(suffix))) return;
      if (filesByName.has(fileName)) return;

      const row = $(element).closest('tr');
      let size = '0';
      let date = new Date();
      // Table-style listings: try the 2nd and 3rd cells for size and date.
      if (row.length) {
        size = row.find('td:nth-child(2)').text().trim();
        const dateStr = row.find('td:nth-child(3)').text().trim();
        if (dateStr) {
          const parsed = new Date(dateStr);
          // Keep the "now" fallback when the cell is not a parsable date.
          if (!Number.isNaN(parsed.getTime())) {
            date = parsed;
          }
        }
      }
      filesByName.set(fileName, {
        name: fileName,
        size: utils.parseSize(size),
        date
      });
    });

    const fileArray = Array.from(filesByName.values());
    print.debug(`Found ${fileArray.length} files in ${repoUrl}`);
    if (fileArray.length === 0) {
      print.debug('HTML content preview:');
      print.debug(html.substring(0, 500));
    }
    return fileArray;
  } catch (error) {
    // Best effort by design: callers treat null as "this mirror is unusable".
    print.debug(`Error fetching files from mirror: ${error.message}`);
    return null;
  }
}
// Script entry point: start the synchronization. Any error that escapes
// syncRepository() is logged and the process exits with status 1.
syncRepository().catch(error => {
console.error('Fatal error:', error);
process.exit(1);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment