const fs = require('fs');
const path = require('path');
const { exec, spawn } = require('child_process');
const util = require('util');
const execPromise = util.promisify(exec);
const nodemailer = require('nodemailer');

// Runtime configuration, sourced entirely from environment variables.
const config = {
  emailHost: process.env.EMAIL_HOST,
  emailPort: process.env.EMAIL_PORT,
  emailUser: process.env.EMAIL_USER,
  emailPass: process.env.EMAIL_PASS,
  htmlDir: process.env.HTML_BASE,
  mediaModel: process.env.MEDIA_MODEL
};

// In-memory job registry; lost on process restart.
const jobs = new Map();
const stoppedJobs = new Set(); // Track stopped jobs

// --- Helpers ---

// Create dirPath (and parents) if it does not exist yet.
async function ensureDirectoryExists(dirPath) {
  if (!fs.existsSync(dirPath)) {
    await fs.promises.mkdir(dirPath, { recursive: true });
  }
}

/**
 * Resolve the per-job working directory inside this service (`inputDir`)
 * and the corresponding path on the Docker host (`hostInputDir`).
 * They differ when HTML_BASE is configured (e.g. when this service itself
 * runs inside a container and must mount host paths).
 * @param {string} jobId
 * @returns {{inputDir: string, hostInputDir: string}}
 */
function getJobDirs(jobId) {
  // __dirname is the utils folder, so go up one level to the project root.
  const inputDir = path.join(__dirname, '..', 'results', jobId);
  // path.join avoids double slashes; strip a trailing "/" from HTML_BASE first.
  const hostInputDir = config.htmlDir
    ? path.join(config.htmlDir.replace(/\/$/, ''), 'results', jobId)
    : inputDir;
  return { inputDir, hostInputDir };
}

// Resolve the directory holding the analysis shell scripts (public/scripts),
// honoring HTML_BASE the same way getJobDirs does.
function getScriptsDir() {
  return config.htmlDir
    ? path.join(config.htmlDir.replace(/\/$/, ''), 'public', 'scripts')
    : path.join(__dirname, '..', 'public', 'scripts');
}

/**
 * Simple CSV line parser that handles double-quoted fields and
 * doubled-quote ("") escapes. Fields are trimmed.
 * @param {string} line - one CSV record (no embedded newlines)
 * @returns {string[]} parsed field values
 */
function parseCSVLine(line) {
  const result = [];
  let current = '';
  let inQuotes = false;
  for (let i = 0; i < line.length; i++) {
    const char = line[i];
    const nextChar = line[i + 1];
    if (char === '"') {
      if (inQuotes && nextChar === '"') {
        // Escaped quote
        current += '"';
        i++; // Skip next quote
      } else {
        // Toggle quote state
        inQuotes = !inQuotes;
      }
    } else if (char === ',' && !inQuotes) {
      // Field separator
      result.push(current.trim());
      current = '';
    } else {
      current += char;
    }
  }
  // Add last field
  result.push(current.trim());
  return result;
}

/**
 * Outer-join a set of analysis CSVs on their (case-insensitive) "genome"
 * column and write the merged table to outputPath.
 * Missing files and files without a genome column are skipped with a warning.
 * When two files define the same non-genome column, the later file wins.
 * @param {string[]} csvFiles - paths of the per-analysis result CSVs
 * @param {string} outputPath - destination for the merged CSV
 * @returns {Promise<string>} outputPath
 */
async function mergeCsvFilesByGenome(csvFiles, outputPath) {
  try {
    const dataFrames = [];
    const allHeaders = new Set(['genome']); // genome is the key column

    for (const csvFile of csvFiles) {
      if (!fs.existsSync(csvFile)) {
        console.warn(`[Merge] CSV file not found: ${csvFile}`);
        continue;
      }
      const content = await fs.promises.readFile(csvFile, 'utf8');
      const lines = content.trim().split('\n').filter(line => line.trim());
      if (lines.length < 2) {
        console.warn(`[Merge] CSV file is empty or has no data: ${csvFile}`);
        continue;
      }
      const headers = parseCSVLine(lines[0]).map(h => h.trim().replace(/^"|"$/g, ''));
      const data = {};
      const genomeColIndex = headers.findIndex(h => h.toLowerCase() === 'genome');
      if (genomeColIndex === -1) {
        console.warn(`[Merge] No 'genome' column found in ${csvFile}, skipping`);
        continue;
      }
      // Collect all headers (except genome, which is the join key)
      headers.forEach(h => {
        if (h.toLowerCase() !== 'genome') {
          allHeaders.add(h);
        }
      });
      // Parse data rows, keyed by genome; empty values are dropped.
      for (let i = 1; i < lines.length; i++) {
        const values = parseCSVLine(lines[i]).map(v => v.trim().replace(/^"|"$/g, ''));
        const genome = values[genomeColIndex]?.trim();
        if (!genome) continue;
        if (!data[genome]) {
          data[genome] = {};
        }
        headers.forEach((header, idx) => {
          if (header.toLowerCase() !== 'genome' && values[idx]) {
            data[genome][header] = values[idx];
          }
        });
      }
      dataFrames.push({ headers, data });
    }

    // Union of all genomes across files.
    const mergedData = {};
    const allGenomes = new Set();
    dataFrames.forEach(df => {
      Object.keys(df.data).forEach(genome => allGenomes.add(genome));
    });
    // Merge data for each genome (later files overwrite earlier ones).
    allGenomes.forEach(genome => {
      mergedData[genome] = { genome };
      dataFrames.forEach(df => {
        if (df.data[genome]) {
          Object.assign(mergedData[genome], df.data[genome]);
        }
      });
    });

    // Serialize, quoting any value containing a comma, quote, or newline.
    const headerArray = Array.from(allHeaders);
    const csvLines = [headerArray.join(',')];
    const sortedGenomes = Array.from(allGenomes).sort(); // deterministic output order
    sortedGenomes.forEach(genome => {
      const row = headerArray.map(header => {
        const value = mergedData[genome][header] || '';
        if (value.includes(',') || value.includes('"') || value.includes('\n')) {
          return `"${value.replace(/"/g, '""')}"`;
        }
        return value;
      });
      csvLines.push(row.join(','));
    });
    await fs.promises.writeFile(outputPath, csvLines.join('\n'), 'utf8');
    console.log(`[Merge] Merged ${csvFiles.length} CSV files into ${outputPath}, ${sortedGenomes.length} genomes`);
    return outputPath;
  } catch (error) {
    console.error('[Merge] Error merging CSV files:', error);
    throw error;
  }
}

/**
 * Read a result CSV and copy its first data row into `target`, keyed by
 * header name. Uses parseCSVLine so quoted fields containing commas are
 * handled consistently with the rest of this module.
 * @param {string} csvPath - path of the CSV to sample
 * @param {Object<string,string>} target - mutated in place
 */
async function parseFirstRowInto(csvPath, target) {
  const csvContent = await fs.promises.readFile(csvPath, 'utf8');
  const lines = csvContent.trim().split('\n');
  if (lines.length < 2) return; // header-only or empty file: nothing to surface
  const headers = parseCSVLine(lines[0]);
  const values = parseCSVLine(lines[1]);
  headers.forEach((h, i) => { target[h] = values[i] || ''; });
}

/**
 * Email the result CSV for a job as an attachment.
 * @param {string} analysisId - id shown to the user (same as jobId here)
 * @param {string} email - recipient address
 * @param {string} jobId - used to locate the default result file
 * @param {string} analysisType - selects the per-analysis result path
 * @param {string|null} customResultFile - overrides the default mapping
 */
async function sendResultsEmail(analysisId, email, jobId, analysisType, customResultFile = null) {
  const resultFileMap = {
    'ph': 'ph_predict/pHpredict.csv',
    'nutrition': 'media_predict/generateMedia.csv',
    'oxygen': 'tempo2predict/tempo2predict.csv',
    'temperature': 'tempo2predict/tempo2predict.csv',
    'growth': 'growth_predict/growthPrediction.csv',
    'all': 'merged_results.csv'
  };
  const transporter = nodemailer.createTransport({
    host: config.emailHost,
    // FIX: env vars are strings; nodemailer expects a numeric port.
    port: Number(config.emailPort),
    // NOTE(review): `secure: true` means implicit TLS, normally port 465.
    // Confirm EMAIL_PORT matches; STARTTLS (587) needs `secure: false`.
    secure: true,
    auth: { user: config.emailUser, pass: config.emailPass }
  });
  // Use custom file path if provided, otherwise use the default mapping.
  let resultFilePath;
  if (customResultFile) {
    resultFilePath = customResultFile;
  } else {
    const relativePath = resultFileMap[analysisType] || resultFileMap['nutrition'];
    resultFilePath = path.join(__dirname, '..', 'results', jobId, relativePath);
  }
  const mailOptions = {
    from: config.emailUser,
    to: email,
    subject: `Analysis Results - Job ID: ${analysisId}`,
    text: `Your analysis has been completed.`,
    html: `
Job ID: ${analysisId}
Analysis Type: ${analysisType}
`,
    attachments: [
      {
        filename: `analysis_result_${analysisId}.csv`,
        path: resultFilePath,
        contentType: 'text/csv'
      }
    ]
  };
  return await transporter.sendMail(mailOptions);
}

// --- Docker Scripts ---
// Each execute*Scripts helper runs one analysis container over the job's
// prokka_annotation output and returns { resultFile, stdout, stderr },
// throwing if the expected CSV is not produced.

/** Run the pH prediction container for a job. */
async function executePhpredictScripts(jobId) {
  const { inputDir, hostInputDir } = getJobDirs(jobId);
  await ensureDirectoryExists(inputDir);
  const scriptsDir = getScriptsDir();
  const command = `docker run --rm -v "${hostInputDir}:${inputDir}" -v "${scriptsDir}":/app/scripts media-ph-pred:v1 bash -c "/opt/conda/bin/conda run -n ph_model_jupyter bash /app/scripts/pHPredict.sh ${inputDir}/prokka_annotation ${inputDir}/ph_predict"`;
  const result = await execPromise(command);
  const resultFile = `${inputDir}/ph_predict/pHpredict.csv`;
  if (!fs.existsSync(resultFile)) throw new Error(`Result file missing: ${resultFile}`);
  return { resultFile, ...result };
}

/** Run the temperature/oxygen prediction container for a job. */
async function executeTempOxygenScripts(jobId) {
  const { inputDir, hostInputDir } = getJobDirs(jobId);
  await ensureDirectoryExists(inputDir);
  const scriptsDir = getScriptsDir();
  const command = `docker run --rm -v "${hostInputDir}:${inputDir}" -v "${scriptsDir}":/app/scripts -w /app media-bacdiveai:v1 bash -c "source /etc/profile && source /opt/conda/etc/profile.d/conda.sh && conda activate bacdiveai && pip install tqdm && bash /app/scripts/TempO2Predict.sh ${inputDir}/prokka_annotation ${inputDir}/tempo2predict"`;
  const result = await execPromise(command);
  const resultFile = `${inputDir}/tempo2predict/tempo2predict.csv`;
  if (!fs.existsSync(resultFile)) throw new Error(`Result file missing: ${resultFile}`);
  return { resultFile, ...result };
}

/** Run the growth-rate prediction container for a job. */
async function executeGrowthScripts(jobId) {
  const { inputDir, hostInputDir } = getJobDirs(jobId);
  await ensureDirectoryExists(inputDir);
  const scriptsDir = getScriptsDir();
  const command = `docker run --rm -e PATH='/usr/local/bin:/usr/bin:/bin:/opt/conda/lib/R/bin' -v "${hostInputDir}:${inputDir}" -v "${scriptsDir}":/app/scripts -w /app media-grodon2:v1 bash -c "bash /app/scripts/GrowthPredict.sh ${inputDir}/prokka_annotation ${inputDir}/growth_predict"`;
  const result = await execPromise(command);
  const resultFile = `${inputDir}/growth_predict/growthPrediction.csv`;
  if (!fs.existsSync(resultFile)) throw new Error(`Result file missing: ${resultFile}`);
  return { resultFile, ...result };
}

/** Run the nutrition/media prediction container for a job.
 *  Note: mounts MEDIA_MODEL (not public/scripts) as /app/scripts. */
async function executeNutritionScripts(jobId) {
  const { inputDir, hostInputDir } = getJobDirs(jobId);
  await ensureDirectoryExists(inputDir);
  const command = `docker run --rm -v "${hostInputDir}:${inputDir}" -v "${config.mediaModel}":/app/scripts -w /app media-transformer:v2 bash -c "source /opt/conda/etc/profile.d/conda.sh && conda activate uncultured_pytorch && pip install sentencepiece --root-user-action=ignore && bash /app/scripts/mediaPredict.sh ${inputDir}/prokka_annotation ${inputDir}/media_predict"`;
  const result = await execPromise(command);
  const resultFile = `${inputDir}/media_predict/generateMedia.csv`;
  if (!fs.existsSync(resultFile)) throw new Error(`Result file missing: ${resultFile}`);
  return { resultFile, ...result };
}

// --- Main Orchestrator ---

/**
 * Run the full pipeline for one job: Prokka annotation, then one or more
 * prediction analyses, then (for "all") a merge of the per-analysis CSVs,
 * and finally an optional result email. Mutates `job` in place
 * (status / progress / result / error). Checks `stoppedJobs` between
 * stages so stopJob() can abort a running pipeline.
 * @param {object} job - job record created by jobService.createJob
 */
async function executeJobProcess(job) {
  try {
    // Bail out early if the user already requested a stop.
    if (stoppedJobs.has(job.id)) {
      console.log(`[Job ${job.id}] Job was stopped before execution`);
      job.status = 'failed';
      job.error = 'Stopped by user';
      stoppedJobs.delete(job.id); // FIX: clear the flag so the Set does not leak
      return;
    }

    // Use the results folder under the project root, consistent with uploads.
    const { inputDir, hostInputDir } = getJobDirs(job.id);
    console.log(`[Job ${job.id}] Input directory: ${inputDir}`);
    console.log(`[Job ${job.id}] Host input directory: ${hostInputDir}`);

    await ensureDirectoryExists(inputDir);
    await ensureDirectoryExists(path.join(inputDir, 'uploads'));

    // 1. Prokka Annotation
    job.status = 'analyzing';
    job.progress = 10;

    // Re-check the stop flag before the expensive step.
    if (stoppedJobs.has(job.id)) {
      console.log(`[Job ${job.id}] Job was stopped before Prokka execution`);
      job.status = 'failed';
      job.error = 'Stopped by user';
      stoppedJobs.delete(job.id);
      return;
    }

    const scriptsDir = getScriptsDir();
    const dockerCommand = `docker run --rm -v "${hostInputDir}:${inputDir}" -v "${scriptsDir}":/app/scripts -w /app media-prokka:v1 bash -c "bash /app/scripts/prokka.sh --output ${inputDir}/prokka_annotation --input ${inputDir}/uploads"`;
    console.log(`[Job ${job.id}] Executing Docker command: ${dockerCommand}`);

    // Use spawn (not exec) so stopJob can terminate the process.
    const dockerProcess = spawn('docker', [
      'run', '--rm',
      '-v', `${hostInputDir}:${inputDir}`,
      '-v', `${scriptsDir}:/app/scripts`,
      '-w', '/app',
      'media-prokka:v1',
      'bash', '-c',
      `bash /app/scripts/prokka.sh --output ${inputDir}/prokka_annotation --input ${inputDir}/uploads`
    ], { stdio: 'pipe' });

    // Keep a reference so stopJob can kill it.
    job.dockerProcess = dockerProcess;

    // Wait for the annotation container to finish.
    await new Promise((resolve, reject) => {
      let stdoutData = '';
      let stderrData = '';
      dockerProcess.stdout.on('data', (data) => { stdoutData += data.toString(); });
      dockerProcess.stderr.on('data', (data) => { stderrData += data.toString(); });
      dockerProcess.on('close', (code) => {
        if (stoppedJobs.has(job.id)) {
          reject(new Error('Stopped by user'));
        } else if (code !== 0) {
          reject(new Error(`Docker process exited with code ${code}: ${stderrData}`));
        } else {
          resolve({ stdout: stdoutData, stderr: stderrData });
        }
      });
      dockerProcess.on('error', (error) => { reject(error); });
    });

    job.progress = 60;

    // 2. Analysis Execution
    const executionMap = {
      'ph': executePhpredictScripts,
      'nutrition': executeNutritionScripts,
      'oxygen': executeTempOxygenScripts,
      'temperature': executeTempOxygenScripts,
      'growth': executeGrowthScripts
    };

    // "all" runs every analysis; otherwise just the requested one.
    const isAnalysisAll = job.analysis_type === 'all';
    const analysisTypes = isAnalysisAll
      ? ['ph', 'nutrition', 'oxygen', 'temperature', 'growth']
      : [job.analysis_type];
    if (!isAnalysisAll && !executionMap[job.analysis_type]) {
      throw new Error('Unsupported analysis');
    }

    const allResults = [];
    const csvFiles = [];
    const totalAnalyses = analysisTypes.length;

    for (let i = 0; i < analysisTypes.length; i++) {
      // Honor a stop request between analyses.
      if (stoppedJobs.has(job.id)) {
        console.log(`[Job ${job.id}] Job was stopped during analysis execution`);
        job.status = 'failed';
        job.error = 'Stopped by user';
        stoppedJobs.delete(job.id);
        return;
      }
      const analysisType = analysisTypes[i];
      const progressBase = 60;
      const progressStep = 35 / totalAnalyses; // Use 35% of progress for analyses
      job.progress = Math.floor(progressBase + (i * progressStep));
      console.log(`[Job ${job.id}] Executing ${analysisType} analysis (${i + 1}/${totalAnalyses})`);
      try {
        const scriptResult = await executionMap[analysisType](job.id);
        allResults.push({ type: analysisType, result: scriptResult });
        csvFiles.push(scriptResult.resultFile);
        console.log(`[Job ${job.id}] ${analysisType} analysis completed`);
      } catch (error) {
        // FIX: optional chaining — a non-Error throw has no .message.
        if (stoppedJobs.has(job.id) || error?.message?.includes('Stopped by user')) {
          console.log(`[Job ${job.id}] Analysis stopped by user`);
          job.status = 'failed';
          job.error = 'Stopped by user';
          stoppedJobs.delete(job.id);
          return;
        }
        console.error(`[Job ${job.id}] ${analysisType} analysis failed:`, error);
        // Continue with other analyses even if one fails
      }
    }

    job.progress = 95;

    // 3. Merge CSV files if analysis-all mode
    let finalResultFile;
    const parsedResult = {};
    if (isAnalysisAll && csvFiles.length > 0) {
      // Merge all CSV files by genome column.
      const mergedCsvPath = path.join(inputDir, 'merged_results.csv');
      await mergeCsvFilesByGenome(csvFiles, mergedCsvPath);
      finalResultFile = mergedCsvPath;
      // Sample the first data row for display (best effort).
      try {
        await parseFirstRowInto(mergedCsvPath, parsedResult);
      } catch (e) {
        console.error('[Job] Error parsing merged CSV:', e);
      }
    } else if (allResults.length > 0) {
      // Single analysis mode.
      finalResultFile = allResults[0].result.resultFile;
      try {
        await parseFirstRowInto(finalResultFile, parsedResult);
      } catch (e) {
        console.error(e);
      }
    } else {
      throw new Error('No analysis results available');
    }

    // Final stop check before marking the job complete.
    if (stoppedJobs.has(job.id)) {
      console.log(`[Job ${job.id}] Job was stopped before completion`);
      job.status = 'failed';
      job.error = 'Stopped by user';
      stoppedJobs.delete(job.id); // clear the stop flag
      return;
    }

    job.progress = 100;
    job.status = 'completed';
    job.result = {
      analysis_id: job.id,
      result_file: finalResultFile,
      analysis_types: isAnalysisAll ? analysisTypes : [job.analysis_type],
      ...parsedResult
    };

    // Clean up stop flag and process reference.
    stoppedJobs.delete(job.id);
    delete job.dockerProcess;
    console.log(`[Job ${job.id}] successfully completed !`);

    if (job.email) {
      // For analysis-all, send the email with the merged file.
      const emailAnalysisType = isAnalysisAll ? 'all' : job.analysis_type;
      await sendResultsEmail(job.id, job.email, job.id, emailAnalysisType, finalResultFile);
      console.log(`[Job ${job.id}] email sent to ${job.email} successfully !`);
    }
  } catch (error) {
    console.error(`[Job ${job.id}] Error:`, error);
    // If the failure came from a stop request, report the stop message.
    if (stoppedJobs.has(job.id) || error?.message?.includes('Stopped by user')) {
      job.status = 'failed';
      job.error = 'Stopped by user';
      stoppedJobs.delete(job.id);
    } else {
      job.status = 'failed';
      job.error = error?.message ?? String(error);
    }
    job.progress = 0;
    // Clean up the process reference.
    delete job.dockerProcess;
  }
}

const jobService = {
  /**
   * Register a new job and schedule it to run asynchronously.
   * @returns {object} the job record (also kept in the in-memory registry)
   */
  createJob(id, analysisType, email, files) {
    const job = {
      id,
      analysis_type: analysisType,
      email,
      files,
      created_at: Date.now(),
      status: 'queued',
      progress: 0,
      file_count: files.length,
      eta_seconds: 10
    };
    jobs.set(id, job);
    // Async start — fire-and-forget; executeJobProcess handles its own errors.
    setTimeout(() => executeJobProcess(job), 1000);
    return job;
  },

  getJob(id) {
    return jobs.get(id);
  },

  /**
   * Stop a running job: flag it, kill the spawned Prokka process, and stop
   * any Docker containers mounted on the job's results directory
   * (three lookup strategies, best effort).
   * @throws if the job id is unknown
   */
  async stopJob(id) {
    const job = jobs.get(id);
    if (!job) throw new Error('Job not found');

    // Nothing to do for jobs that already finished.
    if (job.status === 'completed' || job.status === 'failed') {
      console.log(`[Stop] Job ${id} is already ${job.status}`);
      return true;
    }

    console.log(`[Stop] Stopping job ${id}...`);
    // Mark the job stopped so executeJobProcess aborts at its next checkpoint.
    stoppedJobs.add(id);

    // Kill the spawned Docker client process, if any.
    if (job.dockerProcess) {
      // FIX: capture a local reference — executeJobProcess may delete
      // job.dockerProcess before the escalation timer fires.
      const proc = job.dockerProcess;
      try {
        console.log(`[Stop] Killing Docker process for job ${id}`);
        proc.kill('SIGTERM');
        // Escalate to SIGKILL if SIGTERM has no effect within 5s.
        setTimeout(() => {
          if (!proc.killed) {
            proc.kill('SIGKILL');
          }
        }, 5000);
      } catch (error) {
        console.error(`[Stop] Error killing Docker process:`, error);
      }
    }

    // Stop all related Docker containers (killing the client alone may
    // leave the container running).
    try {
      const { hostInputDir } = getJobDirs(id);

      // Method 1: find containers via their volume path (--filter volume).
      try {
        const getContainersCmd1 = `docker ps --filter "volume=${hostInputDir}" --format "{{.ID}}"`;
        const { stdout } = await execPromise(getContainersCmd1);
        if (stdout && stdout.trim()) {
          const cids = stdout.trim().split('\n').filter(cid => cid);
          for (const cid of cids) {
            console.log(`[Stop] Stopping container ${cid} for job ${id} (method 1)`);
            try {
              await execPromise(`docker stop ${cid}`);
              console.log(`[Stop] Container ${cid} stopped`);
            } catch (e) {
              console.warn(`[Stop] Failed to stop container ${cid}:`, e.message);
            }
          }
        }
      } catch (e) {
        console.warn(`[Stop] Method 1 failed:`, e.message);
      }

      // Method 2: scan all running containers and inspect their mounts.
      try {
        const getAllContainersCmd = `docker ps --format "{{.ID}} {{.Mounts}}"`;
        const { stdout } = await execPromise(getAllContainersCmd);
        if (stdout) {
          const lines = stdout.trim().split('\n');
          const containerIds = new Set();
          for (const line of lines) {
            // Does the mount list reference this job's results directory?
            if (line.includes(hostInputDir) || line.includes(`results/${id}`) || line.includes(`results\\\\${id}`)) {
              const containerId = line.split(' ')[0];
              if (containerId && containerId.length === 12) { // Docker short IDs are 12 chars
                containerIds.add(containerId);
              }
            }
          }
          for (const containerId of containerIds) {
            console.log(`[Stop] Stopping container ${containerId} for job ${id} (method 2)`);
            try {
              await execPromise(`docker stop ${containerId}`);
              console.log(`[Stop] Container ${containerId} stopped`);
            } catch (e) {
              console.warn(`[Stop] Failed to stop container ${containerId}:`, e.message);
            }
          }
        }
      } catch (e) {
        console.warn(`[Stop] Method 2 failed:`, e.message);
      }

      // Method 3 (last resort): scan all containers, including stopped ones,
      // and force-stop any that mention this job's results directory.
      try {
        const getAllContainersCmd = `docker ps -a --format "{{.ID}} {{.Mounts}}"`;
        const { stdout } = await execPromise(getAllContainersCmd);
        if (stdout) {
          const lines = stdout.trim().split('\n');
          for (const line of lines) {
            if (line.includes(`results/${id}`) || line.includes(`results\\\\${id}`)) {
              const containerId = line.split(' ')[0];
              if (containerId && containerId.length === 12) {
                console.log(`[Stop] Force stopping container ${containerId} for job ${id} (method 3)`);
                try {
                  // Try a graceful stop first...
                  await execPromise(`docker stop ${containerId}`).catch(() => {});
                  // ...then kill if it is still running.
                  await execPromise(`docker kill ${containerId}`).catch(() => {});
                  console.log(`[Stop] Container ${containerId} force stopped`);
                } catch (e) {
                  console.warn(`[Stop] Failed to force stop container ${containerId}:`, e.message);
                }
              }
            }
          }
        }
      } catch (e) {
        console.warn(`[Stop] Method 3 failed:`, e.message);
      }
    } catch (error) {
      console.error(`[Stop] Error stopping containers:`, error);
    }

    // Update the job record.
    job.status = 'failed';
    job.error = 'Stopped by user';
    job.progress = 0;
    // Clean up the process reference.
    delete job.dockerProcess;
    console.log(`[Stop] Job ${id} stopped successfully`);
    return true;
  },

  /** Re-send the result email for an existing job to the given address. */
  async resendEmail(id, email) {
    const job = jobs.get(id);
    if (!job) throw new Error('Job not found');
    return await sendResultsEmail(id, email, id, job.analysis_type);
  }
};

module.exports = jobService;