Files
labweb/routes/api.js
2025-12-16 11:39:15 +08:00

1688 lines
71 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
const express = require('express');
const router = express.Router();
const multer = require('multer');
const { nanoid } = require('nanoid');
const fs = require('fs');
const path = require('path');
const { exec } = require('child_process');
const util = require('util');
const execPromise = util.promisify(exec);
const nodemailer = require('nodemailer');
const { pool } = require('../config/db');
// Application configuration, sourced entirely from environment variables.
const config = {
emailHost: process.env.EMAIL_HOST, // SMTP server hostname
emailPort: process.env.EMAIL_PORT, // SMTP port (kept as the raw env string)
emailUser: process.env.EMAIL_USER, // SMTP account, also used as the "from" address
emailPass: process.env.EMAIL_PASS, // SMTP password
htmlDir: process.env.HTML_BASE, // host-side project base dir, used for Docker -v mounts
mediaModel: process.env.MEDIA_MODEL // host-side dir holding media-prediction scripts (mounted into Docker)
};
// --- Multer configuration ---
// Uploaded files are written to results/<jobId>/uploads on disk; jobId is
// assigned by route middleware before multer runs (see the /upload route).
const storage = multer.diskStorage({
destination: (req, file, cb) => {
const jobId = req.jobId;
// Note: __dirname here is the routes folder, so go up one level to the project root.
const dynamicUploadDir = path.join(__dirname, '..', 'results', jobId, 'uploads');
fs.mkdirSync(dynamicUploadDir, { recursive: true });
cb(null, dynamicUploadDir)
},
filename: (req, file, cb) => {
// NOTE(review): file.originalname is client-controlled; a name containing
// path separators or colliding names could clobber files — consider sanitizing.
cb(null, file.originalname);
}
});
// Accept up to 30 files under the multipart field name "files".
const upload = multer({ storage }).array('files', 30);
// --- Task management (in-memory) ---
// Jobs live only in this Map; they are lost on process restart.
const jobs = new Map();

// --- Helpers ---

/**
 * Ensure a directory exists, creating it (and any missing parents) if needed.
 * fs.mkdir with { recursive: true } is a no-op when the directory already
 * exists, so the previous existsSync pre-check (a TOCTOU race) was removed.
 * @param {string} dirPath directory path to create
 * @returns {Promise<void>}
 */
async function ensureDirectoryExists(dirPath) {
  await fs.promises.mkdir(dirPath, { recursive: true });
}
// Email delivery of analysis results.
/**
 * Send the result CSV for a completed analysis as an email attachment.
 * @param {string} analysisId id shown to the user (subject/body/attachment name)
 * @param {string} email recipient address
 * @param {string} jobId job id — names the results/<jobId> directory on disk
 * @param {string} analysisType one of 'ph' | 'nutrition' | 'tempOxygen' | 'growth'
 * @returns {Promise<{success: true, messageId: string}>}
 * @throws on unknown analysisType or any transport/send failure (caller handles)
 */
async function sendResultsEmail(analysisId, email, jobId, analysisType) {
  try {
    // Relative result-file location per analysis type (mirrors the Docker helpers).
    const resultFileMap = {
      'ph': 'ph_predict/pHpredict.csv',
      'nutrition': 'media_predict/generateMedia.csv',
      'tempOxygen': 'tempo2predict/tempo2predict.csv',
      'growth': 'growth_predict/growthPrediction.csv'
    };
    const relativeResult = resultFileMap[analysisType];
    if (!relativeResult) {
      // Fail fast instead of building an attachment path containing "undefined".
      throw new Error(`Unsupported analysis type for email delivery: ${analysisType}`);
    }
    // Configure the email transmitter
    const transporter = nodemailer.createTransport({
      host: config.emailHost,
      port: config.emailPort,
      secure: true,
      auth: {
        user: config.emailUser,
        pass: config.emailPass
      }
    });
    // Email content
    const mailOptions = {
      from: config.emailUser,
      to: email,
      subject: `Analysis Results - Job ID: ${analysisId}`,
      text: `Your analysis has been completed. Please find the results attached.\n\nJob ID: ${analysisId}`,
      html: `
<h2>Analysis Results</h2>
<p>Your analysis has been completed successfully.</p>
<p><strong>Job ID:</strong> ${analysisId}</p>
<p>Please find the results attached as a CSV file.</p>
`,
      attachments: [
        {
          filename: `analysis_result_${analysisId}.csv`,
          // __dirname is the routes/ folder; results/ lives one level up at the
          // project root (same convention as the multer storage config).
          path: path.join(__dirname, '..', 'results', jobId, relativeResult),
          contentType: 'text/csv'
        }
      ]
    };
    // Send an email
    const info = await transporter.sendMail(mailOptions);
    console.log('Email sent successfully:', info.messageId);
    return { success: true, messageId: info.messageId };
  } catch (error) {
    console.error('Error sending email:', error);
    throw error;
  }
}
// --- Docker script-execution logic ---
// ... (executePhpredictScripts, executeTempOxygenScripts, etc. below) ...
// IMPORTANT: inside these functions __dirname must become
// path.join(__dirname, '..') so paths point at the project root, not routes/.
// Execute pH prediction script (ph-pred image)
/**
 * Run the pH-prediction container against this job's Prokka annotation.
 * @param {string} jobId job id naming results/<jobId> on disk
 * @returns {Promise<object>} { script, inputDir, resultFile, stdout, stderr }
 * @throws if the container fails or the expected CSV is not produced
 */
async function executePhpredictScripts(jobId) {
  // __dirname is routes/, so the results tree lives one level up.
  const inputDir = path.join(__dirname, '..', 'results', jobId);
  await ensureDirectoryExists(inputDir);
  // Docker command: mount input/output directories and run script with specified image
  const command = `docker run --rm \
-v "${config.htmlDir}/results/${jobId}:${inputDir}" \
-v "${config.htmlDir}/scripts":/app/scripts \
media-ph-pred:v1 \
bash -c "/opt/conda/bin/conda run -n ph_model_jupyter bash /app/scripts/pHPredict.sh ${inputDir}/prokka_annotation ${inputDir}/ph_predict"`;
  console.log(`Execute pH prediction command: ${command}`);
  const result = await execPromise(command);
  // Check result file existence
  const resultFile = `${inputDir}/ph_predict/pHpredict.csv`;
  if (!fs.existsSync(resultFile)) {
    throw new Error(`The pH prediction result file does not exist: ${resultFile}`);
  }
  return {
    script: 'pHPredict.sh',
    inputDir,
    resultFile,
    ...result
  };
}
// Execute temperature and oxygen prediction script (bacdive-ai image)
/**
 * Run the temperature/oxygen-prediction container for a job.
 * @param {string} jobId job id naming results/<jobId> on disk
 * @returns {Promise<object>} { script, inputDir, resultFile, stdout, stderr }
 * @throws if the container fails or the expected CSV is not produced
 */
async function executeTempOxygenScripts(jobId) {
  // __dirname is routes/, so the results tree lives one level up.
  const inputDir = path.join(__dirname, '..', 'results', jobId);
  await ensureDirectoryExists(inputDir);
  // Docker command: mount directories and use bacdive-ai image
  const command = `docker run --rm \
-v "${config.htmlDir}/results/${jobId}:${inputDir}" \
-v "${config.htmlDir}/scripts":/app/scripts \
-w /app \
media-bacdiveai:v1 \
bash -c "source /etc/profile && source /opt/conda/etc/profile.d/conda.sh && conda activate bacdiveai && pip install tqdm && bash /app/scripts/TempO2Predict.sh ${inputDir}/prokka_annotation ${inputDir}/tempo2predict"`;
  console.log(`Execute temperature and oxygen prediction command: ${command}`);
  const result = await execPromise(command);
  // Check result file existence
  const resultFile = `${inputDir}/tempo2predict/tempo2predict.csv`;
  if (!fs.existsSync(resultFile)) {
    throw new Error(`The temperature and oxygen prediction result file does not exist: ${resultFile}`);
  }
  return {
    script: 'TempO2Predict.sh',
    inputDir,
    resultFile,
    ...result
  };
}
// Execute growth prediction script (shengwei/grodon2 image)
/**
 * Run the growth-rate-prediction container for a job.
 * @param {string} jobId job id naming results/<jobId> on disk
 * @returns {Promise<object>} { script, inputDir, resultFile, stdout, stderr }
 * @throws if the container fails or the expected CSV is not produced
 */
async function executeGrowthScripts(jobId) {
  // __dirname is routes/, so the results tree lives one level up.
  const inputDir = path.join(__dirname, '..', 'results', jobId);
  await ensureDirectoryExists(inputDir);
  // Docker command: run script with grodon2 image
  const command = `docker run --rm \
-e PATH='/usr/local/bin:/usr/bin:/bin:/opt/conda/lib/R/bin' \
-v "${config.htmlDir}/results/${jobId}:${inputDir}" \
-v "${config.htmlDir}/scripts":/app/scripts \
-w /app \
media-grodon2:v1 \
bash -c "bash /app/scripts/GrowthPredict.sh ${inputDir}/prokka_annotation ${inputDir}/growth_predict"`;
  console.log(`Execute growth rate prediction command: ${command}`);
  const result = await execPromise(command);
  // Check result file existence
  const resultFile = `${inputDir}/growth_predict/growthPrediction.csv`;
  if (!fs.existsSync(resultFile)) {
    throw new Error(`The growth rate prediction result file does not exist: ${resultFile}`);
  }
  return {
    script: 'GrowthPredict.sh',
    inputDir, // added for consistency with the other execute* helpers
    resultFile,
    ...result
  };
}
// Execute culture medium prediction script
/**
 * Run the culture-medium-prediction container for a job.
 * @param {string} jobId job id naming results/<jobId> on disk
 * @returns {Promise<object>} { script, inputDir, resultFile, stdout, stderr }
 * @throws if the container fails or the expected CSV is not produced
 */
async function executeNutritionScripts(jobId) {
  // __dirname is routes/, so the results tree lives one level up.
  const inputDir = path.join(__dirname, '..', 'results', jobId);
  await ensureDirectoryExists(inputDir);
  // Docker command: start miniconda3, activate specified environment and run script
  const command = `docker run --rm \
-v "${config.htmlDir}/results/${jobId}:${inputDir}" \
-v "${config.mediaModel}":/app/scripts \
-w /app \
media-transformer:v2 \
bash -c "source /opt/conda/etc/profile.d/conda.sh && conda activate uncultured_pytorch && pip install sentencepiece --root-user-action=ignore && bash /app/scripts/mediaPredict.sh ${inputDir}/prokka_annotation ${inputDir}/media_predict"`;
  console.log(`Execute culture medium prediction command: ${command}`);
  const result = await execPromise(command);
  // Check result file existence
  const resultFile = `${inputDir}/media_predict/generateMedia.csv`;
  if (!fs.existsSync(resultFile)) {
    throw new Error(`The culture medium prediction result file does not exist: ${resultFile}`);
  }
  return {
    script: 'mediaPredict.sh',
    inputDir,
    resultFile,
    ...result
  };
}
// Main execution function (simplified outline kept from an earlier draft):
// async function executePythonScript(job) {
// // ... (original logic retained below) ...
// }
// Main execution function
/**
 * Full pipeline for one job: Prokka annotation in Docker, then the
 * analysis-type-specific prediction container, CSV parsing, and optional
 * email delivery. Mutates `job` in place (status/progress/result/error)
 * and always resolves with the job — failures are recorded, not thrown.
 * @param {object} job job record from the in-memory `jobs` map
 * @returns {Promise<object>} the same job object
 */
async function executePythonScript(job) {
  try {
    // __dirname is routes/, so the results tree lives one level up.
    const inputDir = path.join(__dirname, '..', 'results', job.id);
    const hostInputDir = `${config.htmlDir}/results/${job.id}`;
    const hostDir = `${config.htmlDir}`;
    await ensureDirectoryExists(inputDir);
    await ensureDirectoryExists(hostInputDir);
    const analysisType = job.analysis_type;
    // Update job status
    job.status = 'analyzing';
    job.progress = 10; // Reduced initial progress to account for Docker step
    console.log(`Starting Prokka annotation for task ID: ${job.id}`);
    // Execute Docker Prokka command
    const dockerCommand = `docker run --rm \
-v "${hostInputDir}:${inputDir}" \
-v "${hostDir}/scripts/:/app/scripts" \
-w /app \
media-prokka:v1 \
./scripts/prokka.sh --output ${inputDir}/prokka_annotation --input ${inputDir}/uploads`;
    console.log(`Executing Docker command: ${dockerCommand}`);
    const { stdout, stderr } = await execPromise(dockerCommand);
    // Log Docker execution results
    if (stdout) console.log(`Prokka Docker output: ${stdout}`);
    if (stderr) console.warn(`Prokka Docker warnings: ${stderr}`);
    // Update progress after Docker completion
    job.progress = 60;
    console.log(`Starting analysis [${analysisType}] for task ID: ${job.id}`);
    // Mapping of analysis types to execution functions
    const executionMap = {
      'ph': executePhpredictScripts,
      'nutrition': executeNutritionScripts,
      'tempOxygen': executeTempOxygenScripts,
      'growth': executeGrowthScripts
    };
    // Check if analysis type is supported
    if (!executionMap.hasOwnProperty(analysisType)) {
      throw new Error(`Unsupported analysis type: ${analysisType}`);
    }
    // Execute corresponding script
    const scriptResult = await executionMap[analysisType](job.id);
    console.log(`${analysisType} script execution completed, result file: ${scriptResult.resultFile}`);
    // Read result file content: single-data-row CSV -> key/value pairs
    let parsedResult = {};
    try {
      const csvContent = await fs.promises.readFile(scriptResult.resultFile, 'utf8');
      const lines = csvContent.trim().split('\n');
      const headers = lines[0].split(',').map(h => h.trim());
      const values = lines[1]?.split(',') || [];
      // Convert to key-value pairs
      headers.forEach((header, index) => {
        parsedResult[header] = values[index]?.trim() || '';
      });
    } catch (readError) {
      console.error(`Failed to read result file:`, readError);
      parsedResult = { error: 'Result file parsing failed' };
    }
    // Update progress
    job.progress = 80;
    // Build job result. Fixed: helpers return `inputDir`, not `outputDir`
    // (the original read scriptResult.outputDir, which was always undefined).
    job.result = {
      analysis_id: job.id,
      analysis_type: analysisType,
      input_files_count: job.files.length,
      input_files: job.files.map(f => f.fileName),
      output_directory: scriptResult.inputDir,
      result_file: scriptResult.resultFile,
      analysis_time: Date.now(),
      script_output: (scriptResult.stdout || '').trim(),
      docker_output: stdout.trim(), // Add Docker output to results
      ...(scriptResult.stderr && { script_stderr: scriptResult.stderr.trim() }),
      ...(stderr && { docker_stderr: stderr.trim() }), // Add Docker errors to results
      ...parsedResult // Merge CSV parsing results
    };
    // Best-effort email delivery; a mail failure must not fail the job.
    if (job.email) {
      try {
        await sendResultsEmail(job.id, job.email, job.id, job.analysis_type);
        console.log(`Result email sent to ${job.email}`);
      } catch (emailError) {
        console.error('Email sending failed:', emailError);
      }
    }
    // Complete job
    job.status = 'completed';
    job.progress = 100;
    return job;
  } catch (error) {
    console.error(`Task ${job.id} failed:`, error);
    job.status = 'failed';
    job.error = error.message || 'Unknown error';
    job.progress = 0;
    return job;
  }
}
/**
 * Mark a job as queued and schedule the processing pipeline.
 * The heavy work is deferred so the HTTP response can return immediately.
 * @param {object} job job record from the in-memory `jobs` map
 */
async function startJob(job) {
  job.status = 'queued';
  job.progress = 0;
  job.eta_seconds = 10;
  // executePythonScript records its own failures on the job, so the
  // scheduled promise is intentionally not awaited here.
  setTimeout(() => { void executePythonScript(job); }, 1000);
}
// --- API route definitions ---
// 1. Upload API — accept up to 30 files, register a job, start the pipeline.
// Fixed: the final handler body was an empty placeholder, so every upload
// request hung with no response. NOTE(review): a duplicate registration of
// this route exists just below; Express dispatches to this first one, so the
// duplicate is dead code and can be removed.
router.post('/upload', (req, res, next) => {
  // Assign the job id before multer runs so diskStorage can use req.jobId.
  req.jobId = nanoid(12);
  next();
}, upload, async (req, res) => {
  try {
    if (!req.files || req.files.length === 0) {
      return res.status(400).json({ success: false, error: 'No files uploaded' });
    }
    const analysisType = req.body.analysis_type || 'nutrition';
    const email = req.body.email || null;
    const id = req.jobId;
    const job = {
      id,
      analysis_type: analysisType,
      files: req.files.map(file => ({
        filePath: file.path,
        fileName: file.originalname,
        fileSize: file.size,
        mimeType: file.mimetype
      })),
      email: email,
      created_at: Date.now(),
      status: 'created',
      progress: 0,
      file_count: req.files.length
    };
    jobs.set(id, job);
    startJob(job);
    res.json({
      success: true,
      analysis_id: id,
      file_count: req.files.length,
      message: `Analysis job started successfully!`
    });
  } catch (e) {
    console.error(e);
    res.status(500).json({ success: false, error: e.message || 'Upload failed' });
  }
});
// NOTE(review): this is a second registration of POST /upload. Express keeps
// the earlier registration above, so this chain only runs if the earlier one
// calls next() — it does not, so this handler is unreachable. Consider
// deleting one of the two registrations.
router.post('/upload', (req, res, next) => {
  // Assign the job id before multer runs so diskStorage can use req.jobId.
  req.jobId = nanoid(12);
  next();
}, upload, async (req, res) => {
  try {
    // Guard clause: reject requests that carried no files.
    if (!req.files || req.files.length === 0) {
      return res.status(400).json({ success: false, error: 'No files uploaded' });
    }
    const analysisType = req.body.analysis_type || 'nutrition';
    const email = req.body.email || null;
    const id = req.jobId;
    // Normalize multer's file records into the job's file descriptors.
    const uploadedFiles = req.files.map(file => ({
      filePath: file.path,
      fileName: file.originalname,
      fileSize: file.size,
      mimeType: file.mimetype
    }));
    const job = {
      id,
      analysis_type: analysisType,
      files: uploadedFiles,
      email: email,
      created_at: Date.now(),
      status: 'created',
      progress: 0,
      file_count: req.files.length
    };
    jobs.set(id, job);
    startJob(job);
    res.json({
      success: true,
      analysis_id: id,
      file_count: req.files.length,
      message: `Analysis job started successfully!`
    });
  } catch (e) {
    console.error(e);
    res.status(500).json({ success: false, error: e.message || 'Upload failed' });
  }
});
// 2. Status API — report progress for a job held in the in-memory map.
router.get('/status/:id', (req, res) => {
  const job = jobs.get(req.params.id);
  if (!job) {
    return res.status(404).json({ success: false, error: 'Job does not exist' });
  }
  const payload = {
    success: true,
    status: job.status,
    progress: job.progress,
    queue_position: job.status === 'queued' ? 1 : 0,
    eta_seconds: job.eta_seconds,
    file_count: job.file_count || 0,
    files: job.files ? job.files.map(f => f.fileName) : []
  };
  // Attach the terminal payload only in the matching terminal state.
  if (job.status === 'completed' && job.result) {
    payload.result = job.result;
  }
  if (job.status === 'failed' && job.error) {
    payload.error = job.error;
  }
  res.json(payload);
});
// 3. Stop-job API — mark the job failed, then stop any Docker containers
// still mounted on this job's results directory.
router.post('/stop/:id', async (req, res) => {
  const jobId = req.params.id;
  const job = jobs.get(jobId);
  if (!job) {
    return res.status(404).json({ success: false, error: 'Job not found' });
  }
  // Jobs in a terminal state need no action.
  if (job.status === 'completed' || job.status === 'failed') {
    return res.json({
      success: true,
      message: `Job ${jobId} is already ${job.status}`
    });
  }
  try {
    // 1. Flag the job as stopped before touching Docker.
    job.status = 'failed';
    job.error = 'Analysis stopped by user';
    job.progress = 0;
    jobs.set(jobId, job);
    // 2. Terminate related Docker containers (critical step): find containers
    // whose volume mount matches this job's results directory.
    const getContainersCmd = `docker ps --filter "volume=${config.htmlDir}/results/${jobId}" --format "{{.ID}}"`;
    const { stdout: containerIds } = await execPromise(getContainersCmd);
    if (containerIds) {
      for (const containerId of containerIds.trim().split('\n')) {
        if (!containerId) continue;
        console.log(`Stopping container ${containerId} for job ${jobId}`);
        await execPromise(`docker stop ${containerId}`);
        console.log(`Container ${containerId} stopped`);
      }
    }
    res.json({
      success: true,
      message: `Job ${jobId} stopped successfully`
    });
  } catch (error) {
    console.error(`Error stopping job ${jobId}:`, error);
    res.status(500).json({
      success: false,
      error: `Failed to stop job: ${error.message}`
    });
  }
});
// 4. Download API — return the result CSV for a completed job.
// Fixed: job.csv is never assigned anywhere in this module, so the original
// always sent an empty body; the pipeline records the CSV's on-disk path in
// job.result.result_file, so serve that file and keep job.csv as a fallback.
router.get('/download/:id', (req, res) => {
  const id = req.params.id;
  const job = jobs.get(id);
  if (!job) return res.status(404).json({ success: false, error: 'Job does not exist' });
  if (job.status !== 'completed') return res.status(400).json({ success: false, error: 'Job not completed' });
  const resultFile = job.result?.result_file;
  if (resultFile && fs.existsSync(resultFile)) {
    // res.download sets Content-Disposition with the given download name.
    return res.download(resultFile, `analysis_result_${id}_${job.file_count}files.csv`);
  }
  // Legacy fallback: in-memory CSV payload (empty if never populated).
  res.setHeader('Content-Type', 'text/csv; charset=utf-8');
  res.setHeader('Content-Disposition', `attachment; filename=analysis_result_${id}_${job.file_count}files.csv`);
  res.send(job.csv || '');
});
// 5. Email API — re-send the result CSV for a completed job.
// Fixed: the original required job.csv, which is never assigned anywhere in
// this module, so the endpoint always returned 400. sendResultsEmail attaches
// the CSV from disk, so checking the completed job.result is sufficient.
router.post('/send-results-email', async (req, res) => {
  try {
    const { analysis_id, email } = req.body;
    if (!analysis_id || !email) {
      return res.status(400).json({ success: false, error: 'analysis_id and email are required' });
    }
    const job = jobs.get(analysis_id);
    if (!job) {
      return res.status(404).json({ success: false, error: 'Job does not exist' });
    }
    if (job.status !== 'completed') {
      return res.status(400).json({ success: false, error: 'Job not completed yet' });
    }
    if (!job.result) {
      return res.status(400).json({ success: false, error: 'No results available' });
    }
    await sendResultsEmail(analysis_id, email, job.id, job.analysis_type);
    res.json({ success: true, message: 'Email sent successfully' });
  } catch (error) {
    console.error('Error in send-results-email endpoint:', error);
    res.status(500).json({ success: false, error: error.message || 'Failed to send email' });
  }
});
// 6. Browse API — paginated, filterable view over cultured_data, with
// optional Sankey chart data.
// Fixed SQL bugs: `CONCAT(m domain)` (invalid syntax) in the SELECT list,
// `m domain` and the unquoted reserved word `m.order` in GROUP BY, unquoted
// `m.order` in the taxonomy filter, and NaN interpolation into LIMIT/OFFSET
// for non-numeric page/pageSize query parameters.
router.get('/browse', async (req, res) => {
  try {
    console.log('Received a browsing request:', req.query);
    const { page = 1, pageSize = 20, search = '', ph = '', temperature = '', o2 = '', taxonomy = '', taxonomyValue = '', microbial = '', chartData = false, cultured_type = 'cultured' } = req.query;
    // Sanitize pagination before interpolating into LIMIT/OFFSET.
    const pageNum = Math.max(1, parseInt(page, 10) || 1);
    const sizeNum = Math.max(1, parseInt(pageSize, 10) || 20);
    const offset = (pageNum - 1) * sizeNum;
    // Build query conditions
    let whereConditions = [];
    let queryParams = [];
    // Add cultured_type filter (default to 'cultured' if not specified)
    if (cultured_type) {
      whereConditions.push('m.cultured_type = ?');
      queryParams.push(cultured_type);
    }
    // Search conditions
    if (search) {
      whereConditions.push('(m.microbial_name LIKE ? OR m.taxonomy LIKE ?)');
      queryParams.push(`%${search}%`, `%${search}%`);
    }
    // pH range filtering - predict_pH stores "value probability"; split on the last space
    if (ph) {
      // Extract the predicted value part of predict_pH (content before the last space)
      const phPredictionExpr = `TRIM(CASE
WHEN m.predict_pH IS NOT NULL AND m.predict_pH != '' THEN
SUBSTRING(m.predict_pH, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_pH)) > 0 THEN LENGTH(m.predict_pH) - LOCATE(' ', REVERSE(m.predict_pH)) ELSE LENGTH(m.predict_pH) END)
ELSE NULL
END)`;
      if (ph.includes('-')) {
        const [phMin, phMax] = ph.split('-').map(Number);
        whereConditions.push(`${phPredictionExpr} IS NOT NULL AND CAST(${phPredictionExpr} AS DECIMAL(4,2)) >= ? AND CAST(${phPredictionExpr} AS DECIMAL(4,2)) <= ?`);
        queryParams.push(phMin, phMax);
      } else {
        // Single value: match within +/- 0.5 pH units.
        const targetPh = parseFloat(ph);
        whereConditions.push(`${phPredictionExpr} IS NOT NULL AND CAST(${phPredictionExpr} AS DECIMAL(4,2)) >= ? AND CAST(${phPredictionExpr} AS DECIMAL(4,2)) <= ?`);
        queryParams.push(targetPh - 0.5, targetPh + 0.5);
      }
    }
    // Temperature category filtering - same "value probability" convention
    if (temperature) {
      const tempPredictionExpr = `TRIM(CASE
WHEN m.predict_temperature IS NOT NULL AND m.predict_temperature != '' THEN
SUBSTRING(m.predict_temperature, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_temperature)) > 0 THEN LENGTH(m.predict_temperature) - LOCATE(' ', REVERSE(m.predict_temperature)) ELSE LENGTH(m.predict_temperature) END)
ELSE NULL
END)`;
      whereConditions.push(`${tempPredictionExpr} = ?`);
      queryParams.push(temperature);
    }
    // O2 type filtering - same "value probability" convention
    if (o2) {
      const o2PredictionExpr = `TRIM(CASE
WHEN m.predict_o2 IS NOT NULL AND m.predict_o2 != '' THEN
SUBSTRING(m.predict_o2, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_o2)) > 0 THEN LENGTH(m.predict_o2) - LOCATE(' ', REVERSE(m.predict_o2)) ELSE LENGTH(m.predict_o2) END)
ELSE NULL
END)`;
      whereConditions.push(`${o2PredictionExpr} LIKE ?`);
      queryParams.push(`%${o2}%`);
    }
    // Taxonomy classification filtering (whitelisted column names only)
    if (taxonomy && taxonomy !== 'ALL' && taxonomy !== '') {
      const validTaxonomyLevels = ['Bacteria', 'Domain', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species'];
      if (validTaxonomyLevels.includes(taxonomy)) {
        let taxonomyField = '';
        switch (taxonomy) {
          case 'Bacteria':
          case 'Domain':
            taxonomyField = 'm.domain';
            break;
          case 'Phylum':
            taxonomyField = 'm.phylum';
            break;
          case 'Class':
            taxonomyField = 'm.class';
            break;
          case 'Order':
            // `order` is a reserved word in MySQL and must be backtick-quoted.
            taxonomyField = 'm.`order`';
            break;
          case 'Family':
            taxonomyField = 'm.family';
            break;
          case 'Genus':
            taxonomyField = 'm.genus';
            break;
          case 'Species':
            taxonomyField = 'm.species';
            break;
        }
        if (taxonomyField) {
          // If taxonomyValue is provided, filter by value; otherwise only check non-empty
          if (taxonomyValue && taxonomyValue.trim() !== '') {
            whereConditions.push(`${taxonomyField} LIKE ?`);
            queryParams.push(`%${taxonomyValue.trim()}%`);
          } else {
            whereConditions.push(`${taxonomyField} IS NOT NULL AND ${taxonomyField} != ''`);
          }
        }
      }
    }
    // Microbial name filtering
    if (microbial && microbial.trim() !== '') {
      whereConditions.push('m.microbial_name LIKE ?');
      queryParams.push(`%${microbial.trim()}%`);
    }
    // Build WHERE clause
    const whereClause = whereConditions.length > 0 ? `WHERE ${whereConditions.join(' AND ')}` : '';
    // Get total count
    const countQuery = `
SELECT COUNT(DISTINCT m.species_dive_id) as total
FROM cultured_data m
${whereClause}
`;
    console.log('Execute count query:', countQuery, 'parameter:', queryParams);
    const [countResult] = await pool.execute(countQuery, queryParams);
    const total = countResult[0].total;
    console.log('Total number of records:', total);
    // Get paginated data; predicted value and probability are split on the
    // last space of predict_pH / predict_temperature / predict_o2.
    const dataQuery = `
SELECT
m.species_dive_id,
m.microbial_name,
m.nutrition,
m.domain as domain,
m.pH as pH,
m.temperature as temperature,
m.o2 as o2,
m.predict_pH as ppH_raw,
m.predict_temperature as ptemperature_raw,
m.predict_o2 as po2_raw,
-- Extract predicted value of predict_pH (before the last space)
CASE
WHEN m.predict_pH IS NOT NULL AND m.predict_pH != '' THEN
TRIM(SUBSTRING(m.predict_pH, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_pH)) > 0 THEN LENGTH(m.predict_pH) - LOCATE(' ', REVERSE(m.predict_pH)) ELSE LENGTH(m.predict_pH) END))
ELSE NULL
END as ppH,
-- Extract probability of predict_pH (after the last space)
CASE
WHEN m.predict_pH IS NOT NULL AND m.predict_pH != '' AND LOCATE(' ', REVERSE(m.predict_pH)) > 0 THEN
TRIM(SUBSTRING(m.predict_pH, LENGTH(m.predict_pH) - LOCATE(' ', REVERSE(m.predict_pH)) + 2))
ELSE NULL
END as ppH_probability,
-- Extract predicted value of predict_temperature (before the last space)
CASE
WHEN m.predict_temperature IS NOT NULL AND m.predict_temperature != '' THEN
TRIM(SUBSTRING(m.predict_temperature, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_temperature)) > 0 THEN LENGTH(m.predict_temperature) - LOCATE(' ', REVERSE(m.predict_temperature)) ELSE LENGTH(m.predict_temperature) END))
ELSE NULL
END as ptemperature,
-- Extract probability of predict_temperature (after the last space)
CASE
WHEN m.predict_temperature IS NOT NULL AND m.predict_temperature != '' AND LOCATE(' ', REVERSE(m.predict_temperature)) > 0 THEN
TRIM(SUBSTRING(m.predict_temperature, LENGTH(m.predict_temperature) - LOCATE(' ', REVERSE(m.predict_temperature)) + 2))
ELSE NULL
END as ptemperature_probability,
-- Extract predicted value of predict_o2 (before the last space)
CASE
WHEN m.predict_o2 IS NOT NULL AND m.predict_o2 != '' THEN
TRIM(SUBSTRING(m.predict_o2, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_o2)) > 0 THEN LENGTH(m.predict_o2) - LOCATE(' ', REVERSE(m.predict_o2)) ELSE LENGTH(m.predict_o2) END))
ELSE NULL
END as po2,
-- Extract probability of predict_o2 (after the last space)
CASE
WHEN m.predict_o2 IS NOT NULL AND m.predict_o2 != '' AND LOCATE(' ', REVERSE(m.predict_o2)) > 0 THEN
TRIM(SUBSTRING(m.predict_o2, LENGTH(m.predict_o2) - LOCATE(' ', REVERSE(m.predict_o2)) + 2))
ELSE NULL
END as po2_probability,
m.genome
FROM cultured_data m
${whereClause}
GROUP BY m.species_dive_id, m.microbial_name, m.pH, m.temperature, m.o2, m.domain, m.phylum, m.class, m.\`order\`, m.family, m.genus, m.species, m.nutrition, m.genome, m.predict_pH, m.predict_temperature, m.predict_o2
ORDER BY m.species_dive_id
LIMIT ${sizeNum} OFFSET ${offset}
`;
    console.log('Execute data query:', dataQuery, 'Parameters:', queryParams);
    const [rows] = await pool.execute(dataQuery, queryParams);
    console.log('Query result row count:', rows.length);
    // Format data to match frontend expected format.
    // Prefer extracted predicted values; fall back to the measured columns.
    // Predicted fields (ppH, ptemperature, po2) are also returned directly.
    const formattedData = rows.map(row => ({
      id: row.species_dive_id,
      microbial_name: row.microbial_name || '',
      nutrition: row.nutrition || '',
      domain: row.domain || '',
      pH: row.ppH ? row.ppH.toString() : (row.pH ? row.pH.toString() : ''),
      pH_probability: row.ppH_probability || null,
      temperature: row.ptemperature ? extractTemperatureCategory(row.ptemperature.split(',')[0].trim()) : (row.temperature ? extractTemperatureCategory(row.temperature.split(',')[0].trim()) : ''),
      temperature_probability: row.ptemperature_probability || null,
      o2: row.po2 ? extractO2Category(row.po2) : (row.o2 ? extractO2Category(row.o2) : ''),
      o2_probability: row.po2_probability || null,
      // Return predicted value fields for frontend direct use
      ppH: row.ppH || null,
      ptemperature: row.ptemperature || null,
      po2: row.po2 || null
    }));
    // If Sankey chart data is needed, build Sankey chart data
    let sankeyData = null;
    if (chartData === 'true') {
      try {
        console.log('Start building Sankey chart data, whereClause:', whereClause, 'queryParams:', queryParams);
        sankeyData = await buildSankeyData(whereClause, queryParams);
        console.log('Sankey chart data built successfully');
      } catch (error) {
        console.error('Error building Sankey chart data:', error);
        console.error('Error stack:', error.stack);
        // Return empty Sankey chart data instead of null
        sankeyData = { nodes: [], links: [] };
      }
    }
    res.json({
      success: true,
      data: formattedData,
      pagination: {
        currentPage: pageNum,
        pageSize: sizeNum,
        total: total,
        totalPages: Math.ceil(total / sizeNum)
      },
      sankeyData: sankeyData
    });
  } catch (error) {
    console.error('Error getting data:', error);
    console.error('Error stack:', error.stack);
    // If it is a database connection error, return more detailed error information
    if (error.code === 'ECONNREFUSED' || error.code === 'ER_ACCESS_DENIED_ERROR') {
      res.status(500).json({
        success: false,
        message: 'Database connection failed',
        error: error.message,
        suggestion: 'Please check if MySQL service is running and database connection configuration is correct'
      });
    } else if (error.code === 'ER_NO_SUCH_TABLE') {
      res.status(500).json({
        success: false,
        message: 'Database table does not exist',
        error: error.message,
        suggestion: 'Please check if the medium table exists in the transformerMedia database'
      });
    } else {
      res.status(500).json({
        success: false,
        message: 'Failed to get data',
        error: error.message,
        code: error.code
      });
    }
  }
});
// 7. Detail API (Microbial Detail) — one record by name, plus relatives at a
// chosen taxonomy level.
// Fixed: the detail query never selected m.cultured_type, so the response's
// culture_type field was always undefined.
router.get('/microbial-detail', async (req, res) => {
  try {
    const { name, level = 'genus' } = req.query;
    if (!name) {
      return res.status(400).json({ success: false, message: 'Microbial name cannot be empty' });
    }
    console.log('Get microbial detail information:', name, 'Taxonomy Level:', level);
    // Define allowed levels for safety
    const allowedLevels = ['genus', 'family', 'order', 'class', 'phylum'];
    const safeLevel = allowedLevels.includes(level.toLowerCase()) ? level.toLowerCase() : 'genus';
    // First try exact match
    let detailQuery = `
SELECT
m.species_dive_id,
m.microbial_name,
m.domain,
m.phylum,
m.class,
m.\`order\`,
m.family,
m.genus,
m.species,
m.taxonomy,
m.pH as pH,
m.temperature as temperature,
m.o2 as o2,
m.gram_stain,
m.synonym,
m.genome,
m.nutrition,
m.cultured_type,
TRIM(CASE
WHEN m.predict_pH IS NOT NULL AND m.predict_pH != '' THEN
SUBSTRING(m.predict_pH, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_pH)) > 0 THEN LENGTH(m.predict_pH) - LOCATE(' ', REVERSE(m.predict_pH)) ELSE LENGTH(m.predict_pH) END)
ELSE NULL
END) as ppH,
CASE
WHEN m.predict_pH IS NOT NULL AND m.predict_pH != '' AND LOCATE(' ', REVERSE(m.predict_pH)) > 0 THEN
TRIM(SUBSTRING(m.predict_pH, LENGTH(m.predict_pH) - LOCATE(' ', REVERSE(m.predict_pH)) + 2))
ELSE NULL
END as ppH_probability,
TRIM(CASE
WHEN m.predict_temperature IS NOT NULL AND m.predict_temperature != '' THEN
SUBSTRING(m.predict_temperature, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_temperature)) > 0 THEN LENGTH(m.predict_temperature) - LOCATE(' ', REVERSE(m.predict_temperature)) ELSE LENGTH(m.predict_temperature) END)
ELSE NULL
END) as ptemperature,
CASE
WHEN m.predict_temperature IS NOT NULL AND m.predict_temperature != '' AND LOCATE(' ', REVERSE(m.predict_temperature)) > 0 THEN
TRIM(SUBSTRING(m.predict_temperature, LENGTH(m.predict_temperature) - LOCATE(' ', REVERSE(m.predict_temperature)) + 2))
ELSE NULL
END as ptemperature_probability,
TRIM(CASE
WHEN m.predict_o2 IS NOT NULL AND m.predict_o2 != '' THEN
SUBSTRING(m.predict_o2, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_o2)) > 0 THEN LENGTH(m.predict_o2) - LOCATE(' ', REVERSE(m.predict_o2)) ELSE LENGTH(m.predict_o2) END)
ELSE NULL
END) as po2,
CASE
WHEN m.predict_o2 IS NOT NULL AND m.predict_o2 != '' AND LOCATE(' ', REVERSE(m.predict_o2)) > 0 THEN
TRIM(SUBSTRING(m.predict_o2, LENGTH(m.predict_o2) - LOCATE(' ', REVERSE(m.predict_o2)) + 2))
ELSE NULL
END as po2_probability
FROM cultured_data m
WHERE m.microbial_name = ?
LIMIT 1
`;
    let [rows] = await pool.execute(detailQuery, [name]);
    // If exact match fails, try fuzzy match
    if (rows.length === 0) {
      detailQuery = detailQuery.replace('m.microbial_name = ?', 'm.microbial_name LIKE ?');
      [rows] = await pool.execute(detailQuery, [`%${name}%`]);
    }
    if (rows.length === 0) {
      return res.json({ success: false, message: 'Microbial details not found' });
    }
    const microbialData = rows[0];
    const levelValue = microbialData[safeLevel]; // Value for the requested taxonomy level (e.g., the family name)
    let nutritionInfo = microbialData.nutrition || '';
    let relatedRows = [];
    // Check if the taxonomy level value exists (e.g., if we want family, ensure family is not null)
    if (levelValue) {
      try {
        // safeLevel is whitelisted above, so interpolating it is safe from SQL
        // injection. 'order' is a reserved keyword and needs backticks.
        const columnRef = safeLevel === 'order' ? '`order`' : safeLevel;
        const relatedQuery = `
SELECT
m.species_dive_id,
m.microbial_name,
m.domain,
m.phylum,
m.class,
m.\`order\`,
m.family,
m.genus,
m.species,
m.taxonomy,
m.pH as pH,
m.temperature as temperature,
m.o2 as o2,
m.gram_stain,
m.synonym,
m.genome,
m.nutrition,
m.cultured_type,
TRIM(CASE
WHEN m.predict_pH IS NOT NULL AND m.predict_pH != '' THEN
SUBSTRING(m.predict_pH, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_pH)) > 0 THEN LENGTH(m.predict_pH) - LOCATE(' ', REVERSE(m.predict_pH)) ELSE LENGTH(m.predict_pH) END)
ELSE NULL
END) as ppH,
TRIM(CASE
WHEN m.predict_temperature IS NOT NULL AND m.predict_temperature != '' THEN
SUBSTRING(m.predict_temperature, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_temperature)) > 0 THEN LENGTH(m.predict_temperature) - LOCATE(' ', REVERSE(m.predict_temperature)) ELSE LENGTH(m.predict_temperature) END)
ELSE NULL
END) as ptemperature,
TRIM(CASE
WHEN m.predict_o2 IS NOT NULL AND m.predict_o2 != '' THEN
SUBSTRING(m.predict_o2, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_o2)) > 0 THEN LENGTH(m.predict_o2) - LOCATE(' ', REVERSE(m.predict_o2)) ELSE LENGTH(m.predict_o2) END)
ELSE NULL
END) as po2
FROM cultured_data m
WHERE m.${columnRef} = ? AND m.microbial_name != ?
ORDER BY m.microbial_name
`;
        const [relatedResult] = await pool.execute(relatedQuery, [levelValue, microbialData.microbial_name]);
        relatedRows = relatedResult;
      } catch (relatedError) {
        console.error('Related query failed:', relatedError);
      }
    }
    const responseData = {
      ...microbialData,
      pH: microbialData.pH,
      temperature: microbialData.temperature,
      o2: microbialData.o2,
      ppH: microbialData.ppH || null,
      ptemperature: microbialData.ptemperature || null,
      po2: microbialData.po2 || null,
      ppH_probability: microbialData.ppH_probability || null,
      ptemperature_probability: microbialData.ptemperature_probability || null,
      po2_probability: microbialData.po2_probability || null,
      nutrition: nutritionInfo,
      culture_type: microbialData.cultured_type,
      related_microbes: relatedRows.map(row => ({
        ...row,
        pH: row.pH,
        temperature: row.temperature,
        o2: row.o2,
        ppH: row.ppH || null,
        ptemperature: row.ptemperature || null,
        po2: row.po2 || null
      }))
    };
    res.json({
      success: true,
      data: responseData
    });
  } catch (error) {
    console.error('Error fetching details:', error);
    res.status(500).json({
      success: false,
      message: 'Internal Server Error',
      error: error.message
    });
  }
});
// 8. Statistics API (Charts) — distinct-species counts per taxonomy group.
router.get('/taxonomy-stats', async (req, res) => {
  try {
    const { level = 'phylum', cultured_type = 'cultured' } = req.query;
    // Whitelist the taxonomy level before interpolating it as a column name.
    const validLevels = ['phylum', 'class', 'order', 'family', 'genus', 'species'];
    if (!validLevels.includes(level)) {
      return res.status(400).json({
        success: false,
        message: 'Invalid taxonomy level',
        validLevels: validLevels
      });
    }
    // Total number of distinct classifications at this taxonomy level.
    const countQuery = `
SELECT COUNT(DISTINCT \`${level}\`) as total_count
FROM cultured_data m
WHERE \`${level}\` IS NOT NULL AND \`${level}\` != '' AND m.cultured_type = ?
`;
    console.log('Execute taxonomy total count query:', countQuery);
    const [countResult] = await pool.execute(countQuery, [cultured_type]);
    const totalCount = countResult[0].total_count;
    // Per-classification species counts, largest first.
    const query = `
SELECT
\`${level}\` as name,
COUNT(DISTINCT m.species_dive_id) as value
FROM cultured_data m
WHERE \`${level}\` IS NOT NULL AND \`${level}\` != '' AND m.cultured_type = ?
GROUP BY \`${level}\`
ORDER BY value DESC
`;
    console.log('Execute taxonomy statistics query:', query);
    const [statRows] = await pool.execute(query, [cultured_type]);
    const data = statRows.map(({ name, value }) => ({
      name,
      value: parseInt(value)
    }));
    res.json({
      success: true,
      data: data,
      level: level,
      total: totalCount, // Return the total number of classifications for this taxonomy level
      displayed: data.length // Return the actual number of classifications displayed (now equals total)
    });
  } catch (error) {
    console.error('Error getting taxonomy statistics:', error);
    res.status(500).json({
      success: false,
      message: 'Failed to get taxonomy statistics',
      error: error.message
    });
  }
});
// GET /physchem-stats?type=<o2|ph|temperature>&cultured_type=<type>
// Physico-chemical property statistics. Response `data` shape depends on type:
//   o2          -> { aerobe, anaerobe, facultative, total }
//   ph          -> [{ x: <integer bucket 1..12>, count }]
//   temperature -> [{ category, count }]
router.get('/physchem-stats', async (req, res) => {
    try {
        const { type = 'o2', cultured_type = 'cultured' } = req.query;
        // Validate type
        const validTypes = ['o2', 'ph', 'temperature'];
        if (!validTypes.includes(type)) {
            return res.status(400).json({
                success: false,
                message: 'Invalid physical and chemical properties type',
                validTypes: validTypes
            });
        }
        let data = {};
        if (type === 'o2') {
            // O2 type statistics. SUBSTRING up to the first space keeps only the
            // leading category word; CONCAT(m.o2, ' ') guarantees LOCATE finds
            // a space even when the value is a single word.
            const query = `
                SELECT
                    CASE
                        WHEN m.o2 IS NOT NULL AND m.o2 != '' THEN
                            SUBSTRING(m.o2, 1, LOCATE(' ', CONCAT(m.o2, ' ')) - 1)
                        ELSE m.o2
                    END as name,
                    COUNT(DISTINCT m.species_dive_id) as count
                FROM cultured_data m
                WHERE m.o2 IS NOT NULL AND m.o2 != '' AND m.cultured_type = ?
                GROUP BY name
                ORDER BY count DESC
            `;
            const [rows] = await pool.execute(query, [cultured_type]);
            const total = rows.reduce((sum, row) => sum + Number.parseInt(row.count, 10), 0);
            data = {
                aerobe: 0,
                anaerobe: 0,
                facultative: 0,
                total: total
            };
            rows.forEach(row => {
                const name = row.name.toLowerCase();
                const count = Number.parseInt(row.count, 10);
                // "anaerobe" contains the substring "aerobe", so aerobes must
                // explicitly exclude it; check order matters here.
                if (name.includes('aerobe') && !name.includes('anaerobe')) {
                    data.aerobe += count;
                } else if (name.includes('anaerobe')) {
                    data.anaerobe += count;
                } else if (name.includes('facultative')) {
                    data.facultative += count;
                }
            });
        } else if (type === 'ph') {
            // pH distribution in unit-wide buckets. BETWEEN bounds overlap at
            // integers; since CASE takes the first match, a pH of exactly N
            // falls into the lower bucket (e.g. pH 2 -> bucket 1).
            const query = `
                SELECT
                    CASE
                        WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 1 AND 2 THEN 1
                        WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 2 AND 3 THEN 2
                        WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 3 AND 4 THEN 3
                        WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 4 AND 5 THEN 4
                        WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 5 AND 6 THEN 5
                        WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 6 AND 7 THEN 6
                        WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 7 AND 8 THEN 7
                        WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 8 AND 9 THEN 8
                        WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 9 AND 10 THEN 9
                        WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 10 AND 11 THEN 10
                        WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 11 AND 12 THEN 11
                        ELSE 12
                    END as ph_range,
                    COUNT(DISTINCT m.species_dive_id) as count
                FROM cultured_data m
                WHERE m.pH IS NOT NULL AND m.pH != '' AND m.pH != 'Unknown' AND m.cultured_type = ?
                GROUP BY ph_range
                ORDER BY ph_range
            `;
            const [rows] = await pool.execute(query, [cultured_type]);
            data = rows.map(row => ({
                x: Number.parseInt(row.ph_range, 10),
                count: Number.parseInt(row.count, 10)
            }));
        } else if (type === 'temperature') {
            // Temperature category distribution: same leading-word extraction
            // as the O2 branch.
            const query = `
                SELECT
                    CASE
                        WHEN m.temperature IS NOT NULL AND m.temperature != '' THEN
                            SUBSTRING(m.temperature, 1, LOCATE(' ', CONCAT(m.temperature, ' ')) - 1)
                        ELSE m.temperature
                    END as temp_category,
                    COUNT(DISTINCT m.species_dive_id) as count
                FROM cultured_data m
                WHERE m.temperature IS NOT NULL AND m.temperature != '' AND m.temperature != 'Unknown' AND m.cultured_type = ?
                GROUP BY temp_category
                ORDER BY count DESC
            `;
            const [rows] = await pool.execute(query, [cultured_type]);
            data = rows.map(row => ({
                category: row.temp_category,
                count: Number.parseInt(row.count, 10)
            }));
        }
        res.json({
            success: true,
            data: data,
            type: type
        });
    } catch (error) {
        console.error('Error getting physical and chemical properties statistics:', error);
        res.status(500).json({
            success: false,
            message: 'Failed to get physical and chemical properties statistics',
            error: error.message
        });
    }
});
// GET /sunburst-stats
// Aggregated counts grouped by (o2 category, temperature category, pH class)
// for the sunburst chart, honouring the same filter parameters as the browse
// interface.
router.get('/sunburst-stats', async (req, res) => {
    try {
        const { taxonomy = '', microbial = '', ph = '', temperature = '', o2 = '', cultured_type = 'cultured' } = req.query;
        // Build query conditions (same logic as browse interface): SQL
        // fragments and their bound parameters are accumulated in lockstep.
        let whereConditions = [];
        let queryParams = [];
        // Add cultured_type filter
        if (cultured_type) {
            whereConditions.push('m.cultured_type = ?');
            queryParams.push(cultured_type);
        }
        // Substring search on the full taxonomy string.
        if (taxonomy) {
            whereConditions.push('m.taxonomy LIKE ?');
            queryParams.push(`%${taxonomy}%`);
        }
        // Substring search on the microbial name.
        if (microbial) {
            whereConditions.push('m.microbial_name LIKE ?');
            queryParams.push(`%${microbial}%`);
        }
        // pH filtering: "min-max" range, otherwise an exact numeric match.
        // NOTE(review): /taxonomy-sankey applies a +/-0.5 tolerance for the
        // single-value case instead of exact equality — confirm which is intended.
        if (ph) {
            if (ph.includes('-')) {
                const [phMin, phMax] = ph.split('-').map(Number);
                whereConditions.push('CAST(m.pH AS DECIMAL(4,2)) >= ? AND CAST(m.pH AS DECIMAL(4,2)) <= ?');
                queryParams.push(phMin, phMax);
            } else {
                whereConditions.push('CAST(m.pH AS DECIMAL(4,2)) = ?');
                queryParams.push(Number(ph));
            }
        }
        // Temperature category filtering: compares only the leading word of the
        // stored value (CONCAT guarantees LOCATE finds a space).
        if (temperature) {
            whereConditions.push(`(
                CASE
                    WHEN m.temperature IS NOT NULL AND m.temperature != '' THEN
                        SUBSTRING(m.temperature, 1, LOCATE(' ', CONCAT(m.temperature, ' ')) - 1)
                    ELSE m.temperature
                END
            ) = ?`);
            queryParams.push(temperature);
        }
        // O2 type filtering on the leading category word.
        // NOTE(review): the parameter is pushed without % wildcards, so this
        // LIKE behaves as an exact match, whereas /taxonomy-sankey pushes
        // `%${o2}%` — confirm which behaviour is intended.
        if (o2) {
            whereConditions.push(`(
                CASE
                    WHEN m.o2 IS NOT NULL AND m.o2 != '' THEN
                        SUBSTRING(m.o2, 1, LOCATE(' ', CONCAT(m.o2, ' ')) - 1)
                    ELSE m.o2
                END
            ) LIKE ?`);
            queryParams.push(o2);
        }
        // Build WHERE clause
        const whereClause = whereConditions.length > 0 ? `WHERE ${whereConditions.join(' AND ')}` : '';
        // Group every matching row by extracted o2 word, temperature word, and
        // a coarse pH class (acidic < 6 <= neutral <= 8 < alkaline <= 12; out
        // of range values fall through to the raw stored pH).
        const query = `
            SELECT
                CASE
                    WHEN m.o2 IS NOT NULL AND m.o2 != '' THEN
                        SUBSTRING(m.o2, 1, LOCATE(' ', CONCAT(m.o2, ' ')) - 1)
                    ELSE m.o2
                END as o2,
                CASE
                    WHEN m.temperature IS NOT NULL AND m.temperature != '' THEN
                        SUBSTRING(m.temperature, 1, LOCATE(' ', CONCAT(m.temperature, ' ')) - 1)
                    ELSE NULL
                END as temperature,
                CASE
                    WHEN m.pH IS NOT NULL THEN
                        CASE
                            WHEN CAST(m.pH AS DECIMAL(4,2)) >= 1 AND CAST(m.pH AS DECIMAL(4,2)) < 6 THEN 'acidic'
                            WHEN CAST(m.pH AS DECIMAL(4,2)) >= 6 AND CAST(m.pH AS DECIMAL(4,2)) <= 8 THEN 'neutral'
                            WHEN CAST(m.pH AS DECIMAL(4,2)) > 8 AND CAST(m.pH AS DECIMAL(4,2)) <= 12 THEN 'alkaline'
                            ELSE m.pH
                        END
                    ELSE m.pH
                END as ph,
                COUNT(*) as count
            FROM cultured_data m
            ${whereClause}
            GROUP BY o2, temperature, ph
            ORDER BY count DESC
        `;
        console.log('Execute sunburst query:', query, 'Parameters:', queryParams);
        const [rows] = await pool.execute(query, queryParams);
        res.json({
            success: true,
            data: rows,
            total: rows.length
        });
    } catch (error) {
        console.error('Error getting sunburst chart data:', error);
        res.status(500).json({
            success: false,
            message: 'Failed to get sunburst chart data',
            error: error.message
        });
    }
});
// GET /nutrition-stats?cultured_type=<type>
// Parses the Python-list formatted `nutrition` column and counts, for each
// nutrient, how many species use a medium containing it (weighted by the
// number of species per medium).
router.get('/nutrition-stats', async (req, res) => {
    // Parse a Python-style list literal (e.g. "['glucose', 'yeast extract']")
    // into an array of nutrient names. Quoted items are matched as whole
    // tokens, so nutrients whose names contain commas are no longer split
    // apart (the previous plain split(',') broke those).
    const parseNutritionList = (raw) => {
        if (!raw || !raw.trim()) return [];
        const inner = raw.trim().replace(/^\[|\]$/g, '');
        if (!inner) return [];
        // Either a single-/double-quoted token (commas allowed inside the
        // quotes) or a run of comma-free characters.
        const tokens = inner.match(/'[^']*'|"[^"]*"|[^,]+/g) || [];
        return tokens
            .map(token => token.trim().replace(/^['"]|['"]$/g, '').trim())
            .filter(token => token.length > 0);
    };
    try {
        const { cultured_type = 'cultured' } = req.query;
        // One row per distinct nutrition string, with the number of species
        // using that medium.
        const query = `
            SELECT
                m.nutrition,
                COUNT(DISTINCT m.species_dive_id) as species_count
            FROM cultured_data m
            WHERE m.nutrition IS NOT NULL AND m.nutrition != '' AND m.cultured_type = ?
            GROUP BY m.nutrition
        `;
        console.log('Execute nutrition statistics query:', query);
        const [rows] = await pool.execute(query, [cultured_type]);
        // Tally nutrient usage, weighted by species count per medium.
        const nutritionFrequency = {};
        let totalSpecies = 0;
        rows.forEach(row => {
            const nutritionStr = row.nutrition;
            const speciesCount = Number.parseInt(row.species_count, 10);
            totalSpecies += speciesCount;
            try {
                parseNutritionList(nutritionStr).forEach(nutrient => {
                    // Each culture medium is counted as many times as the
                    // number of species using it.
                    nutritionFrequency[nutrient] = (nutritionFrequency[nutrient] || 0) + speciesCount;
                });
            } catch (error) {
                // Best-effort parsing: log malformed entries and continue.
                console.warn('Failed to parse nutrition data:', nutritionStr, error);
            }
        });
        // Convert to array format and sort by frequency, descending.
        const data = Object.entries(nutritionFrequency)
            .map(([name, value]) => ({ name, value }))
            .sort((a, b) => b.value - a.value);
        console.log(`Parsing completed, found ${Object.keys(nutritionFrequency).length} different nutrients`);
        console.log(`Total species count: ${totalSpecies}`);
        res.json({
            success: true,
            data: data,
            total: data.length,
            totalNutrients: Object.keys(nutritionFrequency).length,
            totalSpecies: totalSpecies
        });
    } catch (error) {
        console.error('Error getting nutrition statistics:', error);
        res.status(500).json({
            success: false,
            message: 'Failed to get nutrition statistics',
            error: error.message
        });
    }
});
// GET /taxonomy-sankey
// Sankey chart data for taxonomy flows. Accepts the same filter set as the
// browse interface and delegates the actual aggregation to buildSankeyData().
router.get('/taxonomy-sankey', async (req, res) => {
    try {
        const { taxonomy = '', microbial = '', ph = '', temperature = '', o2 = '', cultured_type = 'cultured' } = req.query;
        // SQL fragments and their bound parameters, accumulated in lockstep.
        const clauses = [];
        const bindings = [];
        if (cultured_type) {
            clauses.push('m.cultured_type = ?');
            bindings.push(cultured_type);
        }
        // Taxonomy-level filter: only whitelisted levels map to a column, so
        // nothing user-controlled is ever interpolated into the SQL. Unknown
        // levels simply add no condition.
        if (taxonomy && taxonomy !== 'ALL') {
            const taxonomyColumns = new Map([
                ['Domain', 'm.domain'],
                ['Phylum', 'm.phylum'],
                ['Class', 'm.class'],
                ['Order', 'm.order'],
                ['Family', 'm.family'],
                ['Genus', 'm.genus'],
                ['Species', 'm.species']
            ]);
            const column = taxonomyColumns.get(taxonomy);
            if (column) {
                clauses.push(`${column} IS NOT NULL AND ${column} != ''`);
            }
        }
        // Substring match on the microbial name.
        if (microbial) {
            clauses.push('m.microbial_name LIKE ?');
            bindings.push(`%${microbial}%`);
        }
        // pH filter: an explicit "min-max" range, or a single value matched
        // with a +/-0.5 tolerance window.
        if (ph) {
            if (ph.includes('-')) {
                const [phMin, phMax] = ph.split('-').map(Number);
                clauses.push('CAST(m.pH AS DECIMAL(4,2)) >= ? AND CAST(m.pH AS DECIMAL(4,2)) <= ?');
                bindings.push(phMin, phMax);
            } else {
                const targetPh = parseFloat(ph);
                clauses.push('CAST(m.pH AS DECIMAL(4,2)) >= ? AND CAST(m.pH AS DECIMAL(4,2)) <= ?');
                bindings.push(targetPh - 0.5, targetPh + 0.5);
            }
        }
        // Temperature filter compares only the leading category word of the
        // stored value (CONCAT guarantees LOCATE finds a space).
        if (temperature) {
            clauses.push(`(
                CASE
                    WHEN m.temperature IS NOT NULL AND m.temperature != '' THEN
                        SUBSTRING(m.temperature, 1, LOCATE(' ', CONCAT(m.temperature, ' ')) - 1)
                    ELSE m.temperature
                END
            ) = ?`);
            bindings.push(temperature);
        }
        // O2 filter likewise extracts the leading category word, matched as a
        // substring.
        if (o2) {
            clauses.push(`(
                CASE
                    WHEN m.o2 IS NOT NULL AND m.o2 != '' THEN
                        SUBSTRING(m.o2, 1, LOCATE(' ', CONCAT(m.o2, ' ')) - 1)
                    ELSE m.o2
                END
            ) LIKE ?`);
            bindings.push(`%${o2}%`);
        }
        const whereClause = clauses.length > 0 ? `WHERE ${clauses.join(' AND ')}` : '';
        const sankeyData = await buildSankeyData(whereClause, bindings);
        res.json({
            success: true,
            data: sankeyData
        });
    } catch (error) {
        console.error('Error getting Sankey chart data:', error);
        res.status(500).json({
            success: false,
            message: 'Failed to get Sankey chart data',
            error: error.message
        });
    }
});
// 9. 健康检查
// 9. Health check: verifies the DB pool can hand out a live connection.
// Fix: the original released the connection only on the success path, leaking
// a pooled connection whenever ping() threw; release now happens in finally.
router.get('/health', async (req, res) => {
    let connection;
    try {
        connection = await pool.getConnection();
        await connection.ping();
        res.json({ success: true, message: 'Database connection normal' });
    } catch (error) {
        res.status(500).json({ success: false, error: error.message });
    } finally {
        // Always return the connection to the pool, even when ping fails.
        if (connection) connection.release();
    }
});
// 10. 下载数据 API
// 10. Data download API
// GET /download-data/:type?cultured_type=<type>
// Streams a CSV dump of cultured_data for the requested column slice.
// type is one of: all_data, ph, temperature, oxygen, culture_medium,
// max_growth_rate.
router.get('/download-data/:type', async (req, res) => {
    // Split a SELECT-list string on commas at parenthesis depth 0 only, so
    // commas inside nested function calls / CASE expressions do not break a
    // field apart. (Sufficient here because the string literals in our field
    // definitions contain no parentheses or top-level commas.)
    const splitTopLevelFields = (fieldStr) => {
        const fields = [];
        let depth = 0;
        let current = '';
        for (const ch of fieldStr) {
            if (ch === '(') depth += 1;
            else if (ch === ')') depth -= 1;
            if (ch === ',' && depth === 0) {
                fields.push(current);
                current = '';
            } else {
                current += ch;
            }
        }
        fields.push(current);
        return fields.map(f => f.trim()).filter(f => f.length > 0);
    };
    try {
        const { type } = req.params;
        const { cultured_type = 'cultured' } = req.query;
        // Allowed download types (anything else is rejected with 400).
        const allowedTypes = ['all_data', 'ph', 'temperature', 'oxygen', 'culture_medium', 'max_growth_rate'];
        if (!allowedTypes.includes(type)) {
            return res.status(400).json({
                success: false,
                message: 'Invalid download type',
                allowedTypes: allowedTypes
            });
        }
        // Per-type SELECT list and CSV header row. The TRIM(CASE ...) wrappers
        // strip the trailing probability token from the predicted columns.
        let selectFields = '';
        let headers = [];
        switch (type) {
            case 'all_data':
                selectFields = `
                    m.species_dive_id,
                    m.microbial_name,
                    m.genome,
                    m.domain,
                    m.phylum,
                    m.class,
                    m.\`order\`,
                    m.family,
                    m.genus,
                    m.species,
                    m.taxonomy,
                    m.pH as pH,
                    m.temperature as temperature,
                    m.o2 as o2,
                    m.nutrition,
                    m.gram_stain,
                    m.synonym,
                    m.media_name,
                    m.media_source,
                    m.mediadive_id,
                    TRIM(CASE
                        WHEN m.predict_pH IS NOT NULL AND m.predict_pH != '' THEN
                            SUBSTRING(m.predict_pH, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_pH)) > 0 THEN LENGTH(m.predict_pH) - LOCATE(' ', REVERSE(m.predict_pH)) ELSE LENGTH(m.predict_pH) END)
                        ELSE NULL
                    END) as ppH,
                    TRIM(CASE
                        WHEN m.predict_temperature IS NOT NULL AND m.predict_temperature != '' THEN
                            SUBSTRING(m.predict_temperature, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_temperature)) > 0 THEN LENGTH(m.predict_temperature) - LOCATE(' ', REVERSE(m.predict_temperature)) ELSE LENGTH(m.predict_temperature) END)
                        ELSE NULL
                    END) as ptemperature,
                    TRIM(CASE
                        WHEN m.predict_o2 IS NOT NULL AND m.predict_o2 != '' THEN
                            SUBSTRING(m.predict_o2, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_o2)) > 0 THEN LENGTH(m.predict_o2) - LOCATE(' ', REVERSE(m.predict_o2)) ELSE LENGTH(m.predict_o2) END)
                        ELSE NULL
                    END) as po2
                `;
                headers = ['species_dive_id', 'microbial_name', 'genome', 'domain', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'taxonomy', 'pH', 'temperature', 'o2', 'nutrition', 'gram_stain', 'synonym', 'media_name', 'media_source', 'mediadive_id', 'predict_pH', 'predict_temperature', 'predict_o2'];
                break;
            case 'ph':
                selectFields = `
                    m.species_dive_id,
                    m.microbial_name,
                    m.genome,
                    m.domain,
                    m.phylum,
                    m.class,
                    m.\`order\`,
                    m.family,
                    m.genus,
                    m.species,
                    m.pH as pH,
                    TRIM(CASE
                        WHEN m.predict_pH IS NOT NULL AND m.predict_pH != '' THEN
                            SUBSTRING(m.predict_pH, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_pH)) > 0 THEN LENGTH(m.predict_pH) - LOCATE(' ', REVERSE(m.predict_pH)) ELSE LENGTH(m.predict_pH) END)
                        ELSE NULL
                    END) as ppH
                `;
                headers = ['species_dive_id', 'microbial_name', 'genome', 'domain', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'pH', 'predict_pH'];
                break;
            case 'temperature':
                selectFields = `
                    m.species_dive_id,
                    m.microbial_name,
                    m.genome,
                    m.domain,
                    m.phylum,
                    m.class,
                    m.\`order\`,
                    m.family,
                    m.genus,
                    m.species,
                    m.temperature as temperature,
                    TRIM(CASE
                        WHEN m.predict_temperature IS NOT NULL AND m.predict_temperature != '' THEN
                            SUBSTRING(m.predict_temperature, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_temperature)) > 0 THEN LENGTH(m.predict_temperature) - LOCATE(' ', REVERSE(m.predict_temperature)) ELSE LENGTH(m.predict_temperature) END)
                        ELSE NULL
                    END) as ptemperature
                `;
                headers = ['species_dive_id', 'microbial_name', 'genome', 'domain', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'temperature', 'predict_temperature'];
                break;
            case 'oxygen':
                selectFields = `
                    m.species_dive_id,
                    m.microbial_name,
                    m.genome,
                    m.domain,
                    m.phylum,
                    m.class,
                    m.\`order\`,
                    m.family,
                    m.genus,
                    m.species,
                    m.o2 as o2,
                    TRIM(CASE
                        WHEN m.predict_o2 IS NOT NULL AND m.predict_o2 != '' THEN
                            SUBSTRING(m.predict_o2, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_o2)) > 0 THEN LENGTH(m.predict_o2) - LOCATE(' ', REVERSE(m.predict_o2)) ELSE LENGTH(m.predict_o2) END)
                        ELSE NULL
                    END) as po2
                `;
                headers = ['species_dive_id', 'microbial_name', 'genome', 'domain', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'o2', 'predict_o2'];
                break;
            case 'culture_medium':
                selectFields = `
                    m.species_dive_id,
                    m.microbial_name,
                    m.genome,
                    m.domain,
                    m.phylum,
                    m.class,
                    m.\`order\`,
                    m.family,
                    m.genus,
                    m.species,
                    m.nutrition,
                    m.media_name,
                    m.media_source,
                    m.mediadive_id
                `;
                headers = ['species_dive_id', 'microbial_name', 'genome', 'domain', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'nutrition', 'media_name', 'media_source', 'mediadive_id'];
                break;
            case 'max_growth_rate':
                // NOTE: there is no max_growth_rate column visible here; this
                // type currently exports only the taxonomy columns.
                selectFields = `
                    m.species_dive_id,
                    m.microbial_name,
                    m.genome,
                    m.domain,
                    m.phylum,
                    m.class,
                    m.\`order\`,
                    m.family,
                    m.genus,
                    m.species
                `;
                headers = ['species_dive_id', 'microbial_name', 'genome', 'domain', 'phylum', 'class', 'order', 'family', 'genus', 'species'];
                break;
        }
        // GROUP BY every selected expression (alias stripped) to deduplicate
        // rows. Fix: the previous naive selectFields.split(',') cut the nested
        // CASE/SUBSTRING expressions at their internal commas, producing
        // invalid SQL for every type containing predicted columns; the
        // depth-aware splitter keeps each expression intact.
        const groupByFields = splitTopLevelFields(selectFields)
            .map(field => field.split(' as ')[0].trim())
            .join(', ');
        const query = `
            SELECT ${selectFields}
            FROM cultured_data m
            WHERE m.cultured_type = ?
            GROUP BY ${groupByFields}
            ORDER BY m.species_dive_id
        `;
        console.log('Execute download query for type:', type);
        const [rows] = await pool.execute(query, [cultured_type]);
        console.log('Download query result row count:', rows.length);
        // RFC 4180-style quoting: wrap values containing commas, quotes, or
        // newlines, doubling embedded quotes.
        const escapeCSVValue = (value) => {
            if (value === null || value === undefined) return '';
            const str = String(value);
            if (str.includes(',') || str.includes('"') || str.includes('\n')) {
                return `"${str.replace(/"/g, '""')}"`;
            }
            return str;
        };
        // CSV headers use the predict_* names, but the SQL aliases the
        // predicted expressions as ppH/ptemperature/po2 — map them back.
        const headerAliases = {
            predict_pH: 'ppH',
            predict_temperature: 'ptemperature',
            predict_o2: 'po2'
        };
        const csvLines = [
            headers.join(','),
            ...rows.map(row => {
                return headers.map(header => {
                    let value = row[header];
                    if (value === undefined && headerAliases[header]) {
                        value = row[headerAliases[header]];
                    }
                    return escapeCSVValue(value);
                }).join(',');
            })
        ];
        const csvContent = csvLines.join('\n');
        // Deliver as a file attachment.
        res.setHeader('Content-Type', 'text/csv; charset=utf-8');
        res.setHeader('Content-Disposition', `attachment; filename="${type}.csv"`);
        res.send(csvContent);
    } catch (error) {
        console.error('Error downloading data:', error);
        res.status(500).json({
            success: false,
            message: 'Failed to download data',
            error: error.message
        });
    }
});
module.exports = router;