Files
labweb/utils/mediaModel.js
2025-12-16 11:39:15 +08:00

975 lines
43 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// mediaModel.js
const { pool } = require('../config/db'); // 假设你的db配置路径
// --- 内部辅助函数:构建 WHERE 子句 ---
// 这个函数被 browse, sunburst, sankey 多个接口复用
const buildWhereClause = (filters) => {
let whereConditions = [];
let queryParams = [];
const { search, ph, temperature, o2, taxonomy, taxonomyValue, microbial, cultured_type } = filters;
// 1. Cultured Type
if (cultured_type) {
whereConditions.push('m.cultured_type = ?');
queryParams.push(cultured_type);
}
// 2. Search (通用搜索)
if (search) {
whereConditions.push('(m.microbial_name LIKE ? OR m.taxonomy LIKE ?)');
queryParams.push(`%${search}%`, `%${search}%`);
}
// 3. pH Range (处理预测值逻辑)
if (ph) {
const phExpr = `TRIM(CASE
WHEN m.predict_pH IS NOT NULL AND m.predict_pH != '' THEN
SUBSTRING(m.predict_pH, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_pH)) > 0 THEN LENGTH(m.predict_pH) - LOCATE(' ', REVERSE(m.predict_pH)) ELSE LENGTH(m.predict_pH) END)
ELSE NULL
END)`;
if (ph.includes('-')) {
const [min, max] = ph.split('-').map(Number);
whereConditions.push(`${phExpr} IS NOT NULL AND CAST(${phExpr} AS DECIMAL(4,2)) >= ? AND CAST(${phExpr} AS DECIMAL(4,2)) <= ?`);
queryParams.push(min, max);
} else {
const val = parseFloat(ph);
whereConditions.push(`${phExpr} IS NOT NULL AND CAST(${phExpr} AS DECIMAL(4,2)) >= ? AND CAST(${phExpr} AS DECIMAL(4,2)) <= ?`);
queryParams.push(val - 0.5, val + 0.5);
}
}
// 4. Temperature (处理预测值逻辑)
if (temperature) {
const tempExpr = `TRIM(CASE
WHEN m.predict_temperature IS NOT NULL AND m.predict_temperature != '' THEN
SUBSTRING(m.predict_temperature, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_temperature)) > 0 THEN LENGTH(m.predict_temperature) - LOCATE(' ', REVERSE(m.predict_temperature)) ELSE LENGTH(m.predict_temperature) END)
ELSE NULL
END)`;
whereConditions.push(`${tempExpr} = ?`);
queryParams.push(temperature);
}
// 5. O2 (处理预测值逻辑)
if (o2) {
const o2Expr = `TRIM(CASE
WHEN m.predict_o2 IS NOT NULL AND m.predict_o2 != '' THEN
SUBSTRING(m.predict_o2, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_o2)) > 0 THEN LENGTH(m.predict_o2) - LOCATE(' ', REVERSE(m.predict_o2)) ELSE LENGTH(m.predict_o2) END)
ELSE NULL
END)`;
whereConditions.push(`${o2Expr} LIKE ?`);
queryParams.push(`%${o2}%`);
}
// 6. Taxonomy
if (taxonomy && taxonomy !== 'ALL' && taxonomy !== '') {
const validLevels = ['Bacteria', 'Domain', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species'];
if (validLevels.includes(taxonomy)) {
let field = taxonomy === 'Bacteria' ? 'm.domain' : `m.${taxonomy.toLowerCase()}`;
// 特殊处理 Order 关键字
if (taxonomy.toLowerCase() === 'order') field = 'm.`order`';
if (taxonomyValue && taxonomyValue.trim() !== '') {
whereConditions.push(`${field} LIKE ?`);
queryParams.push(`%${taxonomyValue.trim()}%`);
} else {
whereConditions.push(`${field} IS NOT NULL AND ${field} != ''`);
}
}
}
// 7. Microbial Name
if (microbial && microbial.trim() !== '') {
whereConditions.push('m.microbial_name LIKE ?');
queryParams.push(`%${microbial.trim()}%`);
}
return {
clause: whereConditions.length > 0 ? `WHERE ${whereConditions.join(' AND ')}` : '',
params: queryParams
};
};
// --- 数据清洗辅助函数 ---
const extractTemperatureCategory = (str) => str ? str.split(',')[0].trim() : '';
const extractO2Category = (str) => str ? str.split(' ')[0] : '';
// --- 核心 Model 对象 ---
const mediaModel = {
// 1. 获取总数
async getTotalCount(filters) {
const { clause, params } = buildWhereClause(filters);
const query = `SELECT COUNT(DISTINCT m.species_dive_id) as total FROM cultured_data m ${clause}`;
const [result] = await pool.execute(query, params);
return result[0].total;
},
// 2. 获取浏览列表数据
async getBrowseData(filters, page, pageSize) {
const { clause, params } = buildWhereClause(filters);
const offset = (page - 1) * pageSize;
// 处理排序参数
const { sortBy, sortOrder } = filters;
let orderByClause = 'ORDER BY m.species_dive_id';
if (sortBy && sortOrder) {
// 定义允许排序的字段映射
const sortFieldMap = {
'microbial': 'm.microbial_name',
'nutrition': 'm.nutrition',
'domain': 'm.domain',
'ph': 'm.pH',
'temperature': 'm.temperature',
'o2': 'm.o2'
};
const sortField = sortFieldMap[sortBy.toLowerCase()];
if (sortField) {
const direction = sortOrder.toUpperCase() === 'DESC' ? 'DESC' : 'ASC';
orderByClause = `ORDER BY ${sortField} ${direction}`;
}
}
const query = `
SELECT
m.species_dive_id,
m.microbial_name,
m.nutrition,
m.domain as domain,
m.pH as pH,
m.temperature as temperature,
m.o2 as o2,
m.predict_pH as ppH_raw,
m.predict_temperature as ptemperature_raw,
m.predict_o2 as po2_raw,
-- Extract predicted value of predict_pH (before the last space)
CASE
WHEN m.predict_pH IS NOT NULL AND m.predict_pH != '' THEN
TRIM(SUBSTRING(m.predict_pH, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_pH)) > 0 THEN LENGTH(m.predict_pH) - LOCATE(' ', REVERSE(m.predict_pH)) ELSE LENGTH(m.predict_pH) END))
ELSE NULL
END as ppH,
-- Extract probability of predict_pH (after the last space)
CASE
WHEN m.predict_pH IS NOT NULL AND m.predict_pH != '' AND LOCATE(' ', REVERSE(m.predict_pH)) > 0 THEN
TRIM(SUBSTRING(m.predict_pH, LENGTH(m.predict_pH) - LOCATE(' ', REVERSE(m.predict_pH)) + 2))
ELSE NULL
END as ppH_probability,
-- Extract predicted value of predict_temperature (before the last space)
CASE
WHEN m.predict_temperature IS NOT NULL AND m.predict_temperature != '' THEN
TRIM(SUBSTRING(m.predict_temperature, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_temperature)) > 0 THEN LENGTH(m.predict_temperature) - LOCATE(' ', REVERSE(m.predict_temperature)) ELSE LENGTH(m.predict_temperature) END))
ELSE NULL
END as ptemperature,
-- Extract probability of predict_temperature (after the last space)
CASE
WHEN m.predict_temperature IS NOT NULL AND m.predict_temperature != '' AND LOCATE(' ', REVERSE(m.predict_temperature)) > 0 THEN
TRIM(SUBSTRING(m.predict_temperature, LENGTH(m.predict_temperature) - LOCATE(' ', REVERSE(m.predict_temperature)) + 2))
ELSE NULL
END as ptemperature_probability,
-- Extract predicted value of predict_o2 (before the last space)
CASE
WHEN m.predict_o2 IS NOT NULL AND m.predict_o2 != '' THEN
TRIM(SUBSTRING(m.predict_o2, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_o2)) > 0 THEN LENGTH(m.predict_o2) - LOCATE(' ', REVERSE(m.predict_o2)) ELSE LENGTH(m.predict_o2) END))
ELSE NULL
END as po2,
-- Extract probability of predict_o2 (after the last space)
CASE
WHEN m.predict_o2 IS NOT NULL AND m.predict_o2 != '' AND LOCATE(' ', REVERSE(m.predict_o2)) > 0 THEN
TRIM(SUBSTRING(m.predict_o2, LENGTH(m.predict_o2) - LOCATE(' ', REVERSE(m.predict_o2)) + 2))
ELSE NULL
END as po2_probability,
m.genome
FROM cultured_data m
${clause}
GROUP BY m.species_dive_id, m.microbial_name, m.nutrition, m.domain, m.pH, m.temperature, m.o2, m.predict_pH, m.predict_temperature, m.predict_o2, m.genome
${orderByClause}
LIMIT ${parseInt(pageSize)} OFFSET ${parseInt(offset)}
`;
const [rows] = await pool.execute(query, params);
// 格式化返回
return rows.map(row => ({
id: row.species_dive_id,
microbial_name: row.microbial_name || '',
nutrition: row.nutrition || '',
domain: row.domain || '',
pH: row.ppH ? row.ppH.toString() : (row.pH ? row.pH.toString() : ''),
pH_probability: row.ppH_probability || null,
temperature: row.ptemperature ? extractTemperatureCategory(row.ptemperature.split(',')[0].trim()) : (row.temperature ? extractTemperatureCategory(row.temperature.split(',')[0].trim()) : ''),
temperature_probability: row.ptemperature_probability || null,
o2: row.po2 ? extractO2Category(row.po2) : (row.o2 ? extractO2Category(row.o2) : ''),
o2_probability: row.po2_probability || null,
ppH: row.ppH || null,
ptemperature: row.ptemperature || null,
po2: row.po2 || null
}));
},
// 3. 微生物详情
async getMicrobialDetail(name, level = 'genus') {
if (!name) {
throw new Error('Microbial name cannot be empty');
}
console.log('Get microbial detail information:', name, 'Taxonomy Level:', level);
// 定义允许的层级
const allowedLevels = ['genus', 'family', 'order', 'class', 'phylum'];
const safeLevel = allowedLevels.includes(level.toLowerCase()) ? level.toLowerCase() : 'genus';
// 构建基础查询字段
const baseFields = `
m.species_dive_id,
m.microbial_name,
m.domain,
m.phylum,
m.class,
m.\`order\`,
m.family,
m.genus,
m.species,
m.taxonomy,
m.pH as pH,
m.temperature as temperature,
m.o2 as o2,
m.gram_stain,
m.synonym,
m.genome,
m.nutrition,
m.cultured_type,
TRIM(CASE
WHEN m.predict_pH IS NOT NULL AND m.predict_pH != '' THEN
SUBSTRING(m.predict_pH, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_pH)) > 0 THEN LENGTH(m.predict_pH) - LOCATE(' ', REVERSE(m.predict_pH)) ELSE LENGTH(m.predict_pH) END)
ELSE NULL
END) as ppH,
CASE
WHEN m.predict_pH IS NOT NULL AND m.predict_pH != '' AND LOCATE(' ', REVERSE(m.predict_pH)) > 0 THEN
TRIM(SUBSTRING(m.predict_pH, LENGTH(m.predict_pH) - LOCATE(' ', REVERSE(m.predict_pH)) + 2))
ELSE NULL
END as ppH_probability,
TRIM(CASE
WHEN m.predict_temperature IS NOT NULL AND m.predict_temperature != '' THEN
SUBSTRING(m.predict_temperature, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_temperature)) > 0 THEN LENGTH(m.predict_temperature) - LOCATE(' ', REVERSE(m.predict_temperature)) ELSE LENGTH(m.predict_temperature) END)
ELSE NULL
END) as ptemperature,
CASE
WHEN m.predict_temperature IS NOT NULL AND m.predict_temperature != '' AND LOCATE(' ', REVERSE(m.predict_temperature)) > 0 THEN
TRIM(SUBSTRING(m.predict_temperature, LENGTH(m.predict_temperature) - LOCATE(' ', REVERSE(m.predict_temperature)) + 2))
ELSE NULL
END as ptemperature_probability,
TRIM(CASE
WHEN m.predict_o2 IS NOT NULL AND m.predict_o2 != '' THEN
SUBSTRING(m.predict_o2, 1, CASE WHEN LOCATE(' ', REVERSE(m.predict_o2)) > 0 THEN LENGTH(m.predict_o2) - LOCATE(' ', REVERSE(m.predict_o2)) ELSE LENGTH(m.predict_o2) END)
ELSE NULL
END) as po2,
CASE
WHEN m.predict_o2 IS NOT NULL AND m.predict_o2 != '' AND LOCATE(' ', REVERSE(m.predict_o2)) > 0 THEN
TRIM(SUBSTRING(m.predict_o2, LENGTH(m.predict_o2) - LOCATE(' ', REVERSE(m.predict_o2)) + 2))
ELSE NULL
END as po2_probability
`;
// 首先尝试精确匹配
let detailQuery = `
SELECT ${baseFields}
FROM cultured_data m
WHERE m.microbial_name = ?
LIMIT 1
`;
console.log('Execute exact match query:', detailQuery, 'Parameters:', [name]);
let [rows] = await pool.execute(detailQuery, [name]);
console.log('Exact match query result:', rows.length, 'records');
// 如果精确匹配失败,尝试模糊匹配
if (rows.length === 0) {
detailQuery = `
SELECT ${baseFields}
FROM cultured_data m
WHERE m.microbial_name LIKE ?
LIMIT 1
`;
console.log('Execute fuzzy match query:', detailQuery, 'Parameters:', [`%${name}%`]);
[rows] = await pool.execute(detailQuery, [`%${name}%`]);
console.log('Fuzzy match query result:', rows.length, 'records');
}
if (rows.length === 0) {
return null; // 返回 null 表示未找到
}
const microbialData = rows[0];
console.log('Found microbial data:', microbialData.microbial_name, 'Genus:', microbialData.genus);
// 获取 nutrition 信息
let nutritionInfo = microbialData.nutrition || '';
// 查询相关微生物(基于指定的层级)
let relatedRows = [];
const levelValue = microbialData[safeLevel]; // 获取指定层级的值
if (levelValue) {
try {
// 处理 'order' 关键字
const columnRef = safeLevel === 'order' ? '`order`' : safeLevel;
const relatedQuery = `
SELECT ${baseFields}
FROM cultured_data m
WHERE m.${columnRef} = ? AND m.microbial_name != ?
ORDER BY m.microbial_name
`;
console.log('Execute related microbes query:', relatedQuery, 'Parameters:', [levelValue, microbialData.microbial_name]);
const [relatedResult] = await pool.execute(relatedQuery, [levelValue, microbialData.microbial_name]);
relatedRows = relatedResult;
console.log('Related microbes query result:', relatedRows.length, 'records');
} catch (relatedError) {
console.error('Related microbes query failed:', relatedError);
// 继续执行,使用空数组
}
}
// 构建并返回格式化数据
const responseData = {
...microbialData,
// 保存原始值
pH: microbialData.pH,
temperature: microbialData.temperature,
o2: microbialData.o2,
// 保存预测值(如果存在)
ppH: microbialData.ppH || null,
ptemperature: microbialData.ptemperature || null,
po2: microbialData.po2 || null,
// 保存预测概率(如果存在)
ppH_probability: microbialData.ppH_probability || null,
ptemperature_probability: microbialData.ptemperature_probability || null,
po2_probability: microbialData.po2_probability || null,
nutrition: nutritionInfo,
culture_type: microbialData.cultured_type,
related_microbes: relatedRows.map(row => ({
...row,
// 保存原始值
pH: row.pH,
temperature: row.temperature,
o2: row.o2,
// 保存预测值(如果存在)
ppH: row.ppH || null,
ptemperature: row.ptemperature || null,
po2: row.po2 || null,
// 保存预测概率(如果存在)
ppH_probability: row.ppH_probability || null,
ptemperature_probability: row.ptemperature_probability || null,
po2_probability: row.po2_probability || null
}))
};
console.log('Return data:', {
microbial_name: responseData.microbial_name,
level: safeLevel,
levelValue: levelValue,
related_count: responseData.related_microbes.length
});
return responseData;
},
// 4. 获取 Sankey 数据
async getSankeyData(filters) {
console.log('开始构建6层桑基图数据...');
const { clause, params } = buildWhereClause(filters);
try {
// 查询完整的6层分类层级数据
const hierarchyQuery = `
SELECT
m.domain,
m.phylum,
m.class,
m.\`order\`,
m.family,
m.genus,
COUNT(DISTINCT m.species_dive_id) as species_count
FROM cultured_data m
${clause ? clause + ' AND' : 'WHERE'}
m.domain IS NOT NULL AND m.domain != '' AND
m.phylum IS NOT NULL AND m.phylum != '' AND
m.class IS NOT NULL AND m.class != '' AND
m.\`order\` IS NOT NULL AND m.\`order\` != '' AND
m.family IS NOT NULL AND m.family != '' AND
m.genus IS NOT NULL AND m.genus != ''
GROUP BY m.domain, m.phylum, m.class, m.\`order\`, m.family, m.genus
ORDER BY species_count DESC
`;
console.log('执行6层分类查询:', hierarchyQuery);
const [hierarchyRows] = await pool.execute(hierarchyQuery, params);
console.log('查询结果:', hierarchyRows.length, '条记录');
if (hierarchyRows.length === 0) {
console.log('没有找到数据,返回空桑基图');
return { nodes: [], links: [] };
}
// 定义层级顺序
const levelOrder = ['domain', 'phylum', 'class', 'order', 'family', 'genus'];
// 构建每个层级的分类统计
const levelCounts = new Map(); // 存储每个分类的物种总数
const parentChildMap = new Map(); // 存储父子关系映射
// 处理每条完整的分类路径
for (const row of hierarchyRows) {
const speciesCount = parseInt(row.species_count);
// 为每个层级的分类累加物种数量
for (let i = 0; i < levelOrder.length; i++) {
const level = levelOrder[i];
const categoryName = row[level];
if (categoryName) {
const key = `${level}:${categoryName}`;
levelCounts.set(key, (levelCounts.get(key) || 0) + speciesCount);
// 建立父子关系
if (i > 0) {
const parentLevel = levelOrder[i - 1];
const parentName = row[parentLevel];
if (parentName) {
const parentKey = `${parentLevel}:${parentName}`;
const childKey = `${level}:${categoryName}`;
if (!parentChildMap.has(parentKey)) {
parentChildMap.set(parentKey, new Set());
}
parentChildMap.get(parentKey).add(childKey);
}
}
}
}
}
console.log('层级统计完成:', {
levelCountsSize: levelCounts.size,
parentChildMapSize: parentChildMap.size
});
// 构建节点 - 每层最多10个节点确保节点名称唯一
const nodes = [];
const nodeMap = new Map();
let nodeIndex = 0;
// 计算总物种数量
const totalSpeciesCount = hierarchyRows.reduce((sum, row) => sum + parseInt(row.species_count), 0);
// 添加Total节点
nodes.push({
name: "Total",
level: "total",
levelName: "总计",
depth: 0, // Total节点在第0层
itemStyle: {
color: '#5470c6'
}
});
nodeMap.set("Total", nodeIndex++);
// 为每个层级选择前10个最多的分类
const selectedCategories = new Map(); // 存储每层选中的分类
// 颜色方案和层级中文名称
const levelColors = {
'domain': '#91cc75',
'phylum': '#fac858',
'class': '#ee6666',
'order': '#73c0de',
'family': '#3ba272',
'genus': '#fc8452'
};
const levelNames = {
'domain': 'domain',
'phylum': 'phylum',
'class': 'class',
'order': 'order',
'family': 'family',
'genus': 'genus'
};
// 层级深度映射
const levelDepth = {
'domain': 1,
'phylum': 2,
'class': 3,
'order': 4,
'family': 5,
'genus': 6
};
for (const level of levelOrder) {
// 获取当前层级的所有分类,按物种数量排序
const levelCategories = [];
for (const [key, count] of levelCounts) {
if (key.startsWith(`${level}:`)) {
const categoryName = key.substring(level.length + 1);
levelCategories.push({ name: categoryName, count: count, key: key });
}
}
// 按物种数量降序排序,根据层级决定选择数量
levelCategories.sort((a, b) => b.count - a.count);
// family和genus层级选择前20个其他层级选择前10个
const maxNodes = (level === 'family' || level === 'genus') ? 20 : 10;
const topCategories = levelCategories.slice(0, maxNodes);
console.log(`${level}层级: 总共${levelCategories.length}个分类,选择前${topCategories.length}`);
// 存储选中的分类
const selectedKeys = new Set();
for (const category of topCategories) {
selectedKeys.add(category.key);
// 为节点添加层级信息包括depth属性
nodes.push({
name: category.name,
level: level,
levelName: levelNames[level],
depth: levelDepth[level], // 添加depth属性指定层级
count: category.count,
itemStyle: {
color: levelColors[level] || '#5470c6'
}
});
nodeMap.set(category.key, nodeIndex++);
}
selectedCategories.set(level, selectedKeys);
}
console.log('节点构建完成:', {
totalNodes: nodes.length,
nodesByLevel: Object.fromEntries(
levelOrder.map(level => [level, selectedCategories.get(level).size])
)
});
// 对每个depth层级的节点value进行归一化
// 先计算每个节点的初始value基于count再按层级归一化
const BASE_VALUE = 1000; // 每层的基准总值
const depthGroups = {}; // 按depth分组节点
// 将节点按depth分组并初始化value
for (let i = 0; i < nodes.length; i++) {
const node = nodes[i];
const depth = node.depth;
if (!depthGroups[depth]) {
depthGroups[depth] = [];
}
depthGroups[depth].push(i); // 存储节点索引
// 初始化value为count
nodes[i].value = nodes[i].count || 0;
}
// 设置Total节点的value为BASE_VALUE
nodes[0].value = BASE_VALUE;
// 构建连接 - 确保连接完整性
const links = [];
// 1. Total到domain层的连接使用比例关系
let domainTotalCount = 0;
const domainNodes = [];
for (const [key, count] of levelCounts) {
if (key.startsWith('domain:') && nodeMap.has(key)) {
const targetIndex = nodeMap.get(key);
domainNodes.push({ index: targetIndex, count: count });
domainTotalCount += count;
}
}
// 按比例从Total分配到domain
for (const domainNode of domainNodes) {
const ratio = domainNode.count / domainTotalCount;
links.push({
source: 0, // Total的索引
target: domainNode.index,
value: BASE_VALUE * ratio
});
}
// 2. 相邻层级间的连接
for (let i = 0; i < levelOrder.length - 1; i++) {
const currentLevel = levelOrder[i];
const nextLevel = levelOrder[i + 1];
const currentSelected = selectedCategories.get(currentLevel);
const nextSelected = selectedCategories.get(nextLevel);
// 为每个父子关系创建连接
for (const [parentKey, children] of parentChildMap) {
if (parentKey.startsWith(`${currentLevel}:`) &&
currentSelected.has(parentKey) &&
nodeMap.has(parentKey)) {
const parentIndex = nodeMap.get(parentKey);
// 计算该父节点下所有选中子节点的count总和
let childrenTotalCount = 0;
const validChildren = [];
for (const childKey of children) {
if (childKey.startsWith(`${nextLevel}:`) &&
nextSelected.has(childKey) &&
nodeMap.has(childKey)) {
const childCount = levelCounts.get(childKey) || 0;
childrenTotalCount += childCount;
validChildren.push({
key: childKey,
count: childCount,
index: nodeMap.get(childKey)
});
}
}
// 存储父节点的流出比例信息
for (const child of validChildren) {
const ratio = childrenTotalCount > 0 ? child.count / childrenTotalCount : 0;
links.push({
source: parentIndex,
target: child.index,
ratio: ratio, // 暂存比例后续计算value
childCount: child.count
});
}
}
}
}
console.log('连接关系构建完成:', links.length);
// 3. 从Total开始逐层计算value
// 先计算每个节点的流入value
const nodeInflow = new Array(nodes.length).fill(0);
// 第一层Total -> domain
for (const link of links) {
if (link.source === 0 && link.value !== undefined) {
nodeInflow[link.target] += link.value;
}
}
// 更新domain节点的value
for (const domainNode of domainNodes) {
nodes[domainNode.index].value = nodeInflow[domainNode.index];
}
// 后续层级根据父节点的value和比例计算连接value
for (const link of links) {
if (link.source !== 0 && link.ratio !== undefined) {
const parentValue = nodes[link.source].value;
link.value = parentValue * link.ratio;
nodeInflow[link.target] += link.value;
// 更新目标节点的value
nodes[link.target].value = nodeInflow[link.target];
}
}
// 清理ratio字段
for (const link of links) {
delete link.ratio;
delete link.childCount;
}
// 4. 对每层进行归一化调整
for (const depth in depthGroups) {
if (depth === '0') continue; // 跳过Total
const nodeIndices = depthGroups[depth];
let currentTotal = 0;
for (const idx of nodeIndices) {
currentTotal += nodes[idx].value || 0;
}
// 归一化系数
const normalizationFactor = currentTotal > 0 ? BASE_VALUE / currentTotal : 1;
// 调整该层所有节点的value
for (const idx of nodeIndices) {
nodes[idx].value = (nodes[idx].value || 0) * normalizationFactor;
}
// 同时调整流入该层节点的连接value
for (const link of links) {
if (nodeIndices.includes(link.target)) {
link.value = (link.value || 0) * normalizationFactor;
}
}
console.log(`Depth ${depth} 归一化: 原始total=${currentTotal.toFixed(2)}, 归一化后=${BASE_VALUE}, 系数=${normalizationFactor.toFixed(4)}`);
}
// 验证每层的总value
const depthTotals = {};
for (const node of nodes) {
const depth = node.depth;
if (!depthTotals[depth]) {
depthTotals[depth] = 0;
}
depthTotals[depth] += node.value;
}
console.log('最终每层节点value总和:', depthTotals);
// 验证数据完整性
const invalidLinks = links.filter(link =>
link.source === undefined ||
link.target === undefined ||
link.value === undefined ||
isNaN(link.source) ||
isNaN(link.target) ||
isNaN(link.value) ||
link.source >= nodes.length ||
link.target >= nodes.length ||
link.source < 0 ||
link.target < 0
);
if (invalidLinks.length > 0) {
console.error('发现无效连接:', invalidLinks);
// 过滤掉无效连接
const validLinks = links.filter(link =>
link.source !== undefined &&
link.target !== undefined &&
link.value !== undefined &&
!isNaN(link.source) &&
!isNaN(link.target) &&
!isNaN(link.value) &&
link.source >= 0 &&
link.target >= 0 &&
link.source < nodes.length &&
link.target < nodes.length
);
console.log('过滤后的有效连接数量:', validLinks.length);
return {
nodes: nodes,
links: validLinks
};
}
console.log('6层桑基图数据构建完成:', {
nodesCount: nodes.length,
linksCount: links.length,
totalSpeciesCount: totalSpeciesCount,
sampleNodes: nodes.slice(0, 3).map((n, i) => ({index: i, ...n})),
sampleLinks: links.slice(0, 3)
});
// 详细验证数据结构
console.log('=== Data validation ===');
console.log('All nodes:', nodes.map((n, i) =>
`[${i}] ${n.name} (${n.levelName || n.level}) - counts:${n.count || 'N/A'}`
).slice(0, 20).join(', ') + (nodes.length > 20 ? ` ... ${nodes.length}nodes in total` : ''));
console.log('Top 10 connections:', links.slice(0, 10).map(l => {
const source = nodes[l.source];
const target = nodes[l.target];
return `${source?.name}(${source?.levelName})[${l.source}] -> ${target?.name}(${target?.levelName})[${l.target}] (${(l.value * 100).toFixed(2)}%)`;
}));
// 检查是否有重复的节点名称
const nodeNames = nodes.map(n => n.name);
const duplicates = nodeNames.filter((name, index) => nodeNames.indexOf(name) !== index);
if (duplicates.length > 0) {
console.warn('Warning: Duplicate node names detected:', [...new Set(duplicates)]);
}
return {
nodes: nodes,
links: links
};
} catch (error) {
console.error('An error occurred while constructing the Sankey chart data:', error);
return { nodes: [], links: [] };
}
},
// 5. 统计数据 (Taxonomy)
async getTaxonomyStats(level, cultured_type) {
// 先获取总数
const countQuery = `
SELECT COUNT(DISTINCT \`${level}\`) as total_count
FROM cultured_data m
WHERE \`${level}\` IS NOT NULL AND \`${level}\` != '' AND m.cultured_type = ?
`;
const [countResult] = await pool.execute(countQuery, [cultured_type]);
const total = countResult[0].total_count;
// 获取详细数据
const query = `
SELECT \`${level}\` as name, COUNT(DISTINCT m.species_dive_id) as value
FROM cultured_data m
WHERE \`${level}\` IS NOT NULL AND \`${level}\` != '' AND m.cultured_type = ?
GROUP BY \`${level}\`
ORDER BY value DESC
`;
const [rows] = await pool.execute(query, [cultured_type]);
const data = rows.map(r => ({ name: r.name, value: parseInt(r.value) }));
return { data, total };
},
// 6. 营养统计 (包含复杂的 Python List 解析)
async getNutritionStats(cultured_type) {
const query = `SELECT m.nutrition, COUNT(DISTINCT m.species_dive_id) as count FROM cultured_data m WHERE m.nutrition IS NOT NULL AND m.nutrition != '' AND m.cultured_type = ? GROUP BY m.nutrition`;
const [rows] = await pool.execute(query, [cultured_type]);
const frequency = {};
let totalSpecies = 0;
rows.forEach(row => {
totalSpecies += parseInt(row.count);
let str = row.nutrition.trim().replace(/^\[|\]$/g, '');
if(str) {
const list = str.split(',').map(i => i.trim().replace(/^['"]|['"]$/g, '')).filter(i => i);
list.forEach(nut => {
frequency[nut] = (frequency[nut] || 0) + parseInt(row.count);
});
}
});
const data = Object.entries(frequency)
.map(([name, value]) => ({ name, value }))
.sort((a, b) => b.value - a.value);
return { data, total: data.length, totalSpecies };
},
// 7. 理化性质统计
async getPhyschemStats(type, cultured_type) {
let data = {};
if (type === 'o2') {
// O2 类型统计
const query = `
SELECT
CASE
WHEN m.o2 IS NOT NULL AND m.o2 != '' THEN
SUBSTRING(m.o2, 1, CASE WHEN LOCATE(' ', m.o2) > 0 THEN LOCATE(' ', m.o2) - 1 ELSE LENGTH(m.o2) END)
ELSE NULL
END as name,
COUNT(DISTINCT m.species_dive_id) as count
FROM cultured_data m
WHERE m.o2 IS NOT NULL AND m.o2 != '' AND m.cultured_type = ?
GROUP BY
CASE
WHEN m.o2 IS NOT NULL AND m.o2 != '' THEN
SUBSTRING(m.o2, 1, CASE WHEN LOCATE(' ', m.o2) > 0 THEN LOCATE(' ', m.o2) - 1 ELSE LENGTH(m.o2) END)
ELSE NULL
END
ORDER BY count DESC
`;
const [rows] = await pool.execute(query, [cultured_type]);
const total = rows.reduce((sum, row) => sum + parseInt(row.count || 0), 0);
data = {
aerobe: 0,
anaerobe: 0,
facultative: 0,
total: total
};
rows.forEach(row => {
if (!row.name) return; // 跳过空值
const name = row.name.toLowerCase();
const count = parseInt(row.count || 0);
if (name.includes('aerobe') && !name.includes('anaerobe')) {
data.aerobe += count;
} else if (name.includes('anaerobe')) {
data.anaerobe += count;
} else if (name.includes('facultative')) {
data.facultative += count;
}
});
} else if (type === 'ph') {
// pH 分布统计
const query = `
SELECT
CASE
WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 1 AND 2 THEN 1
WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 2 AND 3 THEN 2
WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 3 AND 4 THEN 3
WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 4 AND 5 THEN 4
WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 5 AND 6 THEN 5
WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 6 AND 7 THEN 6
WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 7 AND 8 THEN 7
WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 8 AND 9 THEN 8
WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 9 AND 10 THEN 9
WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 10 AND 11 THEN 10
WHEN CAST(m.pH AS DECIMAL(4,2)) BETWEEN 11 AND 12 THEN 11
ELSE 12
END as ph_range,
COUNT(DISTINCT m.species_dive_id) as count
FROM cultured_data m
WHERE m.pH IS NOT NULL AND m.pH != '' AND m.pH != 'Unknown' AND m.cultured_type = ?
GROUP BY ph_range
ORDER BY ph_range
`;
const [rows] = await pool.execute(query, [cultured_type]);
data = rows.map(row => ({
x: parseInt(row.ph_range),
count: parseInt(row.count)
}));
} else if (type === 'temperature') {
// 温度类别分布统计
const query = `
SELECT
CASE
WHEN m.temperature IS NOT NULL AND m.temperature != '' THEN
SUBSTRING(m.temperature, 1, CASE WHEN LOCATE(' ', m.temperature) > 0 THEN LOCATE(' ', m.temperature) - 1 ELSE LENGTH(m.temperature) END)
ELSE NULL
END as temp_category,
COUNT(DISTINCT m.species_dive_id) as count
FROM cultured_data m
WHERE m.temperature IS NOT NULL AND m.temperature != '' AND m.temperature != 'Unknown' AND m.cultured_type = ?
GROUP BY
CASE
WHEN m.temperature IS NOT NULL AND m.temperature != '' THEN
SUBSTRING(m.temperature, 1, CASE WHEN LOCATE(' ', m.temperature) > 0 THEN LOCATE(' ', m.temperature) - 1 ELSE LENGTH(m.temperature) END)
ELSE NULL
END
ORDER BY count DESC
`;
const [rows] = await pool.execute(query, [cultured_type]);
data = rows.map(row => ({
x: row.temp_category || '',
count: parseInt(row.count || 0)
}));
}
return data;
},
// 8. 健康检查
async checkHealth() {
const conn = await pool.getConnection();
await conn.ping();
conn.release();
return true;
}
};
module.exports = mediaModel;