feat: 添加CSV文件比较可视化功能并更新pixi配置

主要改动:
1. 新增CSV文件比较可视化功能:
   - 添加src/visualization/comparison.py模块，支持比较两个CSV文件并以不同颜色进行可视化
   - 支持命令行和API两种使用方式
   - 可生成静态图像或启动交互式查看器
   - 支持自定义标签、模型和UMAP参数

2. 更新pixi.toml配置:
   - 添加linux-64平台支持
   - 增加多个依赖项：ipykernel、anywidget、rdkit、selfies、fastapi、fastmcp、docker等
   - 完善依赖版本约束

3. 更新README.md文档:
   - 添加CSV文件比较可视化功能说明和使用示例
This commit is contained in:
2025-10-23 19:27:13 +08:00
parent deecbfe0fc
commit 60c5ce152b
3 changed files with 99 additions and 8 deletions

View File

@@ -12,7 +12,7 @@ import os
from typing import Optional, List, Dict, Any
import numpy as np
def launch_interactive_viewer(df: pd.DataFrame, text_column: str, port: int = 5055, host: str = "localhost"):
def launch_interactive_viewer(df: pd.DataFrame, text_column: str, port: int = 5055, host: str = "0.0.0.0"):
"""使用Python API启动交互式服务器"""
try:
from embedding_atlas.server import make_server
@@ -66,7 +66,7 @@ def create_embedding_service(
texts2: List[str],
labels: tuple = ("Group1", "Group2"),
port: int = 5055,
host: str = "localhost",
host: str = "0.0.0.0",
text_column: str = "text",
model: str = "all-MiniLM-L6-v2",
batch_size: int = 32,
@@ -162,7 +162,7 @@ def visualize_csv_comparison(
label2: Optional[str] = None,
launch_interactive: bool = False,
port: int = 5055,
host: str = "localhost",
host: str = "0.0.0.0",
model: str = "all-MiniLM-L6-v2",
batch_size: int = 32,
umap_args: Optional[Dict[str, Any]] = None
@@ -180,7 +180,7 @@ def visualize_csv_comparison(
label2: Label for the second dataset (default: filename)
launch_interactive: Whether to launch interactive viewer (default: False)
port: Port for interactive viewer (default: 5055)
host: Host for interactive viewer (default: "localhost")
host: Host for interactive viewer (default: "0.0.0.0")
model: Embedding model to use (default: "all-MiniLM-L6-v2")
batch_size: Batch size for embedding computation (default: 32)
umap_args: UMAP arguments as dictionary (default: None)
@@ -333,7 +333,7 @@ Examples:
parser.add_argument("--label2", help="Label for the second dataset (default: filename)")
parser.add_argument("--interactive", "-i", action="store_true", help="Launch interactive viewer")
parser.add_argument("--port", "-p", type=int, default=5055, help="Port for interactive viewer (default: 5055)")
parser.add_argument("--host", default="localhost", help="Host for interactive viewer (default: localhost)")
parser.add_argument("--host", default="0.0.0.0", help="Host for interactive viewer (default: 0.0.0.0)")
parser.add_argument("--model", default="all-MiniLM-L6-v2", help="Embedding model to use (default: all-MiniLM-L6-v2)")
parser.add_argument("--batch-size", type=int, default=32, help="Batch size for embedding computation (default: 32)")