From 991bcc491fb6089cc80719f026e79da13748d72b Mon Sep 17 00:00:00 2001 From: lingyuzeng Date: Thu, 23 Oct 2025 18:00:10 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dinteractive=20viewer=E4=B8=AD?= =?UTF-8?q?=E7=9A=84metadata=E9=85=8D=E7=BD=AE=E5=92=8C=E5=88=97=E5=90=8D?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. 在metadata中添加database.load配置,确保前端能正确加载数据到DuckDB 2. 将neighbors列名从'neighbors'改为'__neighbors'以符合embedding-atlas标准 3. 更新launch_interactive_viewer、create_embedding_service和visualize_csv_comparison函数中的相关配置 4. 将main()帮助信息中的示例命令改为python -m visualization.comparison的调用方式 --- src/visualization/comparison.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/visualization/comparison.py b/src/visualization/comparison.py index 03ef9b3..0182a15 100644 --- a/src/visualization/comparison.py +++ b/src/visualization/comparison.py @@ -20,13 +20,17 @@ def launch_interactive_viewer(df: pd.DataFrame, text_column: str, port: int = 50 from embedding_atlas.utils import Hasher import pathlib - # 创建metadata + # 创建metadata - 添加database配置 metadata = { "columns": { "id": "_row_index", "text": text_column, "embedding": {"x": "projection_x", "y": "projection_y"}, - "neighbors": "neighbors" + "neighbors": "__neighbors" # 使用双下划线 + }, + "database": { + "type": "wasm", + "load": True # 关键: 告诉前端加载数据 } } @@ -109,7 +113,7 @@ def create_embedding_service( text=text_column, x="projection_x", y="projection_y", - neighbors="neighbors", + neighbors="__neighbors", # 改为双下划线 model=model, batch_size=batch_size, umap_args=umap_args @@ -234,7 +238,7 @@ def visualize_csv_comparison( text=text_column, x="projection_x", y="projection_y", - neighbors="neighbors", + neighbors="__neighbors", # 改为双下划线 model=model, batch_size=batch_size, umap_args=umap_args @@ -298,11 +302,11 @@ def main(): formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: - python visualize_csv_comparison.py file1.csv file2.csv - python visualize_csv_comparison.py file1.csv 
file2.csv --column1 smiles --column2 SMILES - python visualize_csv_comparison.py file1.csv file2.csv --label1 "Dataset A" --label2 "Dataset B" - python visualize_csv_comparison.py file1.csv file2.csv --output comparison.png - python visualize_csv_comparison.py file1.csv file2.csv --interactive --port 8080 + python -m visualization.comparison file1.csv file2.csv + python -m visualization.comparison file1.csv file2.csv --column1 smiles --column2 SMILES + python -m visualization.comparison file1.csv file2.csv --label1 "Dataset A" --label2 "Dataset B" + python -m visualization.comparison file1.csv file2.csv --output comparison.png + python -m visualization.comparison file1.csv file2.csv --interactive --port 8080 """ )