194 lines
5.8 KiB
Python
194 lines
5.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test script to verify the pixi environment and dependencies are working correctly.
|
|
"""
|
|
|
|
def test_imports():
    """Test that all required packages can be imported.

    Checks RDKit, Joblib, Pandas and TQDM. Every package is checked even when
    an earlier one fails, so a single run lists all missing dependencies
    instead of only the first one.

    Returns:
        bool: True if every package imported successfully, False otherwise.
    """
    import importlib

    def load_rdkit():
        # Importing the top-level package alone is not enough: also pull in
        # Chem, SDMolSupplier and AllChem so a partially broken install is
        # reported here as an import failure.
        import rdkit
        from rdkit import Chem
        from rdkit.Chem import SDMolSupplier, AllChem
        return rdkit

    def loader_for(module_name):
        # Bind module_name now; the loader is only called later, in the loop.
        return lambda: importlib.import_module(module_name)

    print("Testing package imports...")

    # (label used in the messages, zero-argument callable returning the module)
    required = (
        ("RDKit", load_rdkit),
        ("Joblib", loader_for("joblib")),
        ("Pandas", loader_for("pandas")),
        ("TQDM", loader_for("tqdm")),
    )

    all_imported = True
    for label, load in required:
        try:
            module = load()
        except ImportError as e:
            print(f"❌ {label} import failed: {e}")
            all_imported = False
            continue  # keep going so the remaining packages are reported too

        print(f"✅ {label} imported successfully")
        # A missing __version__ attribute should not crash the environment check.
        version = getattr(module, "__version__", "unknown")
        print(f" {label} version: {version}")

    return all_imported
|
|
|
|
def test_rdkit_functionality():
    """Exercise a few basic RDKit operations.

    Parses a SMILES string, matches it against a SMARTS pattern, and matches
    an aromatic ring pattern against benzene. Any exception raised along the
    way is printed and treated as a failure.

    Returns:
        bool: True when all three checks succeed, False otherwise.
    """
    print("\nTesting RDKit functionality...")

    try:
        from rdkit import Chem
        # Not used below; imported only to confirm the name is available.
        from rdkit.Chem import SDMolSupplier

        # Check 1: a simple SMILES string must parse into a molecule.
        ethanol = Chem.MolFromSmiles('CCO')
        if ethanol is None:
            print("❌ SMILES parsing failed")
            return False
        print("✅ SMILES parsing works")

        # Check 2: the parsed molecule must match the equivalent SMARTS query.
        ethanol_query = Chem.MolFromSmarts('CCO')
        if not ethanol.HasSubstructMatch(ethanol_query):
            print("❌ SMARTS matching failed")
            return False
        print("✅ SMARTS matching works")

        # Check 3: an aromatic ring query must match benzene.
        ring = Chem.MolFromSmiles('c1ccccc1')
        ring_query = Chem.MolFromSmarts('c1ccccc1')
        if not ring.HasSubstructMatch(ring_query):
            print("❌ Benzene pattern matching failed")
            return False
        print("✅ Benzene pattern matching works")

    except Exception as exc:
        print(f"❌ RDKit functionality test failed: {exc}")
        return False
    else:
        return True
|
|
|
|
def test_multiprocessing(required_processes=220):
    """Test multiprocessing capabilities.

    Runs a tiny two-worker Joblib job and compares its output with the
    expected squares, then reports the CPU count and whether it covers the
    requested number of processes. Too few cores only produces a warning; it
    does not fail the test.

    Args:
        required_processes: Number of worker processes to compare the CPU
            count against. Defaults to 220, the value that was previously
            hard-coded.

    Returns:
        bool: True if the parallel job produced the expected results, False
        if it did not or if any exception was raised.
    """
    print("\nTesting multiprocessing...")

    try:
        import multiprocessing
        from joblib import Parallel, delayed

        # Test basic parallel processing
        def square(x):
            return x * x

        results = Parallel(n_jobs=2)(delayed(square)(i) for i in range(5))
        expected = [0, 1, 4, 9, 16]

        if results != expected:
            print(f"❌ Joblib parallel processing failed: got {results}, expected {expected}")
            return False
        print("✅ Joblib parallel processing works")

        # Check CPU count
        cpu_count = multiprocessing.cpu_count()
        print(f"✅ Available CPU cores: {cpu_count}")

        if cpu_count >= required_processes:
            print(f"✅ Sufficient CPU cores for {required_processes} processes")
        else:
            # Advisory only: fewer cores than processes is not treated as a failure.
            print(f"⚠️ Only {cpu_count} CPU cores available, consider reducing N_PROCESSES")

    except Exception as e:
        print(f"❌ Multiprocessing test failed: {e}")
        return False

    return True
|
|
|
|
def test_file_operations(base_dir="."):
    """Test file operations and paths.

    Looks for a ``data`` directory (a missing one only produces a warning)
    and for a ``notebooks`` directory containing the extraction and matching
    notebooks (a missing one fails the test). Both notebooks are always
    checked, so one run reports every missing file.

    Args:
        base_dir: Directory in which ``data`` and ``notebooks`` are looked
            up. Defaults to the current working directory, matching the
            previous behaviour.

    Returns:
        bool: True if the notebooks directory and both notebooks exist,
        False otherwise or if any exception was raised.
    """
    print("\nTesting file operations...")

    try:
        from pathlib import Path

        root = Path(base_dir)

        # Test path operations
        if (root / 'data').exists():
            print("✅ Data directory found")
        else:
            print("⚠️ Data directory not found (will be created)")

        notebooks_dir = root / 'notebooks'
        if not notebooks_dir.exists():
            print("❌ Notebooks directory not found")
            return False
        print("✅ Notebooks directory found")

        # Check if notebooks exist
        expected_notebooks = (
            ("Extraction", '01_extract_sdf_files.ipynb'),
            ("Matching", '02_rdkit_substructure_matching.ipynb'),
        )
        all_found = True
        for label, filename in expected_notebooks:
            if (notebooks_dir / filename).exists():
                print(f"✅ {label} notebook found")
            else:
                print(f"❌ {label} notebook not found")
                all_found = False  # keep checking so every missing file is listed

        if not all_found:
            return False

    except Exception as e:
        print(f"❌ File operations test failed: {e}")
        return False

    return True
|
|
|
|
def main():
    """Run every environment check and print a summary.

    All checks are executed even if an earlier one fails, so the output
    lists every problem found in a single run.

    Returns:
        int: 0 when all checks passed, 1 otherwise, for use as the process
        exit status.
    """
    print("🧪 Testing pixi environment setup...\n")

    # Run all tests
    tests = [
        test_imports,
        test_rdkit_functionality,
        test_multiprocessing,
        test_file_operations,
    ]

    # Build the full list first: all() on a generator would stop at the first
    # failure and skip the remaining checks.
    outcomes = [test() for test in tests]
    all_tests_passed = all(outcomes)

    print("\n" + "=" * 50)
    if all_tests_passed:
        print("🎉 All tests passed! Environment is ready for use.")
        print("\nNext steps:")
        print("1. Run: pixi shell")
        print("2. Start jupyter: jupyter notebook notebooks/")
        print("3. Open 01_extract_sdf_files.ipynb to begin extraction")
    else:
        print("❌ Some tests failed. Please check the errors above.")
        print("Try running: pixi install")

    print("=" * 50)
    return 0 if all_tests_passed else 1


if __name__ == "__main__":
    # Exit non-zero on failure so shells and CI can detect a broken environment.
    raise SystemExit(main())
|