# search_macro/test_environment.py

#!/usr/bin/env python3
"""
Test script to verify the pixi environment and dependencies are working correctly.
"""

def _report_import(label, importer):
    """Run one import check and print its outcome.

    Args:
        label: Human-readable package name used in the printed messages.
        importer: Zero-argument callable that performs the import(s) and
            returns the top-level module object.

    Returns:
        bool: True if the import succeeded, False if it raised ImportError.
    """
    try:
        module = importer()
    except ImportError as e:
        print(f"❌ {label} import failed: {e}")
        return False

    print(f"✅ {label} imported successfully")
    # A module lacking __version__ has still been imported successfully, so
    # fall back to a placeholder instead of letting AttributeError escape.
    print(f"   {label} version: {getattr(module, '__version__', 'unknown')}")
    return True


def test_imports():
    """Test that all required packages can be imported.

    Every package is checked and reported, even when an earlier one is
    missing, so a single run lists everything that needs to be installed.

    Returns:
        bool: True only if RDKit, Joblib, Pandas and TQDM all imported.
    """
    from functools import partial
    from importlib import import_module

    def _import_rdkit():
        # Beyond the top-level package, the RDKit submodule and names below
        # must be importable as well; any failure raises ImportError.
        import rdkit
        from rdkit import Chem  # noqa: F401
        from rdkit.Chem import SDMolSupplier, AllChem  # noqa: F401
        return rdkit

    print("Testing package imports...")

    importers = [
        ("RDKit", _import_rdkit),
        ("Joblib", partial(import_module, "joblib")),
        ("Pandas", partial(import_module, "pandas")),
        ("TQDM", partial(import_module, "tqdm")),
    ]

    # Build the full list first: passing a generator to all() would stop at
    # the first failure and skip reporting the remaining packages.
    outcomes = [_report_import(label, importer) for label, importer in importers]
    return all(outcomes)

def test_rdkit_functionality():
    """Exercise SMILES parsing and SMARTS substructure matching in RDKit.

    Three checks run in order: parsing the SMILES 'CCO', matching it against
    the SMARTS 'CCO', and matching a benzene SMILES against a benzene SMARTS.

    Returns:
        bool: True if every check succeeds. False on the first failed check,
        or if any exception (including a failed RDKit import) is raised.
    """
    print("\nTesting RDKit functionality...")

    try:
        from rdkit import Chem
        from rdkit.Chem import SDMolSupplier

        # Check 1: a SMILES string must parse into a molecule object.
        ethanol = Chem.MolFromSmiles('CCO')
        if ethanol is None:
            print("❌ SMILES parsing failed")
            return False
        print("✅ SMILES parsing works")

        # Check 2: the parsed molecule must match the same pattern as SMARTS.
        ethanol_query = Chem.MolFromSmarts('CCO')
        if not ethanol.HasSubstructMatch(ethanol_query):
            print("❌ SMARTS matching failed")
            return False
        print("✅ SMARTS matching works")

        # Check 3: aromatic ring matching with a common pattern.
        ring = Chem.MolFromSmiles('c1ccccc1')
        ring_query = Chem.MolFromSmarts('c1ccccc1')
        if not ring.HasSubstructMatch(ring_query):
            print("❌ Benzene pattern matching failed")
            return False
        print("✅ Benzene pattern matching works")

    except Exception as exc:
        print(f"❌ RDKit functionality test failed: {exc}")
        return False

    return True

def test_multiprocessing(required_processes=220):
    """Test multiprocessing capabilities.

    Runs a small Joblib job on two workers and compares the result with the
    expected squares, then reports the CPU core count.

    Args:
        required_processes: Number of worker processes the core count is
            compared against. Defaults to 220, the value that was previously
            hard-coded. Having fewer cores only prints a warning; it does
            not fail the test.

    Returns:
        bool: True if the parallel job produced the expected result. False
        if it produced a wrong result or any exception was raised (for
        example Joblib is not installed).
    """
    print("\nTesting multiprocessing...")

    try:
        import multiprocessing
        from joblib import Parallel, delayed

        # Test basic parallel processing
        def square(x):
            return x * x

        results = Parallel(n_jobs=2)(delayed(square)(i) for i in range(5))
        expected = [0, 1, 4, 9, 16]

        if results != expected:
            print(f"❌ Joblib parallel processing failed: got {results}, expected {expected}")
            return False
        print("✅ Joblib parallel processing works")

        # Check CPU count
        cpu_count = multiprocessing.cpu_count()
        print(f"✅ Available CPU cores: {cpu_count}")

        if cpu_count >= required_processes:
            print(f"✅ Sufficient CPU cores for {required_processes} processes")
        else:
            # Advisory only: the environment still works with fewer cores.
            print(f"⚠️  Only {cpu_count} CPU cores available, consider reducing N_PROCESSES")

    except Exception as e:
        print(f"❌ Multiprocessing test failed: {e}")
        return False

    return True

def test_file_operations():
    """Check that the expected project directories and notebooks exist.

    Paths are resolved relative to the current working directory. A missing
    ``./data`` directory only produces a warning; a missing ``./notebooks``
    directory or a missing notebook file fails the test.

    Returns:
        bool: True if the notebooks directory and both notebooks exist.
        False otherwise, or if any exception (including a failed import)
        is raised.
    """
    print("\nTesting file operations...")

    try:
        from pathlib import Path
        # Not referenced below; kept because a failed import here makes this
        # check return False, exactly as before.
        import pandas as pd

        # The data directory is optional at this point.
        if Path('./data').exists():
            print("✅ Data directory found")
        else:
            print("⚠️  Data directory not found (will be created)")

        nb_root = Path('./notebooks')
        if not nb_root.exists():
            print("❌ Notebooks directory not found")
            return False
        print("✅ Notebooks directory found")

        # (file name, message when present, message when absent), checked in
        # order; the first missing notebook ends the test.
        required_notebooks = (
            ('01_extract_sdf_files.ipynb',
             "✅ Extraction notebook found",
             "❌ Extraction notebook not found"),
            ('02_rdkit_substructure_matching.ipynb',
             "✅ Matching notebook found",
             "❌ Matching notebook not found"),
        )
        for filename, found_msg, missing_msg in required_notebooks:
            if not (nb_root / filename).exists():
                print(missing_msg)
                return False
            print(found_msg)

    except Exception as exc:
        print(f"❌ File operations test failed: {exc}")
        return False

    return True

if __name__ == "__main__":
    # Script entry point: run every environment check, print a summary, and
    # exit with status 0 on success or 1 on failure.
    import sys

    print("🧪 Testing pixi environment setup...\n")

    # Run all tests
    tests = [
        test_imports,
        test_rdkit_functionality,
        test_multiprocessing,
        test_file_operations
    ]

    # Collect every outcome before combining them so that a failing check
    # does not prevent the remaining checks from running and reporting.
    outcomes = [test() for test in tests]
    all_tests_passed = all(outcomes)

    print("\n" + "="*50)
    if all_tests_passed:
        print("🎉 All tests passed! Environment is ready for use.")
        print("\nNext steps:")
        print("1. Run: pixi shell")
        print("2. Start jupyter: jupyter notebook notebooks/")
        print("3. Open 01_extract_sdf_files.ipynb to begin extraction")
    else:
        print("❌ Some tests failed. Please check the errors above.")
        print("Try running: pixi install")

    print("="*50)

    # A non-zero exit status lets shells, task runners and CI detect a
    # broken environment instead of relying on the printed text alone.
    sys.exit(0 if all_tests_passed else 1)