import os
import subprocess
from pathlib import Path


def generate_gmx_top_files(
    directory: str,
    num_cores: int,
    forcefield: str = "amber99sb-ildn",
    water_model: str = "tip3p",
) -> None:
    """
    Generate a GROMACS topology (.top) file for every PDB file in a directory.

    For each ``<name>.pdb`` found directly inside ``directory`` the command
    ``mpirun -np <num_cores> gmx_mpi pdb2gmx ...`` is run with the working
    directory set to ``directory``, producing ``<name>.gro`` and
    ``<name>_topol.top`` there. The command's stderr is written to
    ``<name>_topol.log`` next to the PDB file. PDB files whose
    ``<name>_topol.top`` already exists in ``directory`` are skipped.

    A non-zero exit status of the command is reported with ``print`` and
    processing continues with the next file.

    Args:
        directory (str): Directory containing PDB files.
        num_cores (int): Number of processes passed to ``mpirun -np``.
        forcefield (str): Value passed to ``-ff``. Default is "amber99sb-ildn".
        water_model (str): Value passed to ``-water``. Default is "tip3p".

    Raises:
        OSError: If the command cannot be started at all (for example
            ``mpirun`` is not installed) or the log file cannot be opened.
            These are not caught here.
    """
    # Sorted so the processing order is deterministic across runs.
    pdb_files = sorted(Path(directory).glob("*.pdb"))

    for pdb_file in pdb_files:
        name = pdb_file.stem  # File name without the '.pdb' extension
        parent = pdb_file.parent
        output_gro = f"{name}.gro"
        topol_file = f"{name}_topol.top"

        # The command runs with cwd=directory, so its relative outputs land
        # next to the PDB file. Check for an existing topology there, not in
        # the caller's current working directory.
        if (parent / topol_file).exists():
            continue

        # NOTE(review): presumably pdb2gmx itself is not MPI-parallel, in
        # which case -np > 1 starts several identical copies writing the same
        # output files -- confirm this is intended.
        cmd = [
            "mpirun", "-np", str(num_cores),
            "gmx_mpi", "pdb2gmx",
            # Absolute path: a relative one would be resolved against cwd.
            "-f", pdb_file.absolute().as_posix(),
            "-o", output_gro,
            "-ff", forcefield,
            "-water", water_model,
            "-ignh",
            "-p", topol_file,
        ]

        # Run the command and capture its stderr in a per-structure log file.
        log_file = parent / f"{name}_topol.log"
        try:
            with open(log_file, "w") as log:
                subprocess.run(cmd, check=True, cwd=directory, stderr=log)
        except subprocess.CalledProcessError as e:
            print(f"Error processing {pdb_file.name}: {e}")


# Example usage
# NOTE: these statements run whenever the module is executed or imported;
# wrap them in `if __name__ == "__main__":` if this file is ever imported.
directory = "pdb_top"  # Replace with your actual directory path
# os.cpu_count() may return None; fall back to 1 so the subtraction cannot fail.
num_cores = max((os.cpu_count() or 1) - 2, 1)  # Leave two cores free, use at least one
generate_gmx_top_files(directory, num_cores)  # Run the function