attached the webapplication GUI

This commit is contained in:
zinph
2020-01-26 18:42:57 -05:00
parent 4784da6f77
commit 1c84b966db
8 changed files with 391 additions and 100 deletions

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
uploads/
LIBRARIES/
*.pyc

1
Data/ery_core.txt Normal file
View File

@@ -0,0 +1 @@
CC[C@H]1OC(=O)[*][*sugar*][C@H](C)[*sugar*][*]C[*]C(=O)[*][C@@H](O)[*]1

28
README.md Normal file
View File

@@ -0,0 +1,28 @@
# PKS Enumerator V2
HTML GUI for PKS enumerator V2 and flask.
## Setup
For better results, create a conda environment and activate it like:
```sh
conda create -n v2pks python
```
then:
```sh
conda activate v2pks
```
Then install all needed dependencies from `requirements.txt` in the following way:
```sh
pip install -r requirements.txt
```
## How to Use
Start Flask server :
```sh
python server.py
```
Then go to localhost:5000 on the browser to load GUI.

226
SIME.py
View File

@@ -12,6 +12,7 @@
import os
import re
import os.path
import datetime
from molvs import standardize_smiles
from random import *
from rdkit import Chem
@@ -24,61 +25,67 @@ from operator import itemgetter
class SIME:
def __init__(self):
def __init__(self, structural_motifs_file, sugars_file, max_repeat_motifs, minimal_sugars, library_size, enumerate_all_SMs, enumerate_all_sugars):
## self.info = {} # to record number of compounds for each length
self.total_numcompounds = 0
self.load_sugars()
self.load_extenders()
self.library_size = int(input('Desired Library Size (numbers only) :'))
## self.SM_info = {}
self.smile_file_name = self.create_directory()+'/mcrl'
self.max_repeat = int(input('Maximum occurrence of the same structural motifs per scaffold (number only) :'))
self.min_sugar = int(input('Minimal number of sugars per scaffold (number only) :'))
self.ext_question = input('Generate all possible stereocenters for extender structural motifs at joining carbons? y or n :')
self.sugar_question = input('Generate all possible stereocenters for sugars at joining carbons? y or n :')
self.total_numcompounds = 0
self.structural_motifs_file = structural_motifs_file
self.sugars_file = sugars_file
self.max_repeat_motifs = max_repeat_motifs
self.minimal_sugars = minimal_sugars
self.library_size = library_size
self.enumerate_all_SMs = enumerate_all_SMs
self.enumerate_all_sugars = enumerate_all_sugars
date_stamp = datetime.datetime.utcnow().strftime("%Y-%m-%d-%H-%M-%S")
self.smile_file_name = 'LIBRARIES/'+date_stamp+'_mcrl'
self.info_manager = open(self.smile_file_name + '_info','a+')
self.info_manager.write('Desired Library Size (numbers only) :'+ self.library_size + '\n')
self.info_manager.write('Maximum occurrence of the same structural motifs per scaffold (number only) :'+ str(self.max_repeat) + '\n')
self.info_manager.write('Minimal number of sugars per scaffold (number only) :'+ str(self.min_sugar) + '\n')
self.info_manager.write('Generate all possible stereocenters for extender structural motifs at joining carbons? y or n :y'+ self.ext_question + '\n')
self.info_manager.write('Generate all possible stereocenters for sugars at joining carbons? y or n :' + self.sugar_question + '\n')
self.info_manager.write(f'Desired Library Size (numbers only) : {self.library_size}\n')
self.info_manager.write(f'Maximum occurrence of the same structural motifs per scaffold (number only) : {self.max_repeat_motifs}\n')
self.info_manager.write(f'Minimal number of sugars per scaffold (number only) :{self.minimal_sugars}\n')
self.info_manager.write(f'Generate all possible stereocenters for extender structural motifs at joining carbons? {self.enumerate_all_SMs}\n')
self.info_manager.write(f'Generate all possible stereocenters for sugars at joining carbons? {self.enumerate_all_sugars}\n')
def create_directory(self):
old_directory = os.getcwd()
newfolder = input('Peferred Directory Name for Output Files: ')
new_directory = os.path.join(old_directory, newfolder)
while os.path.exists(new_directory):
print('This folder exists or input is invalid. Try again.')
newfolder = input('Peferred Directory Name for Output Files: ')
new_directory = os.path.join(old_directory,newfolder)
os.mkdir(new_directory)
return new_directory
self.load_sugars()
self.load_extenders()
# def create_directory(self):
# old_directory = os.getcwd()
# newfolder = input('Peferred Directory Name for Output Files: ')
# new_directory = os.path.join(old_directory, newfolder)
# while os.path.exists(new_directory):
# print('This folder exists or input is invalid. Try again.')
# newfolder = input('Peferred Directory Name for Output Files: ')
# new_directory = os.path.join(old_directory,newfolder)
# os.mkdir(new_directory)
# return new_directory
def load_sugars(self):
f = open('Data/sugars','r')
original_sugars = f.read().splitlines()
if self.sugar_question.lower() == 'no' or self.sugar_question.lower() == 'n':
self.sugars = [r.replace('[*R*]','') for r in original_sugars]
if self.sugars_file == None:
with open('Data/sugars', 'r') as f:
original_sugars = f.read().splitlines()
else:
original_sugars = self.sugars_file.read().splitlines()
if self.enumerate_all_sugars.lower() == 'yes':
sugars = []
for i in original_sugars:
sugars.append(self.ENUMERATE_sugar_stereocenters(i))
self.sugars = [r.replace('[*R*]','') for r in list(chain(*sugars))]
else:
self.sugars = [r.replace('[*R*]','') for r in original_sugars]
self.make_full_sugar_list() # make self.full_list by adding hydroxyl to self.sugars
self.info_manager.write('\n\nSugars\n'+'\n'.join(original_sugars)+'\n')
def load_extenders(self):
f = open('Data/selected_extenders.txt','r')
original_extenders = f.read().splitlines()
if self.ext_question.lower() == 'no' or self.ext_question.lower() == 'n':
self.extenders = [r.replace('[*R*]','') for r in original_extenders]
if self.structural_motifs_file == None:
with open('Data/selected_extenders.txt','r') as f:
original_extenders = f.read().splitlines()
else:
original_extenders = self.structural_motifs_file.read().splitlines()
if self.enumerate_all_SMs.lower() == 'yes':
self.extenders = [r.replace('[*R*]','') for r in self.enumerate_SM_stereocenters(original_extenders)]
else:
self.extenders = [r.replace('[*R*]','') for r in original_extenders]
self.info_manager.write('\n\nStructural Motifs\n'+'\n'.join(original_extenders)+'\n')
self.info_manager.close()
@@ -87,7 +94,7 @@ class SIME:
'''
self.full_list contains all the sugars and hydroxyl groups.
'''
hydroxyl = ['[C@H](O)','[C@@H](O)']
hydroxyl = ["[C@H](O)","[C@@H](O)"]
self.full_list = self.sugars.copy()
self.full_list += hydroxyl # contains all sugars and hydroxyl groups
@@ -96,10 +103,9 @@ class SIME:
'''
Take in sugar strings that start and end with [*R*], and return a list of sugars with two different stereoceters for the joining carbon.
'''
sugar_stereocenters = []
# if the stereocenter of the joining carbon isn't defined
if smile[5] is 'C':
if smile[5] is "C":
# template = smile[0:5] + '[C@H]' + smile[6:]
template = smile.replace(smile[5], "[C@@H]", 1)
sugar_stereocenters.append(template)
@@ -109,48 +115,51 @@ class SIME:
sugar_stereocenters.append(smile)
if "@" in smile[:10]:
if "@@" in smile[:10]: # for clockwise
template = smile.replace('@@', '@', 1)
template = smile.replace("@@", "@", 1)
else:
template = smile.replace('@', '@@', 1)
template = smile.replace("@", "@@", 1)
sugar_stereocenters.append(template)
return sugar_stereocenters
def locate_SM_replace_points(self, smile):
def remove_SM_digits(self, smile):
'''
Take a string, and locate places for replacement. They are indicated by [1*], [2*], etc.... Return the string with all these joints replaced with [*]s.
Take a string, and locate places for replacement. They are indicated by [1*], [2*], etc....
Return the string with all these joints replaced with [*]s.
'''
numbers = set(re.findall(r'\d+', smile))
possible_joints = ['['+str(m) +'*]' for m in numbers]
possible_joints = ["["+str(m) +"*]" for m in numbers]
for each in possible_joints:
smile = smile.replace(each, '[*]')
smile = smile.replace(each, "[*]")
return smile
def generate_templates_withextenders(self, smile):
'''
Generate all possible templates. Takes in a smile string (structural core). This function only deals with extenders or structural motifs.
Then, insert all possible extenders at those joint positions.
'''
smile_with_stars = [[r] for r in self.locate_SM_replace_points(smile).split('[*]')] # take a string with [*]s and split into different fragments, convert each fragment into a list
counter = 1
# smile_with_stars = [ fragment1, fragment2, fragment3 ,...] all are split at joint positions
# insert self.extenders in between all fragments (except for the first and last blocks).
# so it will be something like [fragment1, [self.extenders], fragment2, [self.extenders], fragment3, [self.extenders] ,...]
for i in range(len(smile_with_stars)-1):
shuffle(self.extenders)
smile_with_stars.insert(counter,self.extenders)
counter+=2
template = [x for x in smile_with_stars if x != ['']]
self.make_compounds(template)
return template
# def generate_templates_withextenders(self, smile):
# '''
# Generate all possible templates. Takes in a smile string (structural core).
# This function only deals with extenders or structural motifs.
# Then, insert all possible extenders at those joint positions.
# '''
# smile_with_stars = [[r] for r in self.remove_SM_digits(smile).split('[*]')] # take a string with [*]s and split into different fragments, convert each fragment into a list
# counter = 1
# # smile_with_stars = [ fragment1, fragment2, fragment3 ,...] all are split at joint positions
# # insert self.extenders in between all fragments (except for the first and last blocks).
# # so it will be something like [fragment1, [self.extenders], fragment2, [self.extenders], fragment3, [self.extenders] ,...]
# for i in range(len(smile_with_stars)-1):
# shuffle(self.extenders)
# smile_with_stars.insert(counter,self.extenders)
# counter+=2
#
# template = [x for x in smile_with_stars if x != ['']]
# self.make_compounds(template)
# return template
def generate_templates_withExtendersNSugars(self,smile):
'''
Generate all possible templates. Takes in a smile string (structural core).
This function deals with both structural motifs and sugars.
Then, insert all possible SMs and sugars at those joint positions.
'''
smile_with_stars = self.string_splitter(self.locate_SM_replace_points(smile), '[*]')
smile_with_stars = self.string_splitter(self.remove_SM_digits(smile), '[*]')
template = []
# create a template holder that will have a list of extenders or sugars at the respective split location points and the rest of the core will remain the same.
# The position of all these fragments (core, extenders, sugars) have to be in the correct order.
@@ -166,40 +175,74 @@ class SIME:
template = [x for x in template if x != ['']] # [stable_fragment1, [possible extender motifs], stable_fragment2, [possible sugar moieties], ...]
SM_template = self.insert_SMs(template) # a list of possible extenders inserted at SM locations
SGR_order = self.generate_dummy_sugar_templates(SM_template,minimal_sugars=self.min_sugar) # At least how many sugars do you want in the macrolide scaffold? because of this, more complications.
SGR_order = self.generate_dummy_sugar_templates(SM_template,minimal_sugars=self.minimal_sugars) # At least how many sugars do you want in the macrolide scaffold? because of this, more complications.
for each in SGR_order:
current_SYMBOLsugar_template = self.add_SYMBOLsugars_to_dummy_templates(each,SM_template) # add the lists of sugars and full_list at the dummy positions
current_SYMBOLsugar_template = self.replace_SYMBOLsugars_with_dummies(each,SM_template) # add the lists of sugars and full_list at the dummy positions
current_sugar_template = self.insert_sugars_to_dummies(current_SYMBOLsugar_template)
self.make_compounds(current_sugar_template)
def make_compounds(self,template):
written = []
def make_compounds(self, template):
max_per_file = 1000000
file_counter = 1
file_temp = self.smile_file_name + '_'+str(file_counter)+'.smiles' # attempts to split files because they get too large. Name of the first file will be "file_" + this variable
file_handler = open(file_temp,'a+')
written = []
for item in product(*template):
if self.max_occurrence(list(item))[1] <= self.max_repeat: # If the count of most common SM is less than or equal to the number set up by the user
if self.total_numcompounds <= self.library_size:
if len(written) < 1000000:
if self.max_occurrence(list(item))[1] <= self.max_repeat_motifs: # If the count of most common SM is less than or equal to the number set up by the user
if self.library_size <= max_per_file:
if self.total_numcompounds <= self.library_size:
temp = ''.join([str(r) for r in item])
m = Chem.MolFromSmiles(temp)
self.total_numcompounds += 1
written.append(temp)
else:
file_handler.write('\n'.join(written)) # write smiles in written list
file_handler.close()
file_counter +=1
file_temp = self.smile_file_name + '_'+str(file_counter)+'.smiles'
file_handler = open(file_temp,'a+')
written = []
else:
break
file_handler.close()
file_handler.close()
self.write_to_file(written, file_counter)
break
elif self.library_size > max_per_file:
if self.total_numcompounds <= self.library_size:
if len(written) <= max_per_file:
temp = ''.join([str(r) for r in item])
self.total_numcompounds += 1
written.append(temp)
else:
self.write_to_file(written, file_counter)
file_counter +=1
written = []
else:
break
def write_to_file(self, compound_list, file_counter):
'''
When total compound is 1000000 or library size, this function will be called
to write compounds to file.
'''
print(compound_list)
file_temp = self.smile_file_name + '_'+str(file_counter)+'.smiles' # attempts to split files because they get too large. Name of the first file will be "file_" + this variable
with open(file_temp,'a+') as file_handler:
file_handler.write('\n'.join(compound_list))
# def make_compounds(self,template):
# written = []
# file_counter = 1
# file_temp = self.smile_file_name + '_'+str(file_counter)+'.smiles' # attempts to split files because they get too large. Name of the first file will be "file_" + this variable
# file_handler = open(file_temp,'a+')
# for item in product(*template):
# if self.max_occurrence(list(item))[1] <= self.max_repeat_motifs: # If the count of most common SM is less than or equal to the number set up by the user
# if self.total_numcompounds <= self.library_size:
# if len(written) < 1000000:
# temp = ''.join([str(r) for r in item])
# # m = Chem.MolFromSmiles(temp)
# self.total_numcompounds += 1
# written.append(temp)
# else:
# file_handler.write('\n'.join(written))
# file_handler.close()
# file_counter +=1
# file_temp = self.smile_file_name + '_'+str(file_counter)+'.smiles'
# file_handler = open(file_temp,'a+')
# written = []
# else:
# file_handler.write('\n'.join(written)) # write smiles in written list
# break
# file_handler.close()
def RS_check(self,smile,ringsize):
@@ -301,10 +344,9 @@ class SIME:
n = the least number of sugars the users want in each macrolide.
Default is one, i.e. there will be at least one sugar in each macrolide.
Generate a list of all possible templates using dummys as 'SUGARS' (intended for only sugars) and 'FULL_LIST (intended for sugars + hydroxy).'
'''
num_sugars = template.count(['[*sugar*]'])
list_with_atLeast_nSugars = n*['SUGARS']+(num_sugars-n)*['FULL_LIST'] # Make a new list with at least "n" "SUGARS" and the rest "FULL_LIST"
list_with_atLeast_nSugars = minimal_sugars*['SUGARS']+(num_sugars-minimal_sugars)*['FULL_LIST'] # Make a new list with at least "n" "SUGARS" and the rest "FULL_LIST"
sugar_lists_in_order = [] # make a new list to hold all possible sugar templates at each position
# now create all different arrangements of sugars, full_list. The positions of these blocks matter.
for i in permutations(list_with_atLeast_nSugars):
@@ -313,7 +355,7 @@ class SIME:
return sugar_lists_in_order
def add_SYMBOLsugars_to_dummy_templates(self, sugar_dummy_order,template_with_sugarinlist):
def replace_SYMBOLsugars_with_dummies(self, sugar_dummy_order,template_with_sugarinlist):
'''
each sugar_dummy_order looks like ('SUGARS', 'FULL_LIST', 'FULL_LIST', 'FULL_LIST')
template_with_sugarinlist looks like = [['1'], [ext1,ext2,...], ['2'],['[*sugar*]'],['3'], [ext1,ext2,...], ['[*sugar*]'],['4'], [ext1,ext2,...], ['5'], [ext1,ext2,...], ['6']]

84
main.py
View File

@@ -10,6 +10,75 @@
# Licence: <your licence>
#-------------------------------------------------------------------------------
import time
from flask import Flask, render_template, url_for, request
from werkzeug import secure_filename
import os
from SIME import *
app = Flask(__name__)
UPLOAD_FOLDER = 'uploads'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
@app.route('/')
def main_page():
return render_template('index.html')
@app.route('/collect_data',methods=['POST'])
def collect_data():
form_data = request.form
form_files = request.files
macrolide_core = form_files.get('macrolide_core')
structural_motifs_file = form_files.get('structural_motifs_file')
sugars_file = form_files.get('sugars_file')
#more options
max_repeat_motifs = int(form_data.get("max_repeat_motifs"))
minimal_sugars = int(form_data.get("minimal_sugars"))
library_size = int(form_data.get("library_size"))
if form_data.get("enumerate_all_SMs"):
enumerate_all_SMs = form_data.get("enumerate_all_SMs")
else:
enumerate_all_SMs = 'no'
if form_data.get("enumerate_all_sugars"):
enumerate_all_sugars = form_data.get("enumerate_all_sugars")
else:
enumerate_all_sugars = 'no'
#enumarate_all = form_data["enumerate_all"]
if macrolide_core:
filename = secure_filename(macrolide_core.filename)
macrolide_core.save(os.path.join(app.config["UPLOAD_FOLDER"],filename))
macrolide_core = open(os.path.join(UPLOAD_FOLDER,filename),'r')
else:
macrolide_core = None
if structural_motifs_file:
filename = secure_filename(structural_motifs_file.filename)
structural_motifs_file.save(os.path.join(app.config["UPLOAD_FOLDER"],filename))
structural_motifs_file =open(os.path.join(UPLOAD_FOLDER,filename),'r')
else:
structural_motifs_file = None
if sugars_file:
filename = secure_filename(sugars_file.filename)
sugars_file.save(os.path.join(app.config["UPLOAD_FOLDER"],filename))
sugars_file = open(os.path.join(UPLOAD_FOLDER,filename),'r')
else:
sugars_file = None
sample = SIME(structural_motifs_file, sugars_file, max_repeat_motifs, minimal_sugars, library_size, enumerate_all_SMs, enumerate_all_sugars)
if macrolide_core == None:
with open("Data/ery_core.txt", 'r') as f:
smile = f.readline()
else:
smile = macrolide_core.readline()
start_time = time.time()
sample.generate_templates_withExtendersNSugars(smile)
duration = convert_time(time.time()-start_time)
f'Time Elapsed for Enumeration: {duration}'
return f'''Time Elapsed for Enumeration: {duration}.
Your chemical libraries have been generated.
Please check in LIBRARIES folder. The resulting files for info and smiles should be there.'''
#return form_data, form_files
def convert_time(second):
'''
@@ -21,13 +90,12 @@ def convert_time(second):
second = round((minute - int(minute))*60,4)
return(str(int(day)) + ' DAYS: '+ str(int(hour)) + ' HOURS: '+ str(int(minute)) + ' MINUTES: ' + str(second) + ' SECONDS')
from SIME import *
#ERY_core = 'CC[C@H]1OC(=O)[*][*sugar*][*][*sugar*][*]C[*]C(=O)[*][C@@H](O)[*]1'
ERY_core = 'CC[C@H]1OC(=O)[*][*sugar*][C@H](C)[*sugar*][*]C[*]C(=O)[*][C@@H](O)[*]1'
if __name__ == '__main__':
sample = SIME()
start_time = time.time()
sample.generate_templates_withExtendersNSugars(ERY_core)
duration = convert_time(time.time()-start_time)
print('Time Elapsed for Enumeration: ' + str(duration))
app.jinja_env.auto_reload = True
app.config['TEMPLATES_AUTO_RELOAD']=True #forces flask to reload html templates
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0 #prevents browsers from caching static files served by flask, such as js code
app.run(debug=True,use_reloader=True)
#app.run(host='0.0.0.0',use_reloader=True)

17
requirements.txt Normal file
View File

@@ -0,0 +1,17 @@
certifi==2019.11.28
Click==7.0
Flask==1.1.1
itsdangerous==1.1.0
Jinja2==2.10.3
MarkupSafe==1.1.1
MolVS==0.1.1
numpy==1.17.5
olefile==0.46
pandas==0.25.3
Pillow==7.0.0
pycairo==1.19.0
python-dateutil==2.8.1
pytz==2019.3
six==1.14.0
Werkzeug==0.16.0
wincertstore==0.2

View File

@@ -0,0 +1,74 @@
body {
padding: 20px;
}
.app-form {
display: flex;
/**width: 40%;**/
}
.color-label {
color: #ffffff;
background-color: #838383;
padding: 10px;
border: 1px solid #CCC;
display: flex;
text-align: center;
}
.more-options {
padding: 10px;
border: 1px solid #CCC;
}
.more_options_label {
position: relative;
display:flex;
top:-30px;
left: auto;
width: 150px;
text-align: center;
border: 1px solid #CCC;
/**background-color:white;**/
}
.more-options-settings {
border: 1px solid #CCC;
}
.upper-part {
padding:25px;
}
label {
/* To make sure that all labels have the same size and are properly aligned */
display: inline-block;
width: 300px;
text-align: left;
}
textarea {
/* To make sure that all text fields have the same font settings
By default, textareas have a monospace font */
font: 1em sans-serif;
/* To give the same size to all text fields */
width: 75px;
box-sizing: border-box;
/* To harmonize the look & feel of text field border */
border: 1px solid #999;
}
input:focus, textarea:focus {
/* To give a little highlight on active elements */
border-color: #00B3FF;
}
textarea {
/* To properly align multiline text fields with their labels */
vertical-align: top;
/* To give enough room to type some text */
height: 5em;
}

58
templates/index.html Normal file
View File

@@ -0,0 +1,58 @@
<!DOCTYPE html>
<html lang="en" dir="ltr">
<head>
<meta charset="utf-8">
<link rel= "stylesheet" type= "text/css" href= "{{ url_for('static',filename='styles/main_page.css') }}">
<title></title>
</head>
<body>
<form class="app-form" enctype=multipart/form-data action="{{url_for('collect_data')}}" method="post">
<div class="upper-part">
<label class="color-label" for="macrolide_core">Upload Macrolide Core</label>
<input type="file" name="macrolide_core" value="">
<br>
<label class="color-label" for="structural_motifs_file">Upload Structural Motifs</label>
<input type="file" name="structural_motifs_file" value="">
<br>
<label class="color-label" for="sugars_file">Upload Sugars</label>
<input type="file" name="sugars_file" value="">
<br>
</div>
<div class="more-options">
<label class="color-label more_options_label">More Options</label>
<br>
<div class="more-options-settings">
<label for="max_repeat_motifs"># Maximal Repeat for Structural Motifs</label>
<input type="number" name="max_repeat_motifs" value="2">
<br>
</div>
<div class="more-options-settings">
<label for="minimal_sugars">Minimal # of sugars</label>
<input type="number" name="minimal_sugars" value="1">
<br>
</div>
<div class="more-options-settings">
<label for="library_size">Library Size</label>
<input type="number" name="library_size" value="10">
<br>
</div>
<div class="more-options-settings">
<input type="checkbox" name="enumerate_all_SMs" value="yes"> enumerate all possible sterocenters for SMs<br>
</div>
<div class="more-options-settings">
<input type="checkbox" name="enumerate_all_sugars" value="yes"> enumerate all possible sterocenters for sugars<br>
</div>
</div>
<input type="submit" name="" value="submit">
</form>
</body>
</html>