diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..256edf4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +uploads/ +LIBRARIES/ +*.pyc \ No newline at end of file diff --git a/Data/ery_core.txt b/Data/ery_core.txt new file mode 100644 index 0000000..6305ed5 --- /dev/null +++ b/Data/ery_core.txt @@ -0,0 +1 @@ +CC[C@H]1OC(=O)[*][*sugar*][C@H](C)[*sugar*][*]C[*]C(=O)[*][C@@H](O)[*]1 \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2e8556c --- /dev/null +++ b/README.md @@ -0,0 +1,28 @@ +# PKS Enumerator V2 + +HTML GUI for PKS Enumerator V2, served with Flask. + +## Setup +For best results, create a conda environment: + +```sh +conda create -n v2pks python +``` + +then activate it: +```sh +conda activate v2pks +``` + +Then install all required dependencies from `requirements.txt`: +```sh +pip install -r requirements.txt +``` + +## How to Use +Start the Flask server: +```sh +python server.py +``` + +Then open http://localhost:5000 in a browser to load the GUI. 
diff --git a/SIME.py b/SIME.py index 52db618..ed6c485 100644 --- a/SIME.py +++ b/SIME.py @@ -12,6 +12,7 @@ import os import re import os.path +import datetime from molvs import standardize_smiles from random import * from rdkit import Chem @@ -24,61 +25,67 @@ from operator import itemgetter class SIME: - def __init__(self): + def __init__(self, structural_motifs_file, sugars_file, max_repeat_motifs, minimal_sugars, library_size, enumerate_all_SMs, enumerate_all_sugars): ## self.info = {} # to record number of compounds for each length - self.total_numcompounds = 0 - self.load_sugars() - self.load_extenders() - self.library_size = int(input('Desired Library Size (numbers only) :')) - -## self.SM_info = {} - self.smile_file_name = self.create_directory()+'/mcrl' - self.max_repeat = int(input('Maximum occurrence of the same structural motifs per scaffold (number only) :')) - self.min_sugar = int(input('Minimal number of sugars per scaffold (number only) :')) - self.ext_question = input('Generate all possible stereocenters for extender structural motifs at joining carbons? y or n :') - self.sugar_question = input('Generate all possible stereocenters for sugars at joining carbons? 
y or n :') - + self.total_numcompounds = 0 + self.structural_motifs_file = structural_motifs_file + self.sugars_file = sugars_file + self.max_repeat_motifs = max_repeat_motifs + self.minimal_sugars = minimal_sugars + self.library_size = library_size + self.enumerate_all_SMs = enumerate_all_SMs + self.enumerate_all_sugars = enumerate_all_sugars + date_stamp = datetime.datetime.utcnow().strftime("%Y-%m-%d-%H-%M-%S") + self.smile_file_name = 'LIBRARIES/'+date_stamp+'_mcrl' self.info_manager = open(self.smile_file_name + '_info','a+') - self.info_manager.write('Desired Library Size (numbers only) :'+ self.library_size + '\n') - self.info_manager.write('Maximum occurrence of the same structural motifs per scaffold (number only) :'+ str(self.max_repeat) + '\n') - self.info_manager.write('Minimal number of sugars per scaffold (number only) :'+ str(self.min_sugar) + '\n') - self.info_manager.write('Generate all possible stereocenters for extender structural motifs at joining carbons? y or n :y'+ self.ext_question + '\n') - self.info_manager.write('Generate all possible stereocenters for sugars at joining carbons? y or n :' + self.sugar_question + '\n') + self.info_manager.write(f'Desired Library Size (numbers only) : {self.library_size}\n') + self.info_manager.write(f'Maximum occurrence of the same structural motifs per scaffold (number only) : {self.max_repeat_motifs}\n') + self.info_manager.write(f'Minimal number of sugars per scaffold (number only) :{self.minimal_sugars}\n') + self.info_manager.write(f'Generate all possible stereocenters for extender structural motifs at joining carbons? {self.enumerate_all_SMs}\n') + self.info_manager.write(f'Generate all possible stereocenters for sugars at joining carbons? 
{self.enumerate_all_sugars}\n') - def create_directory(self): - old_directory = os.getcwd() - newfolder = input('Peferred Directory Name for Output Files: ') - new_directory = os.path.join(old_directory, newfolder) - while os.path.exists(new_directory): - print('This folder exists or input is invalid. Try again.') - newfolder = input('Peferred Directory Name for Output Files: ') - new_directory = os.path.join(old_directory,newfolder) - os.mkdir(new_directory) - return new_directory + self.load_sugars() + self.load_extenders() + # def create_directory(self): + # old_directory = os.getcwd() + # newfolder = input('Peferred Directory Name for Output Files: ') + # new_directory = os.path.join(old_directory, newfolder) + # while os.path.exists(new_directory): + # print('This folder exists or input is invalid. Try again.') + # newfolder = input('Peferred Directory Name for Output Files: ') + # new_directory = os.path.join(old_directory,newfolder) + # os.mkdir(new_directory) + # return new_directory def load_sugars(self): - f = open('Data/sugars','r') - original_sugars = f.read().splitlines() - if self.sugar_question.lower() == 'no' or self.sugar_question.lower() == 'n': - self.sugars = [r.replace('[*R*]','') for r in original_sugars] + if self.sugars_file == None: + with open('Data/sugars', 'r') as f: + original_sugars = f.read().splitlines() else: + original_sugars = self.sugars_file.read().splitlines() + if self.enumerate_all_sugars.lower() == 'yes': sugars = [] for i in original_sugars: sugars.append(self.ENUMERATE_sugar_stereocenters(i)) self.sugars = [r.replace('[*R*]','') for r in list(chain(*sugars))] + else: + self.sugars = [r.replace('[*R*]','') for r in original_sugars] self.make_full_sugar_list() # make self.full_list by adding hydroxyl to self.sugars self.info_manager.write('\n\nSugars\n'+'\n'.join(original_sugars)+'\n') def load_extenders(self): - f = open('Data/selected_extenders.txt','r') - original_extenders = f.read().splitlines() - if 
self.ext_question.lower() == 'no' or self.ext_question.lower() == 'n': - self.extenders = [r.replace('[*R*]','') for r in original_extenders] + if self.structural_motifs_file == None: + with open('Data/selected_extenders.txt','r') as f: + original_extenders = f.read().splitlines() else: + original_extenders = self.structural_motifs_file.read().splitlines() + if self.enumerate_all_SMs.lower() == 'yes': self.extenders = [r.replace('[*R*]','') for r in self.enumerate_SM_stereocenters(original_extenders)] + else: + self.extenders = [r.replace('[*R*]','') for r in original_extenders] self.info_manager.write('\n\nStructural Motifs\n'+'\n'.join(original_extenders)+'\n') self.info_manager.close() @@ -87,7 +94,7 @@ class SIME: ''' self.full_list contains all the sugars and hydroxyl groups. ''' - hydroxyl = ['[C@H](O)','[C@@H](O)'] + hydroxyl = ["[C@H](O)","[C@@H](O)"] self.full_list = self.sugars.copy() self.full_list += hydroxyl # contains all sugars and hydroxyl groups @@ -96,10 +103,9 @@ class SIME: ''' Take in sugar strings that start and end with [*R*], and return a list of sugars with two different stereoceters for the joining carbon. ''' - sugar_stereocenters = [] # if the stereocenter of the joining carbon isn't defined - if smile[5] is 'C': + if smile[5] is "C": # template = smile[0:5] + '[C@H]' + smile[6:] template = smile.replace(smile[5], "[C@@H]", 1) sugar_stereocenters.append(template) @@ -109,48 +115,51 @@ class SIME: sugar_stereocenters.append(smile) if "@" in smile[:10]: if "@@" in smile[:10]: # for clockwise - template = smile.replace('@@', '@', 1) + template = smile.replace("@@", "@", 1) else: - template = smile.replace('@', '@@', 1) + template = smile.replace("@", "@@", 1) sugar_stereocenters.append(template) return sugar_stereocenters - def locate_SM_replace_points(self, smile): + def remove_SM_digits(self, smile): ''' - Take a string, and locate places for replacement. They are indicated by [1*], [2*], etc.... 
Return the string with all these joints replaced with [*]s. + Take a string, and locate places for replacement. They are indicated by [1*], [2*], etc.... + Return the string with all these joints replaced with [*]s. ''' numbers = set(re.findall(r'\d+', smile)) - possible_joints = ['['+str(m) +'*]' for m in numbers] + possible_joints = ["["+str(m) +"*]" for m in numbers] for each in possible_joints: - smile = smile.replace(each, '[*]') + smile = smile.replace(each, "[*]") return smile - def generate_templates_withextenders(self, smile): - ''' - Generate all possible templates. Takes in a smile string (structural core). This function only deals with extenders or structural motifs. - Then, insert all possible extenders at those joint positions. - ''' - smile_with_stars = [[r] for r in self.locate_SM_replace_points(smile).split('[*]')] # take a string with [*]s and split into different fragments, convert each fragment into a list - counter = 1 - # smile_with_stars = [ fragment1, fragment2, fragment3 ,...] all are split at joint positions - # insert self.extenders in between all fragments (except for the first and last blocks). - # so it will be something like [fragment1, [self.extenders], fragment2, [self.extenders], fragment3, [self.extenders] ,...] - for i in range(len(smile_with_stars)-1): - shuffle(self.extenders) - smile_with_stars.insert(counter,self.extenders) - counter+=2 - - template = [x for x in smile_with_stars if x != ['']] - self.make_compounds(template) - return template - + # def generate_templates_withextenders(self, smile): + # ''' + # Generate all possible templates. Takes in a smile string (structural core). + # This function only deals with extenders or structural motifs. + # Then, insert all possible extenders at those joint positions. 
+ # ''' + # smile_with_stars = [[r] for r in self.remove_SM_digits(smile).split('[*]')] # take a string with [*]s and split into different fragments, convert each fragment into a list + # counter = 1 + # # smile_with_stars = [ fragment1, fragment2, fragment3 ,...] all are split at joint positions + # # insert self.extenders in between all fragments (except for the first and last blocks). + # # so it will be something like [fragment1, [self.extenders], fragment2, [self.extenders], fragment3, [self.extenders] ,...] + # for i in range(len(smile_with_stars)-1): + # shuffle(self.extenders) + # smile_with_stars.insert(counter,self.extenders) + # counter+=2 + # + # template = [x for x in smile_with_stars if x != ['']] + # self.make_compounds(template) + # return template def generate_templates_withExtendersNSugars(self,smile): ''' - + Generate all possible templates. Takes in a smile string (structural core). + This function deals with both structural motifs and sugars. + Then, insert all possible SMs and sugars at those joint positions. ''' - smile_with_stars = self.string_splitter(self.locate_SM_replace_points(smile), '[*]') + smile_with_stars = self.string_splitter(self.remove_SM_digits(smile), '[*]') template = [] # create a template holder that will have a list of extenders or sugars at the respective split location points and the rest of the core will remain the same. # The position of all these fragments (core, extenders, sugars) have to be in the correct order. @@ -166,40 +175,74 @@ class SIME: template = [x for x in template if x != ['']] # [stable_fragment1, [possible extender motifs], stable_fragment2, [possible sugar moieties], ...] SM_template = self.insert_SMs(template) # a list of possible extenders inserted at SM locations - SGR_order = self.generate_dummy_sugar_templates(SM_template,minimal_sugars=self.min_sugar) # At least how many sugars do you want in the macrolide scaffold? because of this, more complications. 
+ SGR_order = self.generate_dummy_sugar_templates(SM_template,minimal_sugars=self.minimal_sugars) # At least how many sugars do you want in the macrolide scaffold? because of this, more complications. for each in SGR_order: - current_SYMBOLsugar_template = self.add_SYMBOLsugars_to_dummy_templates(each,SM_template) # add the lists of sugars and full_list at the dummy positions + current_SYMBOLsugar_template = self.replace_SYMBOLsugars_with_dummies(each,SM_template) # add the lists of sugars and full_list at the dummy positions current_sugar_template = self.insert_sugars_to_dummies(current_SYMBOLsugar_template) self.make_compounds(current_sugar_template) - - - def make_compounds(self,template): - written = [] + def make_compounds(self, template): + max_per_file = 1000000 file_counter = 1 - file_temp = self.smile_file_name + '_'+str(file_counter)+'.smiles' # attempts to split files because they get too large. Name of the first file will be "file_" + this variable - file_handler = open(file_temp,'a+') + written = [] for item in product(*template): - if self.max_occurrence(list(item))[1] <= self.max_repeat: # If the count of most common SM is less than or equal to the number set up by the user - - if self.total_numcompounds <= self.library_size: - if len(written) < 1000000: + if self.max_occurrence(list(item))[1] <= self.max_repeat_motifs: # If the count of most common SM is less than or equal to the number set up by the user + if self.library_size <= max_per_file: + if self.total_numcompounds <= self.library_size: temp = ''.join([str(r) for r in item]) - m = Chem.MolFromSmiles(temp) self.total_numcompounds += 1 written.append(temp) else: - file_handler.write('\n'.join(written)) # write smiles in written list - file_handler.close() - file_counter +=1 - file_temp = self.smile_file_name + '_'+str(file_counter)+'.smiles' - file_handler = open(file_temp,'a+') - written = [] - else: - break - file_handler.close() - file_handler.close() + self.write_to_file(written, 
file_counter) + break + elif self.library_size > max_per_file: + if self.total_numcompounds <= self.library_size: + if len(written) <= max_per_file: + temp = ''.join([str(r) for r in item]) + self.total_numcompounds += 1 + written.append(temp) + else: + self.write_to_file(written, file_counter) + file_counter +=1 + written = [] + else: + break + + def write_to_file(self, compound_list, file_counter): + ''' + When total compound is 1000000 or library size, this function will be called + to write compounds to file. + ''' + print(compound_list) + file_temp = self.smile_file_name + '_'+str(file_counter)+'.smiles' # attempts to split files because they get too large. Name of the first file will be "file_" + this variable + with open(file_temp,'a+') as file_handler: + file_handler.write('\n'.join(compound_list)) + + # def make_compounds(self,template): + # written = [] + # file_counter = 1 + # file_temp = self.smile_file_name + '_'+str(file_counter)+'.smiles' # attempts to split files because they get too large. 
Name of the first file will be "file_" + this variable + # file_handler = open(file_temp,'a+') + # for item in product(*template): + # if self.max_occurrence(list(item))[1] <= self.max_repeat_motifs: # If the count of most common SM is less than or equal to the number set up by the user + # if self.total_numcompounds <= self.library_size: + # if len(written) < 1000000: + # temp = ''.join([str(r) for r in item]) + # # m = Chem.MolFromSmiles(temp) + # self.total_numcompounds += 1 + # written.append(temp) + # else: + # file_handler.write('\n'.join(written)) + # file_handler.close() + # file_counter +=1 + # file_temp = self.smile_file_name + '_'+str(file_counter)+'.smiles' + # file_handler = open(file_temp,'a+') + # written = [] + # else: + # file_handler.write('\n'.join(written)) # write smiles in written list + # break + # file_handler.close() def RS_check(self,smile,ringsize): @@ -301,10 +344,9 @@ class SIME: n = the least number of sugars the users want in each macrolide. Default is one, i.e. there will be at least one sugar in each macrolide. Generate a list of all possible templates using dummys as 'SUGARS' (intended for only sugars) and 'FULL_LIST (intended for sugars + hydroxy).' - ''' num_sugars = template.count(['[*sugar*]']) - list_with_atLeast_nSugars = n*['SUGARS']+(num_sugars-n)*['FULL_LIST'] # Make a new list with at least "n" "SUGARS" and the rest "FULL_LIST" + list_with_atLeast_nSugars = minimal_sugars*['SUGARS']+(num_sugars-minimal_sugars)*['FULL_LIST'] # Make a new list with at least "n" "SUGARS" and the rest "FULL_LIST" sugar_lists_in_order = [] # make a new list to hold all possible sugar templates at each position # now create all different arrangements of sugars, full_list. The positions of these blocks matter. 
for i in permutations(list_with_atLeast_nSugars): @@ -313,7 +355,7 @@ class SIME: return sugar_lists_in_order - def add_SYMBOLsugars_to_dummy_templates(self, sugar_dummy_order,template_with_sugarinlist): + def replace_SYMBOLsugars_with_dummies(self, sugar_dummy_order,template_with_sugarinlist): ''' each sugar_dummy_order looks like ('SUGARS', 'FULL_LIST', 'FULL_LIST', 'FULL_LIST') template_with_sugarinlist looks like = [['1'], [ext1,ext2,...], ['2'],['[*sugar*]'],['3'], [ext1,ext2,...], ['[*sugar*]'],['4'], [ext1,ext2,...], ['5'], [ext1,ext2,...], ['6']] diff --git a/main.py b/main.py index 0c966e8..39f89e9 100644 --- a/main.py +++ b/main.py @@ -10,6 +10,75 @@ # Licence: #------------------------------------------------------------------------------- import time +from flask import Flask, render_template, url_for, request +from werkzeug import secure_filename +import os +from SIME import * + +app = Flask(__name__) +UPLOAD_FOLDER = 'uploads' +app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER + +@app.route('/') +def main_page(): + return render_template('index.html') + +@app.route('/collect_data',methods=['POST']) +def collect_data(): + form_data = request.form + form_files = request.files + macrolide_core = form_files.get('macrolide_core') + structural_motifs_file = form_files.get('structural_motifs_file') + sugars_file = form_files.get('sugars_file') + + #more options + max_repeat_motifs = int(form_data.get("max_repeat_motifs")) + minimal_sugars = int(form_data.get("minimal_sugars")) + library_size = int(form_data.get("library_size")) + if form_data.get("enumerate_all_SMs"): + enumerate_all_SMs = form_data.get("enumerate_all_SMs") + else: + enumerate_all_SMs = 'no' + if form_data.get("enumerate_all_sugars"): + enumerate_all_sugars = form_data.get("enumerate_all_sugars") + else: + enumerate_all_sugars = 'no' + #enumarate_all = form_data["enumerate_all"] + + if macrolide_core: + filename = secure_filename(macrolide_core.filename) + 
macrolide_core.save(os.path.join(app.config["UPLOAD_FOLDER"],filename)) + macrolide_core = open(os.path.join(UPLOAD_FOLDER,filename),'r') + else: + macrolide_core = None + if structural_motifs_file: + filename = secure_filename(structural_motifs_file.filename) + structural_motifs_file.save(os.path.join(app.config["UPLOAD_FOLDER"],filename)) + structural_motifs_file =open(os.path.join(UPLOAD_FOLDER,filename),'r') + else: + structural_motifs_file = None + if sugars_file: + filename = secure_filename(sugars_file.filename) + sugars_file.save(os.path.join(app.config["UPLOAD_FOLDER"],filename)) + sugars_file = open(os.path.join(UPLOAD_FOLDER,filename),'r') + else: + sugars_file = None + + sample = SIME(structural_motifs_file, sugars_file, max_repeat_motifs, minimal_sugars, library_size, enumerate_all_SMs, enumerate_all_sugars) + if macrolide_core == None: + with open("Data/ery_core.txt", 'r') as f: + smile = f.readline() + else: + smile = macrolide_core.readline() + start_time = time.time() + sample.generate_templates_withExtendersNSugars(smile) + duration = convert_time(time.time()-start_time) + f'Time Elapsed for Enumeration: {duration}' + + return f'''Time Elapsed for Enumeration: {duration}. + Your chemical libraries have been generated. + Please check in LIBRARIES folder. 
The resulting files for info and smiles should be there.''' + #return form_data, form_files def convert_time(second): ''' @@ -21,13 +90,12 @@ def convert_time(second): second = round((minute - int(minute))*60,4) return(str(int(day)) + ' DAYS: '+ str(int(hour)) + ' HOURS: '+ str(int(minute)) + ' MINUTES: ' + str(second) + ' SECONDS') -from SIME import * -#ERY_core = 'CC[C@H]1OC(=O)[*][*sugar*][*][*sugar*][*]C[*]C(=O)[*][C@@H](O)[*]1' -ERY_core = 'CC[C@H]1OC(=O)[*][*sugar*][C@H](C)[*sugar*][*]C[*]C(=O)[*][C@@H](O)[*]1' +if __name__ == '__main__': -sample = SIME() -start_time = time.time() -sample.generate_templates_withExtendersNSugars(ERY_core) -duration = convert_time(time.time()-start_time) -print('Time Elapsed for Enumeration: ' + str(duration)) + app.jinja_env.auto_reload = True + app.config['TEMPLATES_AUTO_RELOAD']=True #forces flask to reload html templates + app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0 #prevents browsers from caching static files served by flask, such as js code + + app.run(debug=True,use_reloader=True) + #app.run(host='0.0.0.0',use_reloader=True) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..329cbf8 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,17 @@ +certifi==2019.11.28 +Click==7.0 +Flask==1.1.1 +itsdangerous==1.1.0 +Jinja2==2.10.3 +MarkupSafe==1.1.1 +MolVS==0.1.1 +numpy==1.17.5 +olefile==0.46 +pandas==0.25.3 +Pillow==7.0.0 +pycairo==1.19.0 +python-dateutil==2.8.1 +pytz==2019.3 +six==1.14.0 +Werkzeug==0.16.0 +wincertstore==0.2 diff --git a/static/styles/main_page.css b/static/styles/main_page.css new file mode 100644 index 0000000..d8c8770 --- /dev/null +++ b/static/styles/main_page.css @@ -0,0 +1,74 @@ +body { + padding: 20px; +} +.app-form { + display: flex; + /**width: 40%;**/ +} + +.color-label { + color: #ffffff; + background-color: #838383; + padding: 10px; + border: 1px solid #CCC; + display: flex; + text-align: center; +} + +.more-options { + padding: 10px; + border: 1px solid #CCC; +} + 
+.more_options_label { + position: relative; + display:flex; + top:-30px; + left: auto; + width: 150px; + text-align: center; + border: 1px solid #CCC; + + /**background-color:white;**/ +} + +.more-options-settings { + border: 1px solid #CCC; +} + +.upper-part { + padding:25px; +} + +label { + /* To make sure that all labels have the same size and are properly aligned */ + display: inline-block; + width: 300px; + text-align: left; +} + +textarea { + /* To make sure that all text fields have the same font settings + By default, textareas have a monospace font */ + font: 1em sans-serif; + + /* To give the same size to all text fields */ + width: 75px; + box-sizing: border-box; + + /* To harmonize the look & feel of text field border */ + border: 1px solid #999; +} + +input:focus, textarea:focus { + /* To give a little highlight on active elements */ + border-color: #00B3FF; +} + +textarea { + /* To properly align multiline text fields with their labels */ + vertical-align: top; + + /* To give enough room to type some text */ + height: 5em; +} diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..f2c224d --- /dev/null +++ b/templates/index.html @@ -0,0 +1,58 @@ + + + + + + + + +
+
+ + +
+ + + +
+ + + +
+
+ +
+ +
+ +
+ + +
+
+ +
+ + +
+
+ +
+ + +
+
+ +
+ enumerate all possible stereocenters for SMs
+
+ +
+ enumerate all possible stereocenters for sugars
+
+
+ + +
+ +