Source code for coralme.builder.main

# python imports
import io
import os
import re
import sys
import pickle
import shutil
import pathlib
import warnings
import collections
import subprocess
import logging

# third party imports
import tqdm
import sympy
import numpy
import pandas
import anyconfig

import cobra
import coralme

# configuration
from coralme.core.extended_classes import log_format
bar_format = '{desc:<75}: {percentage:.1f}%|{bar:10}| {n_fmt:>5}/{total_fmt:>5} [{elapsed}<{remaining}]'
try:
	warnings.simplefilter(action = 'ignore', category = pandas.errors.SettingWithCopyWarning)
except:
	warnings.warn("This pandas version does not allow for correct warning handling. Pandas >=1.5.1 is suggested.")

[docs] class MEBuilder(object): """ MEBuilder class to coordinate the reconstruction of ME-models. Parameters ---------- *args: Positional arguments are passed as paths to JSON files that update the configuration of the parent class. **kwargs: Further keyword arguments are passed on as dictionaries to update the configuration of the parent class. """ def __init__(self, *args, m_model_path = False, genbank_path = False, locus_tag = 'locus_tag', blast_threads = None, # if zero, no run_bbh_blast df_gene_cplxs_mods_rxns = 'automated-org-with-refs.xlsx', df_TranscriptionalUnits = '', df_matrix_stoichiometry = '', df_matrix_subrxn_stoich = '', df_metadata_orphan_rxns = '', df_metadata_metabolites = '', out_directory = './', log_directory = './', debug = False, **kwargs): """ keyword arguments ME-Model-ID, str m-model-path, JSON or SBML file genbank-path, GenBank file only df_gene_cplxs_mods_rxns, path to output, must end in '.xlsx' df_TranscriptionalUnits df_matrix_stoichiometry df_matrix_subrxn_stoich df_metadata_orphan_rxns df_metadata_metabolites log_directory, str out_directory, str dev_reference, default True user_reference, path. If 'user_reference' is defined, dev_reference is False add_lipoproteins, default True estimate_keffs, default True """ self.available_reference_models = { 'iJL1678b' : 'locus_tag', # E. coli, gram negative 'iJT964' : 'old_locus_tag', # B. subtilis, gram positive } run_bbh_blast = True if blast_threads < 0: run_bbh_blast = False if blast_threads is None: blast_threads = os.cpu_count()-1 config = { 'ME-Model-ID' : 'coralME', 'm-model-path' : m_model_path, 'genbank-path' : genbank_path, 'df_gene_cplxs_mods_rxns' : '{:s}/{:s}'.format(out_directory, df_gene_cplxs_mods_rxns), 'df_TranscriptionalUnits' : df_TranscriptionalUnits, 'df_matrix_stoichiometry' : df_matrix_stoichiometry, 'df_matrix_subrxn_stoich' : df_matrix_subrxn_stoich, 'df_metadata_orphan_rxns' : df_metadata_orphan_rxns, 'df_metadata_metabolites' : df_metadata_metabolites, 'log_directory' : log_directory, 'out_directory' : out_directory, 'locus_tag' : locus_tag, 'run_bbh_blast' : run_bbh_blast, 'blast_threads' : blast_threads, 'dev_reference' : True, 'add_lipoproteins' : True, # 'add_translocases' : False, 'estimate_keffs' : True, 'defer_to_rxn_matrix' : [], 'percent_dna_data' : [0.0592, 0.0512, 0.0330, 0.0252, 0.0222, 0.0208], # E. coli ME-model 'gr_data_doublings_per_hour' : [0, 0.6, 1.0, 1.5, 2.0, 2.5], # E. coli ME-model 'complex_cofactors' : { 'fes_transfers' : [], 'fes_chaperones' : {}, 'bmocogdp_chaperones' : {}, 'acps_subreactions': {'mod_pan4p_c': ['acpP_activation']}, 'biotin_subreactions' : { 'mod_btn_c' : [ 'biotin_ligase' ] }, 'citx_subreactions': {'mod_2tpr3dpcoa_c': ['citx_transfer_to_citd']}, 'glycyl_subreactions': {'mod_glycyl_c': ['gre_activation']}, 'lipoate_subreactions' : { 'mod_lipoyl_c' : [ 'lipoyl_denovo', 'lipoyl_scavenging' ] }, 'FeFe/NiFe' : { 'mod_FeFe_cofactor_c' : '', 'mod_NiFe_cofactor_c' : '' } }, } for input_file in args: with open(input_file, 'r') as infile: config.update(anyconfig.load(infile)) if kwargs: config.update(kwargs) # User must provide values for m-model-path and genbank-path if not config.get('m-model-path', False): raise ValueError('Configuration must supply a \'m-model-path\' key and value.') if not config.get('genbank-path', False): raise ValueError('Configuration must supply a \'genbank-path\' key and value.') self.configuration = config self.me_model = coralme.core.model.MEModel(id_or_model = self.configuration.get('ME-Model-ID', 'coralME'), name = self.configuration.get('ME-Model-ID', 'coralME'), mu = self.configuration.get('growth_key', 'mu')) self.curation_notes = coralme.builder.notes.load_curation_notes( self.configuration['out_directory'] + '/curation_notes.json' ) self.logger = { 'MEBuilder' : coralme.core.extended_classes.ListHandler([]), 'MEReconstruction-step1' : coralme.core.extended_classes.ListHandler([]), 'MEReconstruction-step2' : coralme.core.extended_classes.ListHandler([]), 'METroubleshooter' : coralme.core.extended_classes.ListHandler([]) } data = \ 'code,interpretation,gram\n' \ 'CCI-CW-BAC-POS-GP,Cell_Wall,pos\n' \ 'CCI-OUTER-MEM-GN,Outer_Membrane,neg\n' \ 'CCI-PM-BAC-NEG-GN,Inner_Membrane,neg\n' \ 'CCI-PM-BAC-POS-GP,Plasma_Membrane,pos\n' \ 'CCO-MEMBRANE,Membrane,' self.location_interpreter = pandas.read_csv(io.StringIO(data), index_col = 0) # check user options exists = [] for filename in [ "m-model-path", "genbank-path", "df_TranscriptionalUnits", "df_matrix_stoichiometry", "df_matrix_subrxn_stoich", "df_metadata_orphan_rxns", "df_metadata_metabolites", "df_reaction_keff_consts", "biocyc.genes", "biocyc.prots", "biocyc.TUs", "biocyc.RNAs", "biocyc.seqs", ]: if config.get(filename, None) is None: pass elif config.get(filename, '') == '': pass else: exists.append([config[filename], pathlib.Path(config[filename]).exists()]) if not all([ y for x,y in exists ]): raise FileNotFoundError('Check the path to the {:s} file(s).'.format(', '.join([ x for x,y in exists if y == False ]))) for option in [ "out_directory", "log_directory" ]: if config.get(option, '.') == '': self.configuration[option] = config.get('ME-Model-ID', 'coralME') # debug data self.debug_data = {} def generate_files(self, overwrite = True): """Performs the Synchronize and Complement steps of the reconstruction. This function will read the Organism and the Reference. It will synchronize the input files, complement them, and finally build the OSM for the Organism. Parameters ---------- overwrite : bool If True, overwrite the OSM using the defined path in the configuration. """ config = self.configuration model = config.get('ME-Model-ID', 'coralME') directory = config.get('log_directory', '.') #if overwrite and os.path.exists(directory): #shutil.rmtree(directory + '/blast_files_and_results') #shutil.rmtree(directory + '/building_data') if not os.path.exists(directory): os.mkdir(directory) log = logging.getLogger() # root logger for hdlr in log.handlers[:]: # remove all old handlers log.removeHandler(hdlr) logging.basicConfig(filename = '{:s}/MEBuilder-{:s}.log'.format(directory, model), filemode = 'w', level = logging.WARNING, format = log_format) log.addHandler(self.logger['MEBuilder']) #log.addHandler(logging.StreamHandler(sys.stdout)) logging.captureWarnings(True) sep = '' coralme.core.extended_classes.ListHandler.print_and_log("{}Initiating file processing...".format(sep)) # Read organism self.org = coralme.builder.organism.Organism(config, is_reference = False) self.org.get_organism() self.curation_notes = self.org.curation_notes # self.org.rpod = '' # self.org.get_rna_polymerase(force_RNAP_as='') logging.warning("Modifying and preparing M-model") # self.curate() self.prepare_model() # ## Homology with reference # Curation note: Check which locus tag field in the genbank agrees with those in the biocyc files. # Make sure you have specified that field in the beginning of this notebook. # Reference if bool(config.get('dev_reference', False)) or bool(config.get('user_reference', False)): logging.warning("Reading reference") self.ref = coralme.builder.organism.Organism(config.copy(), is_reference = True, available_reference_models = self.available_reference_models) self.ref.get_organism() folder = self.org.blast_directory if bool(config.get('run_bbh_blast', True)): blast_threads = config.get('blast_threads', 1) coralme.core.extended_classes.ListHandler.print_and_log("~ Running BLAST with {} threads...".format(blast_threads)) self.org.gb_to_faa('org', element_types = {'CDS'}, outdir = self.org.blast_directory) self.ref.gb_to_faa('ref', element_types = {'CDS'}, outdir = self.org.blast_directory) def execute(cmd): if os.name == 'nt': cmd = 'cmd /c {:s}'.format(cmd) cmd = re.findall(r'(?:[^\s,"]|"+(?:=|\\.|[^"])*"+)+', cmd) out, err = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.PIPE).communicate() # make blast databases execute('makeblastdb -in {:s}/org.faa -dbtype prot -out {:s}/org'.format(folder, folder)) execute('makeblastdb -in {:s}/ref.faa -dbtype prot -out {:s}/ref'.format(folder, folder)) # bidirectional blast execute('blastp -db {:s}/org -query {:s}/ref.faa -num_threads {} -out {:s}/org_as_db.txt -outfmt 6'.format(folder, folder, blast_threads, folder)) execute('blastp -db {:s}/ref -query {:s}/org.faa -num_threads {} -out {:s}/ref_as_db.txt -outfmt 6'.format(folder, folder, blast_threads, folder)) #os.system('{}/auto_blast.sh {}'.format(self.directory,self.org.directory)) coralme.core.extended_classes.ListHandler.print_and_log('BLAST done.') # #### Reciprocal hits logging.warning("Getting homologs") self.get_homology(evalue = self.org.config.get("e_value_cutoff", 1e-10)) self.homology.mutual_hits_df.to_csv('{:s}/mutual_hits.txt'.format(folder)) #self.homology.mutual_hits_df.to_csv(self.org.directory+'{:s}/mutual_hits.txt'.format(folder)) # #### Get enzyme homology self.homology.get_complex_homology() # #### Update model info with homology logging.warning("Updating from homology") self.update_from_homology() filename = self.org.config.get('df_TranscriptionalUnits', self.org.directory + "TUs_from_biocyc.txt") filename = self.org.directory + "TUs_from_biocyc.txt" if filename == '' else filename df = self.org.TU_df df = df.sort_index(inplace = False) if overwrite: with open(filename, 'w') as outfile: self.org.TU_df.to_csv(outfile, sep = '\t') logging.warning('The BioCyc transcriptional data file was processed and overwritten into the {:s} file.'.format(filename)) else: if pathlib.Path(filename).exists(): logging.warning('Set \'overwrite = True\' to overwrite the {:s} file.'.format(filename)) else: with open(filename, 'w') as outfile: self.org.TU_df.to_csv(outfile, sep = '\t') logging.warning('The BioCyc transcriptional data file was saved to the ./{:s} file.'.format(filename)) self.configuration['df_TranscriptionalUnits'] = filename filename = self.org.directory + "subreaction_matrix.txt" if overwrite: with open(filename, 'w') as outfile: self.org.subreaction_matrix.to_csv(outfile, sep = '\t') logging.warning('The subreaction data file was processed and overwritten into the {:s} file.'.format(filename)) else: if pathlib.Path(filename).exists(): logging.warning('Set \'overwrite = True\' to overwrite the {:s} file.'.format(filename)) else: with open(filename, 'w') as outfile: self.org.subreaction_matrix.to_csv(outfile, sep = '\t') logging.warning('The subreaction data file was saved to the ./{:s} file.'.format(filename)) filename = self.org.directory + "me_metabolites.txt" if overwrite: with open(filename, 'w') as outfile: self.org.me_mets.to_csv(outfile, sep = '\t') logging.warning('The M to ME metabolite mapping file was processed and overwritten into the {:s} file.'.format(filename)) else: if pathlib.Path(filename).exists(): logging.warning('Set \'overwrite = True\' to overwrite the {:s} file.'.format(filename)) else: with open(filename, 'w') as outfile: self.org.me_mets.to_csv(outfile, sep = '\t') logging.warning('The M to ME metabolite mapping file was saved to the ./{:s} file.'.format(filename)) logging.warning("Updating enzyme-reaction association") self.update_enzyme_reaction_association() logging.warning("Processing RNA modifications") self.org.process_rna_modifications() logging.warning("Getting tRNA to codon information") self.get_trna_to_codon() # #### Biomass constituents self.org.biomass_constituents = config.get('flux_of_biomass_constituents', {}) # Fill builder with dummy logging.warning("Filling files with CPLX_dummy") self.fill() # Final checks of builder logging.warning("Performing final checks of files") self.check() # Update manual curation files for user reference logging.warning("Generating filled manual curation files") self.org.manual_curation.save() self.org.complexes_df.to_csv(self.org.directory + "reference_files/complexes.txt") self.org.protein_mod.to_csv(self.org.directory + "reference_files/protein_modification.txt") # Update notes logging.warning("Generating curation notes") coralme.builder.notes.save_curation_notes( self.curation_notes, self.configuration['out_directory'] + '/curation_notes.json' ) coralme.builder.notes.publish_curation_notes( self.curation_notes, self.configuration['out_directory']+ '/curation_notes.txt' ) logging.warning("Saving modified M-model") filename = '{:s}/building_data/m_model_modified.json'.format(config.get('out_directory', './')) cobra.io.save_json_model(self.org.m_model, filename) config['m-model-path'] = filename logging.warning("Generating new configuration file") self.input_data(self.org.m_model, overwrite) coralme.core.extended_classes.ListHandler.print_and_log("{}File processing done.".format(sep)) logging.shutdown() # We will remove duplicates entries in the log output with open('{:s}/MEBuilder-{:s}.log'.format(config.get('log_directory', '.'), config.get('ME-Model-ID', 'coralME')), 'w') as outfile: logger = self.logger['MEBuilder'] coralme.core.extended_classes.ListHandler.log_to_file(logger.log_list, logger.debug, outfile) def prepare_model(self): """Performs initial preparation of the M-model. This function will fix some known issues that M-models can Parameters ---------- overwrite : bool If True, overwrite the OSM using the defined path in the configuration. """ m_model = self.org.m_model target_compartments = {"c": "Cytosol", "e": "Extra-organism", "p": "Periplasm"} new_dict = {} warn_compartments = [] for k,v in tqdm.tqdm(m_model.compartments.items(), 'Gathering M-model compartments...', bar_format = bar_format, total=len(m_model.compartments)): if k in target_compartments: new_dict[k] = target_compartments[k] else: warn_compartments.append(k) new_dict[k] = k m_model.compartments = new_dict gene_list = [] for m in tqdm.tqdm(m_model.metabolites, 'Fixing compartments in M-model metabolites...', bar_format = bar_format): if not m.compartment: logging.warning("Fixing compartment for metabolite {}".format(m.id)) m.compartment = m.id[-1] for r in tqdm.tqdm(m_model.reactions, 'Fixing missing names in M-model reactions...', bar_format = bar_format): if not r.name or isinstance(r.name, float): logging.warning("Fixing name for reaction {}".format(r.id)) r.name = r.id # Solve m_model solution = m_model.optimize() if not solution or solution.objective_value < 1e-6: self.org.curation_notes['prepare_model'].append({ 'msg':'M-model cannot grow', 'importance':'critical', 'to_do':'Check that the model you provided works'}) try: self.org.biomass = str(m_model.objective.expression.as_two_terms()[0]).split('*')[1] biomass_rxn = m_model.reactions.get_by_id(self.org.biomass) logging.warning('{} was identified as the biomass reaction'.format(biomass_rxn.id)) except: self.org.biomass = None biomass_rxn = None logging.warning('Could not identify biomass reaction') self.org.GAM = self.configuration.get('gam',None) # Get GAM if self.org.GAM is None: adp = m_model.metabolites.adp_c if biomass_rxn is not None and adp in biomass_rxn.metabolites: self.org.GAM = biomass_rxn.metabolites[adp] logging.warning('GAM identified with value {}'.format(self.org.GAM)) else: self.org.GAM = self.configuration.get('gam', self.me_model._gam) self.org.curation_notes['prepare_model'].append({ 'msg':'GAM could not be identified from biomass reaction, setting a standard value of {:g}. adp_c is not present as a product.'.format(self.me_model._gam), 'importance':'high', 'to_do':'Check whether the biomass reaction was read or defined correctly. You can define GAM with builder.configuration[\'gam\'] = GAM_value'}) self.org.NGAM = self.configuration.get('ngam',None) if self.org.NGAM is None: # Get NGAM NGAMs = ['NGAM','ATPM'] for r in NGAMs: if r in m_model.reactions: rxn = m_model.reactions.get_by_id(r) if rxn.lower_bound <= 0: continue self.org.NGAM = rxn.lower_bound logging.warning('{} was identified as NGAM with value {}'.format(r,self.org.NGAM)) break if self.org.NGAM is None: self.org.NGAM = self.configuration.get('ngam', self.me_model._ngam) self.org.curation_notes['prepare_model'].append({ 'msg':'NGAM could not be identified in M-model, setting a standard value of {:g}.'.format(self.me_model._ngam), 'importance':'high', 'to_do':'Manually define NGAM with builder.configuration[\'ngam\'] = NGAM_value. Check if a reaction with identifier NGAM or ATPM has a zero or negative lower bound.'}) elif self.org.NGAM == 0: self.org.NGAM = self.configuration.get('ngam', self.me_model._ngam) self.org.curation_notes['prepare_model'].append({ 'msg':'NGAM was identified from reaction {}, but its lower bound is 0. NGAM set to a standard value of {:g}.'.format(rxn.id, self.me_model._ngam), 'importance':'high', 'to_do':'Manually define NGAM with builder.configuration[\'ngam\'] = NGAM_value'}) # Warnings if warn_compartments: self.org.curation_notes['prepare_model'].append({ 'msg':'Some compartments in m_model are unknown', 'triggered_by':warn_compartments, 'importance':'medium', 'to_do':'Check whether the compartment is correct. If not, change it in the reaction ID in the m_model.'}) def get_homology(self, evalue=1e-10): """Calculates homology between Organism and Reference. Parameters ---------- evalue : float, default 1e-10 Sets the E-value cutoff for calling protein homologs using BLAST. """ self.homology = coralme.builder.homology.Homology(self.org, self.ref, evalue = evalue) def get_trna_to_codon(self): """Gets tRNA to codon association from the Genome. """ import Bio from Bio import SeqIO, Seq, SeqFeature, SeqUtils me_model = self.me_model contigs = self.org.contigs # Dictionary of tRNA locus ID to the model.metabolite object. It accounts for misacylation trna_to_aa = {} # Dictionary of tRNA locus ID to the model.metabolite object. It accounts for misacylation trna_misacylation = self.configuration.get('trna_misacylation',dict()) # Dictionary of tRNA locus ID to amino acid, one dict of tRNAs per organelle type # aa2trna does not account for misacylation aa2trna = { 'c' : {} } # prokaryotes and eukaryotes if self.configuration.get('domain','prokaryote').lower() not in ['prokaryote', 'bacteria']: aa2trna.update({'m' : {}, 'h' : {}}) # mitochondria and plastids # Translation tables, one table per organelle type transl_tables = { 'c' : set() } # prokaryotes and eukaryotes if self.configuration.get('domain','prokaryote') not in ['prokaryote', 'bacteria']: transl_tables.update({'m' : set(), 'h' : set()}) # mitochondria and plastids # Messages msg1 = 'From the tRNA misacylation dictionary, the {:s} gene [tRNA({:s})] is loaded and converted into {:s}-tRNA({:s}). Make sure a MetabolicReaction to convert a {:s}-tRNA({:s}) into a {:s}-tRNA({:s}) is present in the ME-model.' msg2 = 'From the tRNA misacylation dictionary, the {:s} gene [tRNA({:s})] is loaded and converted into {:s}-tRNA({:s}). No further modification needs to take place.' canonical_aas = [ 'Ala', 'Arg', 'Asn', 'Asp', 'Cys', 'Gln', 'Glu', 'Gly', 'His', 'Ile', 'Leu', 'Lys', 'Met', 'Phe', 'Pro', 'Ser', 'Thr', 'Trp', 'Tyr', 'Val' ] for contig in contigs: iterator = tqdm.tqdm(contig.features, 'Getting tRNA to codon dictionary from {}'.format(contig.id), bar_format = bar_format) if len(contigs) < 10 else contig.features for feature in iterator: # Find organelle in source if feature.type in ['source', 'region']: # genbanks from gff+fna do not contain a 'source' feature organelle = feature.qualifiers.get('organelle', [None])[0] continue seq = feature.extract(contig).seq.replace('-', '') if feature.type == 'CDS': # Add the translation table prot = feature.qualifiers.get('translation', [''])[0] transl_table = feature.qualifiers.get('transl_table', ['1'])[0] # Add the translation table per organelle if organelle is None: transl_tables['c'].add(int(transl_table)) if me_model.global_info['domain'].lower() not in ['prokaryote', 'bacteria']: logging.warning('Contig \'{:s}\' does not report an organelle type.'.format(contig.id)) elif organelle.lower() in ['mitochondria', 'mitochondrion']: transl_tables['m'].add(int(transl_table)) elif organelle.lower() in ['chloroplast', 'plastid']: transl_tables['h'].add(int(transl_table)) else: continue if not feature.type == 'tRNA': continue bnum = self.org._get_feature_locus_tag(feature) if bnum is None: continue aa = feature.qualifiers.get('product', ['tRNA-None'])[0].split('-')[1] if aa in canonical_aas + ['Asx', 'Glx', 'fMet', 'Sec']: pass else: logging.warning('The tRNA \'{:s}\' is not associated to a valid product name (tRNA-Amino acid 3 letters code)'.format(bnum)) continue msg = 'The tRNA \'{:s}\' is associated to two amino acids. The \'trna_misacylation\' dictionary was modified to attempt load the correct amino acid.' # Special tRNA(Asx) that can be loaded with Asn (EC 6.1.1.22) or Asp (EC 6.1.1.12) if aa == 'Asx': trna_misacylation['Asx'] = 'Asp' logging.warning(msg.format(bnum)) # Special tRNA(Glx) that can be loaded with Gln (EC 6.1.1.18) or Glu (EC 6.1.1.17) if aa == 'Glx': trna_misacylation['Glx'] = 'Glu' logging.warning(msg.format(bnum)) if aa in trna_misacylation.keys(): # misacylation only in mitochondria and chloroplasts filter1a = me_model.global_info['domain'].lower() in ['eukarya', 'eukaryote'] filter1b = str(organelle).lower() in ['mitochondria', 'mitochondrion', 'chloroplast', 'plastid'] # misacylation in the cytoplasm of Gram-positive eubacteria (and other bacteria such as cyanobacteria) filter2 = me_model.global_info['domain'].lower() in ['bacteria', 'prokaryote'] if filter1a and filter1b or filter2: trna_to_aa[bnum] = trna_misacylation[aa] if aa.endswith('x'): logging.warning(msg2.format(bnum, aa, trna_misacylation[aa], aa)) else: logging.warning(msg1.format(bnum, aa, trna_misacylation[aa], aa, trna_misacylation[aa], aa, aa, aa)) else: # misacylation is not valid in the compartment and domain trna_to_aa[bnum] = aa else: trna_to_aa[bnum] = aa if organelle is None: aa2trna['c'][bnum] = aa elif organelle.lower() in ['mitochondria', 'mitochondrion']: aa2trna['m'][bnum] = aa elif organelle.lower() in ['chloroplast', 'plastid']: aa2trna['h'][bnum] = aa # trna_to_codon does not account for misacylation: { 'tRNA ID' : 'Amino acid to load into the tRNA' } trna_to_aa = { k:v.replace('fMet', 'Met') for k,v in trna_to_aa.items() } for organelle, aa2trna_dct in aa2trna.items(): aa2trna_dct = { k:v.capitalize().split('_')[0] if 'fMet' not in v else 'fMet' for k,v in aa2trna_dct.items() } aa2trna_df = pandas.DataFrame(data = [aa2trna_dct.values(), aa2trna_dct.keys()]).T aa2trna_df = aa2trna_df.groupby(0).agg({1: lambda x: x.tolist()}) if 'fMet' in aa2trna_df.index: aa2trna_df.loc['Met'] = aa2trna_df.loc['fMet'] + aa2trna_df.loc['Met'] aa2trna[organelle] = aa2trna_df if not aa2trna_df.empty: # assign START tRNAs to every fMet-tRNA (Met-tRNA if not) and check if at least one tRNA was identified if 'fMet' in aa2trna_df.index: if len(me_model.global_info['START_tRNA']) == 0: me_model.global_info['START_tRNA'] = list(aa2trna_df.loc['fMet'])[0] if 'Met' in aa2trna_df.index: if len(me_model.global_info['START_tRNA']) == 0: me_model.global_info['START_tRNA'] = list(aa2trna_df.loc['Met'])[0] # final check if len(me_model.global_info['START_tRNA']) == 0: logging.warning('Unable to identify at least one \'tRNA-Met\' or \'tRNA-fMet\' annotation from the \'Definition\' column in the organism-specific matrix.') else: logging.warning('No tRNA genes were identified from their locus tags.') # DataFrame mapping tRNAs (list) and the encoded amino acid (index), per organelle # aa2trna derives from trna_to_aa, so it also accounts for misacylation: { 'organelle ID' : 'DataFrame of amino acid to load into the tRNA' } aa2codon = {} trna_to_codon = {} for organelle, transl_table in transl_tables.items(): if len(transl_table) == 0: continue codon_table = Bio.Data.CodonTable.generic_by_id[list(transl_table)[0]] dct = { k.replace('T', 'U'):SeqUtils.seq3(v) for k,v in codon_table.forward_table.items() if 'U' not in k } aa2codons = pandas.DataFrame(data = [dct.keys(), dct.values()]).T.groupby(1).agg({0: lambda x: x.tolist()}) # aa2codons derives from the translation table and maps amino acids to the codon aa2codon[organelle] = aa2codons if list(transl_table)[0] == 11: aa2codons.loc['Sec'] = [['UGA']] # an internal UGA encodes selenocysteine df = pandas.concat([aa2codons, aa2trna[organelle]], axis = 1).dropna(how = 'any').explode(1) # Check amino acids warn_trnas = [] for aa in canonical_aas: if aa in aa2trna[organelle].index: pass else: warn_trnas.append(aa) if warn_trnas: self.org.curation_notes['get_trna_to_codon'].append({ 'msg':'Some tRNAs could be missing in the files.', 'triggered_by':warn_trnas, 'importance':'critical', 'to_do':'Identify the respective missing tRNAs and add them to a file (RNAs.txt or genome.gb)'}) trna_to_codon_organelle = { k:v + ['START'] if k in me_model.global_info['START_tRNA'] else v for k,v in zip(df[1].values, df[0].values) } trna_to_codon[organelle] = trna_to_codon_organelle me_model.global_info['trna_to_aa'] = trna_to_aa me_model.global_info['trna_to_codon'] = trna_to_codon me_model.global_info['trna_misacylation'] = trna_misacylation return None def update_enzyme_stoichiometry(self): complexes_df = self.org.complexes_df org_cplx_homolog = self.homology.org_cplx_homolog ref_complexes_df = self.ref.complexes_df mutual_hits = self.homology.mutual_hits cplx_stoich = {} for c, rc in org_cplx_homolog.items(): if "generic" in c: continue if rc not in ref_complexes_df.index: continue for rg in ref_complexes_df["genes"][rc].split(" AND "): rg_id = re.findall(r'.*(?=\(\d*\))', rg)[0] coeff = re.findall(r"(?<=\()[0-9]{1,}", rg) if coeff: coeff = coeff[0] else: coeff = "" if c not in cplx_stoich: cplx_stoich[c] = {} cplx_stoich[c][mutual_hits[rg_id]] = coeff for c, stoich in cplx_stoich.items(): complexes_df.at[c, "genes"] = " AND ".join( [g + "({})".format(coeff) for g, coeff in stoich.items()] ) self.org.complexes_df = complexes_df def update_protein_modification(self): cplx_homolog = self.homology.org_cplx_homolog ref_cplx_homolog = self.homology.ref_cplx_homolog complexes_df = self.org.complexes_df ref_mod_df = self.ref.protein_mod.reset_index().set_index("Core_enzyme") cplx_cofactor_dict = {} protein_mod_dict = {} for c, row in complexes_df.iterrows(): if c in cplx_homolog: rc = cplx_homolog[c] if rc in ref_mod_df.index: if c not in cplx_cofactor_dict: cplx_cofactor_dict[c] = {} ref_mods = ref_mod_df.loc[[rc]] for _,row in ref_mods.iterrows(): mods = row["Modifications"].split(" AND ") cplx = c cofs = [] coeffs = [] for mod in mods: cof = re.findall(r".*(?=\()", mod)[0] coeff = re.findall(r"(?<=\()[0-9]{1,}", mod) if coeff: coeff = coeff[0] cplx += "_mod_{}({})".format(cof, coeff) else: cplx += "_mod_{}".format(cof) coeff = "" cofs.append(cof) coeffs.append(coeff) ref_cplx_homolog[row["Modified_enzyme"]] = cplx cplx_homolog[cplx] = row["Modified_enzyme"] if cplx in self.org.protein_mod.index: continue protein_mod_dict[cplx] = {} protein_mod_dict[cplx]["Core_enzyme"] = c protein_mod_dict[cplx]["Modifications"] = " AND ".join( "{}({})".format(cof, coeff) for cof, coeff in zip(cofs, coeffs) ) protein_mod_dict[cplx]["Source"] = "Homology" protein_mod = pandas.DataFrame.from_dict(protein_mod_dict).T protein_mod.index.name = "Modified_enzyme" self.org.protein_mod = pandas.concat([self.org.protein_mod,protein_mod]) def update_enzyme_reaction_association(self): enz_rxn_assoc_df = self.org.enz_rxn_assoc_df protein_mod = self.org.protein_mod for r,row in tqdm.tqdm(enz_rxn_assoc_df.iterrows(), 'Updating enzyme reaction association...', bar_format = bar_format, total=enz_rxn_assoc_df.shape[0]): if r in self.org.manual_curation.enz_rxn_assoc_df.data.index: # Only update those not in manual curation continue cplxs = row['Complexes'] reaction_cplx_list = [] for cplx_id in cplxs.split(" OR "): cplx_id = cplx_id.split("_mod_")[0] if cplx_id in protein_mod["Core_enzyme"].values: # Use modifications cplx_mods = protein_mod[ protein_mod["Core_enzyme"].eq(cplx_id) ].index for cplx_id in cplx_mods: if "Oxidized" in cplx_id: cplx_id = cplx_id.split("_mod_Oxidized")[0] reaction_cplx_list.append(cplx_id) else: reaction_cplx_list.append(cplx_id) enz_rxn_assoc_df.at[r,'Complexes'] = " OR ".join(reaction_cplx_list) def update_TU_df(self): return NotImplemented def protein_location_from_homology(self): protein_location = self.org.protein_location complexes_df = self.org.complexes_df org_proteins_df = self.org.proteins_df org_cplx_homolog = self.homology.org_cplx_homolog mutual_hits = self.homology.mutual_hits ref_protein_location = self.ref.protein_location if not isinstance(ref_protein_location, pandas.DataFrame): return for rc,row in tqdm.tqdm(ref_protein_location.iterrows(), 'Updating protein location from homology...', bar_format = bar_format, total=ref_protein_location.shape[0]): ref_gene = re.findall(r'.*(?=\(.*\))', row['Protein'])[0] if ref_gene not in mutual_hits: continue org_gene = mutual_hits[ref_gene] ref_info = ref_protein_location[ref_protein_location['Protein'].str.contains(ref_gene)] gene_string = r'{}\('.format(org_gene) org_cplxs = complexes_df[complexes_df['genes'].str.contains(gene_string)].index for org_cplx in org_cplxs: if protein_location.any().any() and \ org_cplx in protein_location.index and \ protein_location.loc[[org_cplx]]['Protein'].str.contains(gene_string).any().any(): # Check if already in protein location, if not add. continue tmp = pandas.DataFrame.from_dict( { org_cplx: { "Complex_compartment": ref_info["Complex_compartment"].values[0], "Protein": '{}()'.format(org_gene), "Protein_compartment": ref_info["Protein_compartment"].values[0], "translocase_pathway": ref_info["translocase_pathway"].values[0], } }).T protein_location = pandas.concat([protein_location, tmp], axis = 0, join = 'outer') protein_location.index.name = 'Complex' self.org.protein_location = protein_location def update_translocation_multipliers(self): ref_multipliers = self.ref.translocation_multipliers ref_cplx_homolog = self.homology.ref_cplx_homolog mutual_hits = self.homology.mutual_hits multipliers = {} for ref_cplx, genes in tqdm.tqdm(ref_multipliers.items(), 'Updating translocation multipliers from homology...', bar_format = bar_format, total=len(ref_multipliers)): if ref_cplx not in ref_cplx_homolog: continue org_cplx = ref_cplx_homolog[ref_cplx] multipliers[org_cplx] = {} for g, value in genes.items(): if not value: continue if g not in mutual_hits: continue org_g = mutual_hits[g] multipliers[org_cplx][org_g] = value self.org.translocation_multipliers = multipliers def update_lipoprotein_precursors(self): ref_lipoprotein_precursors = self.ref.lipoprotein_precursors mutual_hits = self.homology.mutual_hits lipoprotein_precursors = {} for c, g in tqdm.tqdm(ref_lipoprotein_precursors.items(), 'Updating lipoprotein precursors from homology...', bar_format = bar_format, total=len(ref_lipoprotein_precursors)): if g in mutual_hits: og = mutual_hits[g] lipoprotein_precursors[c] = og self.org.lipoprotein_precursors = lipoprotein_precursors def update_cleaved_methionine(self): cleaved_methionine = self.org.cleaved_methionine ref_cleaved_methionine = self.ref.cleaved_methionine mutual_hits = self.homology.mutual_hits for g in tqdm.tqdm(ref_cleaved_methionine, 'Updating cleaved-methionine proteins from homology...', bar_format = bar_format): if g not in mutual_hits: continue cleaved_methionine.append(mutual_hits[g]) self.org.cleaved_methionine = cleaved_methionine def update_me_mets(self): ref_me_mets = self.ref.me_mets ref_cplx_homolog = self.homology.ref_cplx_homolog protein_mod = self.org.protein_mod.reset_index().set_index("Core_enzyme") me_mets = self.org.me_mets m_model = self.org.m_model d = {} warn_skip = [] warn_found = [] for ref_m, row in tqdm.tqdm(ref_me_mets.iterrows(), 'Mapping M-metabolites to E-metabolites...', bar_format = bar_format, total=ref_me_mets.shape[0]): if ref_m not in m_model.metabolites: if "__" in ref_m: ref_m = ref_m.replace("__", "_") if ref_m not in m_model.metabolites: warn_skip.append(ref_m) continue else: warn_found.append(ref_m) if ref_m in me_mets.index: if me_mets.loc[ref_m]['type'] != 'CURATE': continue me_mets.drop(ref_m,inplace=True) ref_me = row["me_id"] ref_changetype = row['type'] d[ref_m] = {} me_id = '' if ref_me in ref_cplx_homolog: org_me = ref_cplx_homolog[ref_me] me_id = org_me changetype = "REPLACE" elif ref_changetype == "REMOVE": changetype = "REMOVE" else: changetype = "CURATE" d[ref_m]["me_id"] = me_id d[ref_m]["type"] = changetype if d: df = pandas.DataFrame.from_dict(d).T df.index.name = "id" me_mets = pandas.concat([me_mets, df], axis = 0, join = 'outer') self.org.me_mets = me_mets.fillna('') if warn_skip: self.org.curation_notes['update_me_mets'].append({ 'msg':'Some metabolites in me_metabolites.txt are not in m_model, so they were skipped.', 'triggered_by':warn_skip, 'importance':'medium', 'to_do':'Confirm these metabolites are correctly defined in me_metabolites.txt'}) if warn_found: self.org.curation_notes['update_me_mets'].append({ 'msg':'Some metabolites in me_metabolites.txt were found in reference m_model after replacing __ with _', 'triggered_by':warn_found, 'importance':'medium', 'to_do':'Confirm these metabolites are correctly defined in me_metabolites.txt'}) def update_generics_from_homology(self): generic_dict = self.org.generic_dict ref_generic_dict = self.ref.generic_dict ref_cplx_homolog = self.homology.ref_cplx_homolog for k, v in tqdm.tqdm(ref_generic_dict.items(), 'Updating generics from homology...', bar_format = bar_format, total=len(ref_generic_dict)): if k not in generic_dict: generic_dict[k] = {"enzymes":[]} #continue if generic_dict[k]['enzymes']: continue ref_cplxs = v['enzymes'] for i in ref_cplxs: if i in ref_cplx_homolog: homolog = ref_cplx_homolog[i] if homolog not in generic_dict[k]['enzymes']: generic_dict[k]['enzymes'].append(homolog) def update_folding_dict_from_homology(self): org_folding_dict = self.org.folding_dict ref_folding_dict = self.ref.folding_dict mutual_hits = self.homology.mutual_hits for k, v in tqdm.tqdm(ref_folding_dict.items(), 'Updating folding from homology...', bar_format = bar_format, total=len(ref_folding_dict)): ref_cplxs = v['enzymes'] for i in ref_cplxs: if i in mutual_hits: homolog = mutual_hits[i] if homolog not in org_folding_dict[k]['enzymes']: org_folding_dict[k]['enzymes'].append(homolog) def update_ribosome_subreactions_from_homology(self): ref_ribosome_subreactions = self.ref.ribosome_subreactions org_ribosome_subreactions = self.org.ribosome_subreactions ref_cplx_homolog = self.homology.ref_cplx_homolog warn_proteins = [] for k, v in tqdm.tqdm(ref_ribosome_subreactions.items(), 'Updating ribosome subreaction machinery from homology...', bar_format = bar_format, total=len(ref_ribosome_subreactions)): ref_cplx = v["enzyme"] if ref_cplx in ref_cplx_homolog: org_cplx = ref_cplx_homolog[v["enzyme"]] if k not in org_ribosome_subreactions: org_ribosome_subreactions[k] = {"enzyme":""} defined_cplx = org_ribosome_subreactions[k]["enzyme"] if defined_cplx: continue # if not defined_cplx or defined_cplx in org_cplx or 'CPLX_dummy' in defined_cplx: org_ribosome_subreactions[k]["enzyme"] = org_cplx # else: warn_proteins.append({ 'subreaction':k, 'defined_complex':defined_cplx, 'inferred_complex':org_cplx }) # Warnings if warn_proteins: self.org.curation_notes['update_ribosome_subreactions_from_homology'].append({ 'msg':'Some enzymes defined in me_builder.org.ribosome_subreactions are different from the ones inferred from homology', 'triggered_by':warn_proteins, 'importance':'medium', 'to_do':'Confirm whether the definitions or homology calls are correct in me_builder.org.ribosome_subreactions. Curate the inputs in ribosome_subreactions.txt accordingly.'}) # def update_rrna_modifications_from_homology(self): # ref_rrna_modifications = self.ref.rrna_modifications # org_rrna_modifications = self.org.rrna_modifications # ref_cplx_homolog = self.homology.ref_cplx_homolog # warn_proteins = [] # for k, v in tqdm.tqdm(ref_rrna_modifications.items(), # 'Updating rRNA modifications from homology...', # bar_format = bar_format, # total=len(ref_rrna_modifications)): # ref_cplx = v["machine"] # if ref_cplx in ref_cplx_homolog: # org_cplx = ref_cplx_homolog[v["machine"]] # defined_cplx = org_rrna_modifications[k]["machine"] # if not defined_cplx or defined_cplx in org_cplx or 'CPLX_dummy' in defined_cplx: # org_rrna_modifications[k]["machine"] = org_cplx # else: # warn_proteins.append({ # 'subreaction':k, # 'defined_complex':defined_cplx, # 'inferred_complex':org_cplx # }) # # Warnings # if warn_proteins: # self.org.curation_notes['update_rrna_modifications_from_homology'].append({ # 'msg':'Some enzymes defined in me_builder.org.rrna_modifications are different from the ones inferred from homology', # 'triggered_by':warn_proteins, # 'importance':'medium', # 'to_do':'Confirm whether the definitions or homology calls are correct in me_builder.org.rrna_modifications. Curate the inputs in rrna_modifications.txt accordingly.'}) def update_amino_acid_trna_synthetases_from_homology(self): ref_amino_acid_trna_synthetase = self.ref.amino_acid_trna_synthetase org_amino_acid_trna_synthetase = self.org.amino_acid_trna_synthetase manual_curation = self.org.manual_curation.amino_acid_trna_synthetase.data protein_mod = self.org.protein_mod ref_cplx_homolog = self.homology.ref_cplx_homolog warn_proteins = [] for k, v in tqdm.tqdm(ref_amino_acid_trna_synthetase.items(), 'Updating tRNA synthetases from homology...', bar_format = bar_format, total=len(ref_amino_acid_trna_synthetase)): ref_cplx = v if ref_cplx in ref_cplx_homolog: org_cplx = ref_cplx_homolog[v] # From homology defined_cplx = manual_curation[k] # From manual if defined_cplx: # If has been manually curated, do not override continue inferred_cplx = org_amino_acid_trna_synthetase[k] # From regex if inferred_cplx: # Catch whether the inferred complex has a modification mod = protein_mod[protein_mod["Core_enzyme"]==inferred_cplx].index if not mod.empty: inferred_cplx = mod[0] if inferred_cplx != org_cplx: warn_proteins.append({ 'amino_acid':k, 'inferred_ligase':inferred_cplx, 'homology_ligase':org_cplx }) org_cplx = inferred_cplx # Use inferred complex instead of homology # Update the complex org_amino_acid_trna_synthetase[k] = org_cplx # Warnings if warn_proteins: self.org.curation_notes['update_amino_acid_trna_synthetases_from_homology'].append({ 'msg':'Some enzymes identified in me_builder.org.amino_acid_trna_synthetase are different from the ones inferred from homology', 'triggered_by':warn_proteins, 'importance':'medium', 'to_do':'Confirm whether the definitions or homology calls are correct in me_builder.org.amino_acid_trna_synthetase. Curate the inputs in amino_acid_trna_synthetase.txt accordingly.'}) def update_peptide_release_factors_from_homology(self): ref_peptide_release_factors = self.ref.peptide_release_factors org_peptide_release_factors = self.org.peptide_release_factors ref_cplx_homolog = self.homology.ref_cplx_homolog warn_proteins = [] for k, v in tqdm.tqdm(ref_peptide_release_factors.items(), 'Updating peptide release factors from homology...', bar_format = bar_format, total=len(ref_peptide_release_factors)): ref_cplx = v['enzyme'] if ref_cplx in ref_cplx_homolog: org_cplx = ref_cplx_homolog[ref_cplx] defined_cplx = org_peptide_release_factors[k]['enzyme'] if defined_cplx: continue # if not defined_cplx or defined_cplx in org_cplx or 'CPLX_dummy' in defined_cplx: org_peptide_release_factors[k]['enzyme'] = org_cplx # else: warn_proteins.append({ 'subreaction':k, 'defined_complex':defined_cplx, 'inferred_complex':org_cplx }) # Warnings if warn_proteins: self.org.curation_notes['update_peptide_release_factors_from_homology'].append({ 'msg':'Some enzymes defined in me_builder.org.peptide_release_factors are different from the ones inferred from homology', 'triggered_by':warn_proteins, 'importance':'medium', 'to_do':'Confirm whether the definitions or homology calls are correct in me_builder.org.peptide_release_factors. Curate the inputs in peptide_release_factors.txt accordingly.'}) def update_initiation_subreactions_from_homology(self): ref_initiation_subreactions = self.ref.initiation_subreactions org_initiation_subreactions = self.org.initiation_subreactions ref_cplx_homolog = self.homology.ref_cplx_homolog for k, v in tqdm.tqdm(ref_initiation_subreactions.items(), 'Updating translation initiation subreactions from homology...', bar_format = bar_format, total=len(ref_initiation_subreactions)): ref_cplxs = v["enzymes"] defined_cplxs = org_initiation_subreactions[k]["enzymes"] if defined_cplxs: continue org_cplxs = [ ref_cplx_homolog[i] for i in ref_cplxs if i in ref_cplx_homolog ] for i in org_cplxs: if i in defined_cplxs: continue defined_cplxs.append(i) def update_elongation_subreactions_from_homology(self): ref_elongation_subreactions = self.ref.elongation_subreactions org_elongation_subreactions = self.org.elongation_subreactions ref_cplx_homolog = self.homology.ref_cplx_homolog for k, v in tqdm.tqdm(ref_elongation_subreactions.items(), 'Updating translation elongation subreactions from homology...', bar_format = bar_format, total=len(ref_elongation_subreactions)): ref_cplxs = v["enzymes"] defined_cplxs = org_elongation_subreactions[k]["enzymes"] if defined_cplxs: continue org_cplxs = [ ref_cplx_homolog[i] for i in ref_cplxs if i in ref_cplx_homolog ] for i in org_cplxs: if i in defined_cplxs: continue defined_cplxs.append(i) def update_termination_subreactions_from_homology(self): ref_termination_subreactions = self.ref.termination_subreactions org_termination_subreactions = self.org.termination_subreactions ref_cplx_homolog = self.homology.ref_cplx_homolog for k, v in tqdm.tqdm(ref_termination_subreactions.items(), 'Updating translation termination subreactions from homology...', bar_format = bar_format, total=len(ref_termination_subreactions)): ref_cplxs = v["enzymes"] defined_cplxs = org_termination_subreactions[k]["enzymes"] if defined_cplxs: continue org_cplxs = [ ref_cplx_homolog[i] for i in ref_cplxs if i in ref_cplx_homolog ] for i in org_cplxs: if i in defined_cplxs: continue defined_cplxs.append(i) def update_special_trna_subreactions_from_homology(self): ref_special_trna_subreactions = self.ref.special_trna_subreactions org_special_trna_subreactions = self.org.special_trna_subreactions ref_cplx_homolog = self.homology.ref_cplx_homolog for k, v in tqdm.tqdm(ref_special_trna_subreactions.items(), 'Updating special tRNA subreactions from homology...', bar_format = bar_format, total=len(ref_special_trna_subreactions)): ref_cplxs = v["enzymes"] defined_cplxs = org_special_trna_subreactions[k]["enzymes"] if defined_cplxs: continue org_cplxs = [ ref_cplx_homolog[i] for i in ref_cplxs if i in ref_cplx_homolog ] for i in org_cplxs: if i not in defined_cplxs: defined_cplxs.append(i) def update_rna_degradosome_from_homology(self): ref_rna_degradosome = self.ref.rna_degradosome org_rna_degradosome = self.org.rna_degradosome ref_cplx_homolog = self.homology.ref_cplx_homolog for k, v in tqdm.tqdm(ref_rna_degradosome.items(), 'Updating RNA degradosome composition from homology...', bar_format = bar_format, total=len(ref_rna_degradosome)): ref_cplxs = v['enzymes'] defined_cplxs = org_rna_degradosome[k]['enzymes'] if defined_cplxs: continue org_cplxs = [ ref_cplx_homolog[i] for i in ref_cplxs if i in ref_cplx_homolog ] for i in org_cplxs: if i in defined_cplxs: continue defined_cplxs.append(i) def update_excision_machinery_from_homology(self): ref_excision_machinery = self.ref.excision_machinery org_excision_machinery = self.org.excision_machinery ref_cplx_homolog = self.homology.ref_cplx_homolog for k, v in tqdm.tqdm(ref_excision_machinery.items(), 'Updating excision machinery from homology...', bar_format = bar_format, total=len(ref_excision_machinery)): ref_cplxs = v['enzymes'] defined_cplxs = org_excision_machinery[k]['enzymes'] if defined_cplxs: continue org_cplxs = [ ref_cplx_homolog[i] for i in ref_cplxs if i in ref_cplx_homolog ] for i in org_cplxs: if self._is_base_complex_in_list(i,defined_cplxs): continue defined_cplxs.append(i) def update_special_modifications_from_homology(self): ref_special_trna_subreactions = self.ref.special_modifications org_special_trna_subreactions = self.org.special_modifications ref_cplx_homolog = self.homology.ref_cplx_homolog for k, v in tqdm.tqdm(ref_special_trna_subreactions.items(), 'Updating tRNA subreactions from homology...', bar_format = bar_format, total=len(ref_special_trna_subreactions)): ref_cplxs = v["enzymes"] defined_cplxs = org_special_trna_subreactions[k]["enzymes"] if defined_cplxs: continue org_cplxs = [ ref_cplx_homolog[i] for i in ref_cplxs if i in ref_cplx_homolog ] for i in org_cplxs: # WARNING: stoichiometry of special modifications is in subreactions.txt # if v["stoich"]: # org_special_trna_subreactions[k]["stoich"] = v["stoich"] if self._is_base_complex_in_list(i,defined_cplxs): continue defined_cplxs.append(i) def _is_base_complex_in_list(self,cplx,lst): return cplx in set(i.split('_mod_')[0] for i in lst) def update_rna_modification_from_homology(self): ref_rna_modification = self.ref.rna_modification_df org_rna_modification = self.org.rna_modification_df ref_cplx_homolog = self.homology.ref_cplx_homolog for mod,row in ref_rna_modification.iterrows(): enzymes = set(row['enzymes'].split('AND')) positions = row['positions'].split(',') if mod in org_rna_modification.index: df = org_rna_modification.loc[[mod]] mod_type = row['type'] if df['type'].str.contains(mod_type).any(): continue hits = enzymes.intersection(set(ref_cplx_homolog.keys())) if len(hits) == len(enzymes): row = row.copy() row['enzymes'] = ' AND '.join([ref_cplx_homolog[i] for i in enzymes]) row['positions'] = ','.join(positions) row['source'] = 'Homology' df = pandas.DataFrame(row).T org_rna_modification = pandas.concat([org_rna_modification,df]) org_rna_modification.index.name = 'modification' self.org.rna_modification_df = org_rna_modification # def update_rna_modification_from_homology(self): # ref_rna_modification = self.ref.rna_modification # org_rna_modification = self.org.rna_modification # ref_cplx_homolog = self.homology.ref_cplx_homolog # defined_mods = set() # for _,v in org_rna_modification.items(): # for i in v: # defined_mods.add(i) # for k, v in tqdm.tqdm(ref_rna_modification.items(), # 'Updating RNA modification machinery from homology...', # bar_format = bar_format, # total=len(ref_rna_modification)): # if k not in ref_cplx_homolog: continue # org_cplx = ref_cplx_homolog[k] # if self._is_base_complex_in_list(org_cplx,list(org_rna_modification.keys())): # continue # if org_cplx not in org_rna_modification: # org_rna_modification[org_cplx] = [] # org_rna_modification[org_cplx] += [i for i in v.copy() if i not in defined_mods] # org_rna_modification[org_cplx] = list(set(org_rna_modification[org_cplx])) # org_rna_modification = {k:v for k,v in org_rna_modification.items() if v} def update_lipid_modifications_from_homology(self): ref_lipid_modifications = self.ref.lipid_modifications org_lipid_modifications = self.org.lipid_modifications ref_cplx_homolog = self.homology.ref_cplx_homolog for k, v in tqdm.tqdm(ref_lipid_modifications.items(), 'Updating lipid modification machinery from homology...', bar_format = bar_format, total=len(ref_lipid_modifications)): if k not in org_lipid_modifications: org_lipid_modifications[k] = [] for i in v: if i not in ref_cplx_homolog: continue org_cplx = ref_cplx_homolog[i] # if org_cplx not in org_lipid_modifications[k]: if self._is_base_complex_in_list(org_cplx,org_lipid_modifications[k]): continue org_lipid_modifications[k].append(org_cplx) def update_transcription_subreactions_from_homology(self): ref_transcription_subreactions = self.ref.transcription_subreactions org_transcription_subreactions = self.org.transcription_subreactions ref_cplx_homolog = self.homology.ref_cplx_homolog for k, v in tqdm.tqdm(ref_transcription_subreactions.items(), 'Updating transcription subreactions machinery from homology...', bar_format = bar_format, total=len(ref_transcription_subreactions)): ref_cplxs = v["enzymes"] defined_cplxs = org_transcription_subreactions[k]["enzymes"] if defined_cplxs: continue org_cplxs = [ ref_cplx_homolog[i] for i in ref_cplxs if i in ref_cplx_homolog ] for i in org_cplxs: if self._is_base_complex_in_list(i,defined_cplxs): continue defined_cplxs.append(i) def update_translocation_pathways_from_homology(self): ref_translocation_pathways = self.ref.translocation_pathways org_translocation_pathways = self.org.translocation_pathways ref_cplx_homolog = self.homology.ref_cplx_homolog for k, v in tqdm.tqdm(ref_translocation_pathways.items(), 'Updating translocation machinery from homology...', bar_format = bar_format, total=len(ref_translocation_pathways)): ref_cplxs = v["enzymes"] if k not in org_translocation_pathways: org_translocation_pathways[k] = {"enzymes":[]}#v.copy() defined_cplxs = org_translocation_pathways[k]["enzymes"] if defined_cplxs: continue org_cplxs = [ ref_cplx_homolog[i] for i in ref_cplxs if i in ref_cplx_homolog ] for i in org_cplxs: if self._is_base_complex_in_list(i,defined_cplxs): continue defined_cplxs.append(i) def update_m_model(self): org_model = self.org.m_model ref_model = self.ref.m_model for m in tqdm.tqdm(org_model.metabolites, 'Fixing M-model metabolites with homology...', bar_format = bar_format): if m.id not in ref_model.metabolites: continue ref_m = ref_model.metabolites.get_by_id(m.id) if not m.formula: m.formula = ref_m.formula def update_subreaction_matrix(self): org_subreaction_matrix = self.org.subreaction_matrix if not org_subreaction_matrix.empty:return ref_subreaction_matrix = self.ref.subreaction_matrix org_model = self.org.m_model ref_model = self.ref.m_model ref_cplx_homolog = self.homology.ref_cplx_homolog warn_mets = [] warn_cplxs = [] for subrxn,row in tqdm.tqdm(ref_subreaction_matrix.iterrows(), 'Updating subreaction matrix with homology...', bar_format = bar_format, total=ref_subreaction_matrix.shape[0]): d = {} d[subrxn] = {'Metabolites':'','Stoichiometry':''} met = row['Metabolites'].split('_mod_')[0] if met in self.ref.complexes_df.index: if met in ref_cplx_homolog: if 'mod' in row['Metabolites']: mods = '_mod_' + '_mod_'.join(row['Metabolites'].split('_mod_')[1:]) else: mods = '' d[subrxn]['Metabolites'] = ref_cplx_homolog[met] + mods else: warn_cplxs = [] d[subrxn]['Metabolites'] = 'CPLX_dummy' d[subrxn]['Stoichiometry'] = row['Stoichiometry'] else: if not org_model.metabolites.has_id(met): warn_mets.append(met) d[subrxn]['Metabolites'] = met d[subrxn]['Stoichiometry'] = row['Stoichiometry'] org_subreaction_matrix = \ pandas.concat([org_subreaction_matrix, pandas.DataFrame.from_dict(d).T], axis = 0, join = 'outer') self.org.subreaction_matrix = org_subreaction_matrix self.org.subreaction_matrix.index.name = 'Reaction' # self.org.subreaction_matrix.to_csv(self.org.directory + 'subreaction_matrix.txt') if warn_mets: self.curation_notes['update_subreaction_matrix'].append({ 'msg':'Some metabolites in subreaction_matrix were added from reference but are not in M-model', 'triggered_by':warn_mets, 'importance':'high', 'to_do':'Map these metabolites or replace the subreaction'}) if warn_cplxs: self.curation_notes['update_subreaction_matrix'].append({ 'msg':'Some complexes in subreaction_matrix of reference could not be mapped', 'triggered_by':warn_cplxs, 'importance':'high', 'to_do':'Map these complexes or replace the subreaction'}) def update_from_homology(self): self.update_enzyme_stoichiometry() self.update_protein_modification() self.update_TU_df() self.update_translocation_pathways_from_homology() self.protein_location_from_homology() self.update_translocation_multipliers() self.update_lipoprotein_precursors() self.update_cleaved_methionine() self.update_me_mets() self.update_generics_from_homology() self.update_folding_dict_from_homology() self.update_ribosome_subreactions_from_homology() # self.update_rrna_modifications_from_homology() self.update_amino_acid_trna_synthetases_from_homology() self.update_peptide_release_factors_from_homology() self.update_transcription_subreactions_from_homology() self.update_initiation_subreactions_from_homology() self.update_elongation_subreactions_from_homology() self.update_termination_subreactions_from_homology() self.update_special_trna_subreactions_from_homology() self.update_rna_degradosome_from_homology() self.update_excision_machinery_from_homology() self.update_special_modifications_from_homology() self.update_rna_modification_from_homology() self.update_lipid_modifications_from_homology() self.update_m_model() self.update_subreaction_matrix() def fill(self, fill_with='CPLX_dummy'): coralme.builder.troubleshooting.fill_builder(self,fill_with='CPLX_dummy') def check(self): t_pathways = self.org.protein_location['translocase_pathway'].unique() file_t_paths = set() for t in t_pathways: for i in t: file_t_paths.add(i) defined_t_paths = set() for t in tqdm.tqdm(self.org.translocation_pathways.keys(), 'Checking defined translocation pathways...', bar_format = bar_format): defined_t_paths.add(coralme.builder.dictionaries.pathway_to_abbreviation[t]) missing_pathways = file_t_paths - defined_t_paths if missing_pathways: self.org.curation_notes['check'].append({ 'msg':'Some translocase pathways in org.protein_location are not defined in org.translocation_pathways.', 'triggered_by':list(missing_pathways), 'importance':'high', 'to_do':'Fill in translocation pathways in org.translocation_pathways or in translocation_pathways.txt' }) me_mets = self.org.me_mets warn_mets = list(me_mets[me_mets['type'] == 'CURATE'].index) # Warnings if warn_mets: self.org.curation_notes['check'].append({ 'msg':'Some metabolites in me_metabolites.txt need curation', 'triggered_by':warn_mets, 'importance':'medium', 'to_do':'Map or remove these metabolites in me_metabolites.txt'}) def load(self, directory): with open(directory, "rb") as f: tmp = pickle.load(f) return tmp def save(self, directory=False): if not directory: directory = self.org.directory + "builder.pickle" with open(directory, "wb") as f: pickle.dump(self, f) # def load_me(self,filename='me_model.pickle'): # with open(self.org.directory + '/'+filename, "rb") as f: # return pickle.load(f) def save_builder_info(self): include = [float,int,str,pandas.DataFrame,dict] floats = {} dataframes = {} for i in dir(self.org): if i[0] == '_': continue attr = getattr(self.org,i) for c in include: if isinstance(attr,c): if c == float: floats[i] = attr elif c == dict: dataframes[i] = pandas.DataFrame.from_dict({'value':attr}) elif c == pandas.DataFrame: dataframes[i] = attr break dataframes['parameters'] = pandas.DataFrame.from_dict({'value':floats}) directory = self.org.directory + 'builder_info/' if not os.path.exists(directory): os.mkdir(directory) for k,v in dataframes.items(): v.to_csv(directory + k + '.txt') # shortcuts to methods in the MECurator class def find_issue(self,query): coralme.builder.curation.MECurator(self.org).find_issue_with_query(query) # shortcuts to methods in the MEReconstruction and METroubleshooter classes def build_me_model(self, update = True, prune = True, overwrite = False, skip = None): coralme.builder.main.MEReconstruction(self).build_me_model(update = update, prune = prune, overwrite = overwrite, skip = skip) def troubleshoot(self, growth_key_and_value = None, skip = set(), guesses = set(), met_types = set(), platform = None, solver = 'qminos', savefile = None, gapfill_cofactors = False): """ growth_key_and_value: dictionary of Sympy.Symbol and value to replace skip: set of ME-components to not evaluate during gapfilling guesses: set of ME-components to try first before any other set of components solver: 'qminos', 'gurobi' or 'cplex' savefile: file path (absolute or relative) to save the ME-model as a pickle file """ coralme.builder.main.METroubleshooter(self).troubleshoot(growth_key_and_value, skip = skip, guesses = guesses, platform = platform, solver = solver, savefile = savefile,gapfill_cofactors=gapfill_cofactors) coralme.builder.notes.save_curation_notes( self.curation_notes, self.configuration['out_directory'] + '/curation_notes.json' ) coralme.builder.notes.publish_curation_notes( self.curation_notes, self.configuration['out_directory']+ '/curation_notes.txt' ) def input_data(self, gem, overwrite): tmp1, tmp2 = coralme.builder.main.MEReconstruction(self).input_data(gem, overwrite) self.df_tus, self.df_rmsc, self.df_subs, self.df_mets, self.df_keffs = tmp1 self.df_data, self.df_rxns, self.df_cplxs, self.df_ptms, self.df_enz2rxn, self.df_rna_mods, self.df_protloc, self.df_transpaths = tmp2 return tmp1, tmp2
[docs] class MEReconstruction(MEBuilder): """ MEReconstruction class for reconstructing a ME-model from user/automated input Parameters ---------- MEBuilder : coralme.builder.main.MEBuilder """ def __init__(self, builder): # only if builder.generate_files() was run before builder.build_me_model() if hasattr(builder, 'org'): self.org = builder.org if hasattr(builder, 'ref'): self.ref = builder.ref if hasattr(builder, 'homology'): self.homology = builder.homology if hasattr(builder, 'df_data'): self.df_data = builder.df_data self.df_tus = builder.df_tus self.df_rmsc = builder.df_rmsc self.df_subs = builder.df_subs self.df_mets = builder.df_mets self.df_keffs = builder.df_keffs self.df_rxns = builder.df_rxns self.df_cplxs = builder.df_cplxs self.df_ptms = builder.df_ptms self.df_enz2rxn = builder.df_enz2rxn self.df_rna_mods = builder.df_rna_mods self.df_protloc = builder.df_protloc self.df_transpaths = builder.df_transpaths if hasattr(builder, 'debug_data'): self.debug_data = builder.debug_data self.logger = builder.logger self.configuration = builder.configuration # reboot me_model if len(builder.me_model.reactions) == 1 and len(builder.me_model.metabolites): self.me_model = builder.me_model else: self.me_model = coralme.core.model.MEModel(name = self.configuration.get('ME-Model-ID', 'coralME'), mu = self.configuration.get('growth_key', 'mu')) self.curation_notes = builder.curation_notes def input_data(self, m_model, overwrite = False): if hasattr(self, 'df_data'): return (self.df_tus, self.df_rmsc, self.df_subs, self.df_mets, self.df_keffs), (self.df_data, self.df_rxns, self.df_cplxs, self.df_ptms, self.df_enz2rxn, self.df_rna_mods, self.df_protloc, self.df_transpaths) config = self.configuration # Inferred information if hasattr(self, 'org'): config['selenocysteine_enzymes'] = self.org.special_trna_subreactions['sec_addition_at_UGA']['enzymes'] logging.warning('The selenocysteine complex SelAB was set from homology data.') config['pg_pe_160'] = self.org.lipid_modifications.get('pg_pe_160', 'CPLX_dummy') logging.warning('The prolipoprotein diacylglyceryl transferase and the signal peptidase homologs were set from homology data.') config['other_lipids'] = self.org.lipid_modifications.get('other_lipids', 'CPLX_dummy') logging.warning('The apolipoprotein N-acyltransferase homolog was set from homology data.') lst = self.org.generic_dict.get('generic_fes_transfers_complex', {'enzymes' : ['CPLX_dummy']})['enzymes'] # ecolime = ['CPLX0-7617', 'CPLX0-7824', 'IscA_tetra'] # if len(lst) != 3: # lst = lst + ['CPLX_dummy'] * (3 - len(lst)) config['complex_cofactors']['fes_transfers'] = lst logging.warning('The iron-sulfur cluster insertion homologs were set from homology data.') self.org.get_reaction_keffs() # saves a file to <out_directory>/building_data/reaction_median_keffs.txt # config['df_reaction_keff_consts'] = config.get('out_directory', '.') + '/building_data/reaction_median_keffs.txt' # include rna_polymerases, lipids and lipoproteins from automated info and save new configuration file if config.get('rna_polymerases', None) is None or config.get('rna_polymerases') == {}: if hasattr(self, 'org'): config['default_sigma_factor'] = self.org.rpod config['rna_polymerases'] = self.org.rna_polymerase_id_by_sigma_factor logging.warning('The RNA Polymerases (core enzyme and sigma factors) information was set from homology data.') ## replace IDs #for name, rnap in config['rna_polymerases'].items(): #if hasattr(self, 'homology'): #for key, value in rnap.items(): #config['rna_polymerases'][name][key] = self.homology.org_cplx_homolog.get(value, value) #config['rna_polymerases'][name][key] = self.homology.org_cplx_homolog.get(value, value.replace('-MONOMER', '_MONOMER')) if config.get('lipid_modifications', None) is None or len(config.get('lipid_modifications')) == 0: if hasattr(self, 'org'): config['lipid_modifications'] = [ x for x in self.org.lipids if x.endswith('_p') and x.startswith('pg') and not x.startswith('pgp') ] logging.warning('The lipid modifications were set from M-model metabolites.') if config.get('lipoprotein_precursors', None) is None or len(config.get('lipoprotein_precursors')) == 0: if hasattr(self, 'org'): config['lipoprotein_precursors'] = self.org.lipoprotein_precursors logging.warning('The lipoprotein precursors were set from homology data.') if config.get('ngam', None) is None: if hasattr(self, 'org'): config['ngam'] = self.org.NGAM logging.warning('The ATPM value (ATP requirement for maintenance; also NGAM) was set from the M-model.') if config.get('gam', None) is None: if hasattr(self, 'org'): config['gam'] = self.org.GAM logging.warning('The GAM value (ATP requirement for growth) was set from the M-model or default value.') # modify options #config['create_files'] = False #config['run_bbh_blast'] = False #config['dev_reference'] = False if hasattr(self, 'org') and len(config.get('translocation_multipliers', {})) == 0: config['translocation_multipliers'] = { k:{ k:v for k,v in v.items() if v != 0 } for k,v in self.org.translocation_multipliers.items() } logging.warning('The translocation multipliers for yidC and tat homologs were set from homology data.') if hasattr(self, 'org') and len(config.get('amino_acid_trna_synthetase', {})) == 0: config['amino_acid_trna_synthetase'] = self.org.amino_acid_trna_synthetase logging.warning('The tRNA synthetases were set from homology data.') if hasattr(self, 'org') and len(config.get('defer_to_rxn_matrix', [])) == 0: config['defer_to_rxn_matrix'] = [self.org.biomass] if self.org.biomass is not None else [] logging.warning('The biomass reaction will be skipped during the ME reconstruction steps.') if hasattr(self, 'org') and len(config.get('peptide_release_factors', [])) == 0: config['peptide_release_factors'] = { k:v['enzyme'] for k,v in self.org.peptide_release_factors.items() } logging.warning('The peptide release factors were set from homology data.') if not 'FMETTRS' in config.get('defer_to_rxn_matrix', []): config['defer_to_rxn_matrix'].append('FMETTRS') logging.warning('The FMETTRS reaction from the M-model will be replaced by a SubReaction during the ME-model reconstruction steps.') if not 'ATPM' in config.get('defer_to_rxn_matrix', []): config['defer_to_rxn_matrix'].append('ATPM') logging.warning('The ATPM reaction from the M-model will be replaced by a SummaryVariable during the ME-model reconstruction steps.') if hasattr(self, 'org') and len(config.get('braun\'s_lipoproteins', [])) == 0: lst = [ k.split('_mod_')[0] for k,v in self.org.protein_mod.to_dict()['Modifications'].items() if 'palmitate' in v ] config['braun\'s_lipoproteins'] = lst if isinstance(lst, list) else [lst] if len(lst) != 0: logging.warning('The Braun\'s lipoprotein homologs list was set to \'{:s}\'.'.format(', '.join(lst))) def read(filecode, input_type, default, columns = []): filename = config.get(filecode, '') if pathlib.Path(filename).is_file(): file_to_read = filename else: #return pandas.DataFrame(columns = columns) if filecode == 'df_reaction_keff_consts': return pandas.DataFrame(columns = columns) file_to_read = '{:}/building_data/{:s}'.format(config.get('out_directory', '.'), default) df = coralme.builder.flat_files.read(file_to_read) if set(columns).issubset(set(df.columns)): return df else: #logging.warning('Column names in \'{:s}\' does not comply default values: {:s}.'.format(filename, ', '.join(columns))) raise Exception('Column names in \'{:s}\' does not comply default values: {:s}.'.format(filename, ', '.join(columns))) # User inputs # Transcriptional Units cols = ['TU_id', 'replicon', 'genes', 'start', 'stop', 'tss', 'strand', 'rho_dependent', 'rnapol'] df_tus = read('df_TranscriptionalUnits', 'transcriptional units data', 'TUs_from_biocyc.txt', cols) df_tus = df_tus.set_index('TU_id', inplace = False) # Reaction Matrix: reactions, metabolites, compartments, stoichiometric coefficients cols = ['Reaction', 'Metabolites', 'Stoichiometry'] df_rmsc = read('df_matrix_stoichiometry', 'reaction stoichiometry data', 'reaction_matrix.txt', cols) # SubReaction Matrix: subreactions, metabolites, compartments, stoichiometric coefficients cols = ['Reaction', 'Metabolites', 'Stoichiometry'] df_subs = read('df_matrix_subrxn_stoich', 'subreaction stoichiometry data', 'subreaction_matrix.txt', cols) # Orphan and Spontaneous reaction metadata cols = ['name', 'description', 'subsystems', 'is_reversible', 'is_spontaneous'] df_rxns = read('df_metadata_orphan_rxns', 'new reactions metadata', 'orphan_and_spont_reactions.txt', cols) df_rxns = df_rxns.set_index('name', inplace = False) # Metabolites metadata cols = ['id', 'me_id', 'name', 'formula', 'compartment', 'type'] df_mets = read('df_metadata_metabolites', 'new metabolites metadata', 'me_metabolites.txt', cols) df_mets = df_mets.set_index('id', inplace = False) # Effective turnover rates cols = ['reaction', 'direction', 'complex', 'mods', 'keff'] df_keffs = read('df_reaction_keff_consts', 'effective turnover rates', 'reaction_median_keffs.txt', cols) # set new options in the MEBuilder object self.configuration.update(config) # detect if the genbank file was modified using biocyc data gb = '{:s}/building_data/genome_modified.gb'.format(config.get('out_directory', '.')) config['genbank-path'] = gb if pathlib.Path(gb).exists() else config['genbank-path'] if overwrite: new = config.get('new_config_file', 'coralme-config.yaml') yaml = new if new.endswith('.yaml') else '{:s}.yaml'.format(new) with open('{:s}/{:s}'.format(config.get('out_directory', '.'), yaml), 'w') as outfile: anyconfig.dump(config, outfile) logging.warning('New configuration file \'{:s}\' was written with inferred options.'.format(yaml)) #with open('{:s}/{:s}'.format(config.get('out_directory', '.'), new), 'w') as outfile: #anyconfig.dump(config, outfile) #with open('{:s}/{:s}'.format(config.get('out_directory', '.'), new), 'w') as outfile: #anyconfig.dump(config, outfile) # Drop-in replacement of input files: # step1: reactions, complexes, modification of complexes, enzyme-to-reaction mapping # step2a: generics, dnap stoichiometry, ribosome stoichiometry, degradosome stoichiometry, tRNA ligases, RNA modifications # step2b: folding pathways (DnaK, GroEL), N-terminal Methionine Processing, translocation pathways config['df_gene_cplxs_mods_rxns'] = './automated-org-with-refs.xlsx' if config.get('df_gene_cplxs_mods_rxns', '') == '' else config['df_gene_cplxs_mods_rxns'] filename = config['df_gene_cplxs_mods_rxns'] if overwrite: try: pathlib.Path(filename).unlink(missing_ok = True) # python>=3.8 except: if pathlib.Path(filename).exists(): pathlib.Path(filename).unlink() # python==3.7 if pathlib.Path(filename).is_file() and filename.endswith('.xlsx'): df_data = pandas.read_excel(filename, dtype = str).dropna(how = 'all') elif pathlib.Path(filename).is_file() and filename.endswith('.txt'): df_data = pandas.read_csv(filename, sep = '\t', header = 0, dtype = str).dropna(how = 'all') else: # detect if the genbank file was modified using biocyc data gb = '{:s}/building_data/genome_modified.gb'.format(config.get('out_directory', '.')) gb = gb if pathlib.Path(gb).exists() else config['genbank-path'] coralme.core.extended_classes.ListHandler.print_and_log('Writting the Organism-Specific Matrix to {:s}...'.format(config['df_gene_cplxs_mods_rxns'])) # generate a minimal dataframe from the genbank and m-model files df_data = coralme.builder.preprocess_inputs.generate_organism_specific_matrix(gb, config.get('locus_tag', 'locus_tag'), model = m_model) # complete minimal dataframe with automated info from homology df_data = coralme.builder.preprocess_inputs.complete_organism_specific_matrix(self, df_data, model = m_model, output = filename) coralme.core.extended_classes.ListHandler.print_and_log('Organism-Specific Matrix saved to {:s} file.'.format(filename)) # All other inputs and remove unnecessary genes from df_data return (df_tus, df_rmsc, df_subs, df_mets, df_keffs), coralme.builder.preprocess_inputs.get_df_input_from_excel(df_data, df_rxns) def build_me_model(self, update = True, prune = True, overwrite = False, skip = None): """Performs the Build step of the reconstruction. This function will read the synchronized and complemented information in the OSM and build a ME-model. Parameters ---------- update : bool If True, runs the update method of all reactions after building. prune : bool If True, prunes unused reactions and metabolites after building. overwrite : bool If True, overwrites the OSM and other configuration files. """ config = self.configuration model = config.get('ME-Model-ID', 'coralME') out_directory = config.get('out_directory', '.') if not os.path.exists(out_directory): os.mkdir(out_directory) log_directory = config.get('log_directory', '.') if not os.path.exists(log_directory): os.mkdir(log_directory) # ## Part 1: Create a minimum solvable ME-model # set logger log = logging.getLogger() # root logger for hdlr in log.handlers[:]: # remove all old handlers log.removeHandler(hdlr) # Old code works in a separate script; but it works if we remove the old handler logging.basicConfig(filename = '{:s}/MEReconstruction-step1-{:s}.log'.format(log_directory, model), filemode = 'w', level = logging.WARNING, format = log_format) log.addHandler(self.logger['MEReconstruction-step1']) #log.addHandler(logging.StreamHandler(sys.stdout)) logging.captureWarnings(True) coralme.core.extended_classes.ListHandler.print_and_log("Initiating ME-model reconstruction...") # This will include the bare minimum representations of that still produce a working ME-model: # - Metabolic Reactions # - Transcription Reactions # - Translation Reactions # - Complex Formation Reactions # # ### 1) Create a MEModel object and populate its global info # This includes important parameters that are used to calculate coupling constraints as well as organism-specific information such as compartments, GC fraction, etc. me = self.me_model # update default options with user-defined values me.global_info.update(self.configuration) # Define the types of biomass components that will be synthesized in the model me.add_biomass_constraints_to_model(me.global_info['biomass_constraints']) # Define ME-model compartments me._compartments = me.global_info.get('compartments', {}) if 'mc' not in me._compartments.keys(): me._compartments['mc'] = 'ME-model Constraint' logging.warning('Pseudo-compartment \'mc\' (\'ME-model Constraint\') was added into the ME-model.') # Define M-model if hasattr(self, 'org'): me.gem = self.org.m_model else: if me.global_info['m-model-path'].endswith('.json'): me.gem = cobra.io.load_json_model(me.global_info['m-model-path']) else: me.gem = cobra.io.read_sbml_model(me.global_info['m-model-path']) # remove unused genes, reactions, and metabolites cobra.manipulation.delete.remove_genes(me.gem, [ x for x in me.gem.genes if len(x.reactions) == 0 ], remove_reactions = False) coralme.core.model.MEModel.prune_unused_reactions(me.gem) # reactions without metabolites # coralme.core.model.MEModel.prune_unused_metabolites(me.gem) # orphan metabolites # update default options with missing, automated-defined values me.global_info.update(self.configuration) # Read user inputs tmp1, tmp2 = coralme.builder.main.MEReconstruction.input_data(self, me.gem, overwrite) (df_tus, df_rmsc, df_subs, df_mets, df_keffs), (df_data, df_rxns, df_cplxs, df_ptms, df_enz2rxn, df_rna_mods, df_protloc, df_transpaths) = tmp1, tmp2 #TODO: Iterate tmp1 and tmp2 and warn empty dataframes me.internal_data = {} for key in ['df_tus', 'df_rmsc', 'df_subs', 'df_mets', 'df_keffs', 'df_data', 'df_rxns', 'df_cplxs', 'df_ptms', 'df_enz2rxn', 'df_rna_mods', 'df_protloc', 'df_transpaths']: exec('me.internal_data[\'{:s}\'] = {:s}'.format(key, key)) # Remove default ME-model SubReactions from global_info that are not mapped in the organism-specific matrix subrxns = set(df_data[df_data['ME-model SubReaction'].notnull()]['ME-model SubReaction']) # list of subreactions for key in ['peptide_processing_subreactions']: me.global_info[key] = list(set(me.global_info[key]).intersection(subrxns)) # dictionary mapping subreactions and stoichiometry for key in ['transcription_subreactions', 'translation_subreactions']: me.global_info[key] = { k:v for k,v in me.global_info[key].items() if k in subrxns } # ### 2) Load metabolites and build Metabolic reactions # # It creates a new M-model, then incorporates it into the ME-model using the *add_m_model_content* function. Reactions are added directly and metabolites are added as *StoichiometricData* (*me.stoichiometric_data*). # # Different metabolite types have different properties in a ME-model, so complexes are added to the model as a *ComplexData*, not as a *Metabolite*. Components in the M-model that are actually *Complexes* are compiled in the *cplx_lst* variable. # Modify M-model m_model = coralme.builder.flat_files.process_m_model( m_model = me.gem, rxns_data = df_rxns, # metadata of new reactions mets_data = df_mets, # metadata of new metabolites cplx_data = df_cplxs, # metadata of prot-prot/prot-rna complexes reaction_matrix = df_rmsc, # stoichiometric coefficients of new! reactions me_compartments = { v:k for k,v in me._compartments.items() }, repair = True, defer_to_rxn_matrix = me.global_info['defer_to_rxn_matrix']) me.processed_m_model = m_model # Some of the 'metabolites' in the M-model are actually complexes. # We pass those in so they get created as complexes, not metabolites. cplx_dct = coralme.builder.flat_files.get_complex_subunit_stoichiometry(df_cplxs) cplx_lst = [ i.id for i in m_model.metabolites if i.id.split('_mod_')[0] in cplx_dct.keys() ] # Complexes in `cplx_lst` are added as a coralme.core.component.Complex object # Other `metabolites` are added as a coralme.core.component.Metabolite object coralme.util.building.add_m_model_content(me, m_model, complex_metabolite_ids = set(cplx_lst)) # NEW! Add metabolic subreactions (e.g. 10-Formyltetrahydrofolate:L-methionyl-tRNA N-formyltransferase) rxn_to_cplx_dict = coralme.builder.flat_files.get_reaction_to_complex(m_model, df_enz2rxn) reaction_matrix_dict = coralme.builder.flat_files.process_reaction_matrix_dict( df_subs, cplx_data = df_cplxs, me_compartments = { v:k for k,v in me._compartments.items() }) for subreaction, stoichiometry in reaction_matrix_dict.items(): coralme.util.building.add_subreaction_data(me, subreaction, stoichiometry, rxn_to_cplx_dict[subreaction]) # ### 3) Add Transcription and Translation reactions # # To construct the bare minimum components of a transcription and translation reactions. # For example, transcription reactions at this point include nucleotides and the synthesized RNAs. # RNA and protein names are prefixed in the ME-model following then the locus tag lst = set(df_data['Gene Locus ID'].str.replace('^protein_', '', regex = True).str.replace('^RNA_', '', regex = True).tolist()) # detect if the genbank file was modified using biocyc data gb = '{:s}/building_data/genome_modified.gb'.format(out_directory) gb = gb if pathlib.Path(gb).exists() else config['genbank-path'] coralme.util.building.build_reactions_from_genbank( #me_model = me, gb_filename = me.global_info['genbank-path'], me_model = me, gb_filename = gb, tu_frame = df_tus, genes_to_add = lst, update = True, verbose = True, feature_types = me.global_info['feature_types'], trna_misacylation = me.global_info['trna_misacylation'], genome_mods = me.global_info['genome_mods'], knockouts = me.global_info['knockouts']) # ### 4) Add in ComplexFormation reactions without modifications (for now) # RNA components different from tRNAs and from 5S, 16S and 23S rRNAs rna_components = set(me.global_info['rna_components']) # in order: RNase_P_RNA, SRP_RNA, 6S RNA # cplx_dct is a dictionary {complex_name: {locus_tag/generic_name/subcomplex_name: count}} cplx_dct = coralme.builder.flat_files.get_complex_subunit_stoichiometry(df_cplxs, rna_components) # mods_dct is a dictionary {complex_name_with_mods: {core_enzyme: complex_name, modifications: {cofactor: stoichiometry}} mods_dct = coralme.builder.flat_files.get_complex_modifications( reaction_matrix = df_rmsc, protein_complexes = df_cplxs, complex_mods = df_ptms, compartments = { v:k for k,v in me._compartments.items() }) # Add complexes into the ME-model as coralme.core.processdata.ComplexData objects. # Modifications are added as SubReactions coralme.util.building.add_model_complexes(me, cplx_dct, mods_dct) # Remove modifications. They will be added back in later (See Building Step 2, subsection 3). for data in tqdm.tqdm(list(me.complex_data), 'Removing SubReactions from ComplexData...', bar_format = bar_format): data.subreactions = {} # Add ComplexFormation reactions for each of the ComplexData for data in tqdm.tqdm(list(me.complex_data), 'Adding ComplexFormation into the ME-model...', bar_format = bar_format): formation = data.formation if formation: formation.update() elif data.id != 'CPLX_dummy': data.create_complex_formation() logging.warning('Added ComplexFormation for \'{:s}\'.'.format(data.id)) else: pass # ### 5) Add *GenericData* and its reactions # Multiple entities can perform the same role. To prevent a combinatorial explosion, we create "generic" versions of the components, where any of those entities can fill in. # # This step was originally in the second part of the builder process, and was moved here to be able to use generics in enzymes associated to metabolic reactions. generics = coralme.builder.preprocess_inputs.get_generics(df_data) for generic, components in tqdm.tqdm(generics, 'Adding Generic(s) into the ME-model...', bar_format = bar_format): if 'generic_fes_transfers_complex' in generic: continue coralme.core.processdata.GenericData(generic, me, components).create_reactions() # ### 6) Add dummy reactions to model and the *unmodeled_protein_fraction* constraint # # This includes the Transcription, Translation, ComplexFormation, and Metabolic reactions for a dummy RNA/protein/complex. Sequence for *dummy RNA* is based on the prevalence of each codon found in the genbank file. coralme.util.building.add_dummy_reactions(me, me.global_info['transl_tables']['c'], update = True) # The 'dummy protein' is associated to orphan reactions. # This ensures that orphan reactions will not become favored to fulfil unmodeled protein fraction requirement. rxn = coralme.core.reaction.SummaryVariable('dummy_protein_to_mass') me.add_reactions([rxn]) mass = me.metabolites.protein_dummy.formula_weight / 1000. # in kDa met = coralme.core.component.Constraint('unmodeled_protein_biomass') rxn.add_metabolites({'protein_biomass': -mass, 'protein_dummy': -1, met: mass}) # WARNING: DO NOT REMOVE. This is needed if troubleshooter adds sink reactions for 4fe4s/2fe2s # Add CPLX_dummy_mod_2fe2s(1) and CPLX_dummy_mod_4fe4s(1) if fes_transfers is {'CPLX_dummy'} if 'CPLX_dummy' in me.global_info['complex_cofactors']['fes_transfers']: for fes in ['2fe2s', '4fe4s']: coralme.util.building.add_complex_to_model(me, 'CPLX_dummy_mod_{:s}(1)'.format(fes), { 'protein_dummy' : 1.0, fes + '_c': 1.0}) # WARNING: do not change escape characters me.complex_data.query(r'CPLX_dummy_mod_2fe2s\(1\)')[0].create_complex_formation() me.complex_data.query(r'CPLX_dummy_mod_4fe4s\(1\)')[0].create_complex_formation() # ### 7) Associate Complexes to Metabolic reactions and build the ME-model metabolic network # Associate a reaction id with the ME-model complex id (including modifications) rxn_to_cplx_dict = coralme.builder.flat_files.get_reaction_to_complex(m_model, df_enz2rxn) spontaneous_rxns = [me.global_info['dummy_rxn_id']] + list(df_rxns[df_rxns['is_spontaneous'].str.match('1|TRUE|True|true')].index.values) # Correct rxn_to_cplx_dict with spontaneous reactions for key in spontaneous_rxns: if rxn_to_cplx_dict.get(key, None) is None: rxn_to_cplx_dict[key] = {None} else: rxn_to_cplx_dict[key].add(None) coralme.util.building.add_reactions_from_stoichiometric_data( me, rxn_to_cplx_dict, is_spontaneous = spontaneous_rxns, update = True) # correct the enzyme list of a subreaction if it matches the component_list of a generic # WARNING: This allows the use of generics in subreactions # e.g.: acpP_activation is associated to EG12221-MONOMER OR HOLO-ACP-SYNTH-CPLX_mod_mg2(1) OR HOLO-ACP-SYNTH-CPLX_mod_mn2(1) # e.g.: EG12221-MONOMER OR HOLO-ACP-SYNTH-CPLX_mod_mg2(1) OR HOLO-ACP-SYNTH-CPLX_mod_mn2(1) is associated to generic_acp_synthase for data in list(me.subreaction_data): for generic in list(me.generic_data): if data.enzyme == set(generic.component_list): data.enzyme = set([generic.id]) # ### 8) Incorporate remaining biomass constituents # ### 1. General Demand Requirements # There are leftover components from the biomass equation that either: # 1. they have no mechanistic function in the model (e.g., *glycogen*) # 2. they are cofactors that are regenerated (e.g., *nad*) # # Applies demands and coefficients from the biomass objective function from the M-model. # set values from configuration as properties of the ME-model for key in [ 'gam', 'ngam', 'unmodeled_protein_fraction' ]: if key in me.global_info: setattr(me, key, me.global_info[key]) # if only one biomass reaction, default ID is 'biomass_constituent_demand' me.global_info['biomass_reactions'] = me.global_info.get('biomass_reactions', ['biomass_constituent_demand']) biomass_constituents = me.global_info.get('flux_of_biomass_constituents', {}) try: biomass_constituents = pandas.DataFrame(biomass_constituents) for biomass_reaction in me.global_info['biomass_reactions']: if not biomass_reaction in biomass_constituents.index: biomass_constituents.loc[biomass_reaction] = None # add missing conditions if 'All' in biomass_constituents.index: # 'All' contains a value that applies to all the missing conditions for column in biomass_constituents: # get conditions with NaN values empty_value_in_conditions = biomass_constituents[biomass_constituents[column].isna()].index for idx in empty_value_in_conditions: biomass_constituents.loc[idx, column] = biomass_constituents.loc['All', column] except: biomass_constituents = pandas.DataFrame(biomass_constituents, index = me.global_info['biomass_reactions']) biomass_constituents = biomass_constituents.fillna('') me.global_info['biomass_constituents'] = biomass_constituents for idx, row in biomass_constituents.iterrows(): # replace IDs. New metabolites x types are created during the processing of the m_model # old ID (M-model) : new ID (ME-model) dct = df_mets[df_mets['type'].str.contains('REPLACE')].to_dict()['me_id'] for key in list(row.keys()): if key in dct: row[dct[key]] = row.pop(key) logging.warning('Metabolite \'{:s}\' was replaced with \'{:s}\' in MetabolicReaction \'{:s}\'.'.format(key, dct[key], 'biomass_constituent_demand')) # remove metabolites not in the model or without molecular weight row = { k:v for k,v in row.items() if me.metabolites.has_id(k) and me.metabolites.get_by_id(k).formula_weight } problems = list(set(me.global_info.get('flux_of_biomass_constituents', {})).difference(row)) if problems: logging.warning('The following biomass constituents, biomass \'{:s}\', are not in the ME-model or have no formula: {:s}.'.format(idx, ', '.join(problems))) logging.warning('A second attempt to add biomass constituents will be perform after update of formulas.') name = 'biomass_constituent_demand' if idx == 'biomass_constituent_demand' else 'biomass_constituent_demand_' + idx rxn = coralme.core.reaction.SummaryVariable(name) me.add_reactions([rxn]) # An old bug in the YAML Resolver? YAML files should be written and read by the same version of anyconfig # https://stackoverflow.com/questions/30458977/yaml-loads-5e-6-as-string-and-not-a-number rxn.add_metabolites({ k:-(abs(float(v))) for k,v in row.items() if isinstance(me.metabolites.get_by_id(k), coralme.core.component.Metabolite) and isinstance(v, (float, int)) }) rxn.lower_bound = 0. # me.mu # coralme.util.mu rxn.upper_bound = 0. # me.mu # coralme.util.mu constituent_mass = sum([me.metabolites.get_by_id(c).formula_weight / 1000. * abs(float(v)) for c,v in row.items() if isinstance(v, (float, int)) ]) rxn.add_metabolites({me.metabolites.get_by_id('constituent_biomass'): constituent_mass}) # ### 2. Lipid Demand Requirements # Metabolites and coefficients from biomass objective function lipid_demand = me.global_info.get('flux_of_lipid_constituents', {}) #for key, value in me.global_info.get('flux_of_lipid_constituents', {}).items(): #lipid_demand[key] = abs(value) try: lipid_demand = pandas.DataFrame(lipid_demand) for biomass_reaction in me.global_info['biomass_reactions']: if not biomass_reaction in lipid_demand.index: lipid_demand.loc[biomass_reaction] = None # add missing conditions if 'All' in lipid_demand.index: # 'All' contains a value that applies to all the missing conditions for column in lipid_demand: # get conditions with NaN values empty_value_in_conditions = lipid_demand[lipid_demand[column].isna()].index for idx in empty_value_in_conditions: lipid_demand.loc[idx, column] = lipid_demand.loc['All', column] except: lipid_demand = pandas.DataFrame(lipid_demand, index = me.global_info['biomass_reactions']) lipid_demand = lipid_demand.fillna('') if not lipid_demand.empty: me.global_info['lipid_demand_per_condition'] = {} for met, requirement in lipid_demand.items(): for condition, value in requirement.items(): me.global_info['lipid_demand_per_condition'].setdefault(condition, []) try: component_mass = me.metabolites.get_by_id(met).formula_weight / 1000. me.global_info['lipid_demand_per_condition'][condition].append('DM_' + met + '_' + condition) rxn = coralme.core.reaction.SummaryVariable('DM_' + met + '_' + condition) me.add_reactions([rxn]) rxn.add_metabolites({met: -1 * abs(value), 'lipid_biomass': component_mass * abs(value)}) rxn.lower_bound = 0. # me.mu # coralme.util.mu rxn.upper_bound = 0. # me.mu # originally 1000. except: msg = 'Metabolite \'{:s}\' lacks a formula. Please correct it in the M-model or the \'metabolites.txt\' metadata file.' logging.warning(msg.format(met)) else: logging.warning('All metabolites in the \'flux_of_lipid_constituents\' configuration key lack their formulae.') # set active biomass reaction and lipid reactions if me.global_info.get('active_biomass_reaction', False): me.active_biomass_reaction = me.global_info['active_biomass_reaction'] else: me.active_biomass_reaction = 'biomass_constituent_demand' # ### 3. DNA Demand Requirements # Added based on growth rate dependent DNA levels as in [O'brien EJ et al 2013](https://www.ncbi.nlm.nih.gov/pubmed/24084808) (*E. coli* data) if me.global_info.get('flux_of_dna_constituents', False): # Fraction of each nucleotide in DNA, based on M-model's BOF dna_demand_stoich = { 'datp_c': -abs(me.global_info['flux_of_dna_constituents']['datp_c']), 'dctp_c': -abs(me.global_info['flux_of_dna_constituents']['dctp_c']), 'dgtp_c': -abs(me.global_info['flux_of_dna_constituents']['dgtp_c']), 'dttp_c': -abs(me.global_info['flux_of_dna_constituents']['dttp_c']), } dna_demand_stoich['ppi_c'] = abs(me.global_info['flux_of_dna_constituents'].get('ppi_c', sum([ v for k,v in dna_demand_stoich.items() if k != 'ppi_c']))) else: # Fraction of each nucleotide in DNA, based on gc_fraction dna_demand_stoich = { 'datp_c': -((1 - me.global_info['GC_fraction']) / 2), 'dctp_c': -(me.global_info['GC_fraction'] / 2), 'dgtp_c': -(me.global_info['GC_fraction'] / 2), 'dttp_c': -((1 - me.global_info['GC_fraction']) / 2), 'ppi_c': 1 } dna_replication = coralme.core.reaction.SummaryVariable('DNA_replication') me.add_reactions([dna_replication]) dna_replication.add_metabolites(dna_demand_stoich) dna_mw = 0 dna_mw_no_ppi = coralme.builder.dna_replication.get_dna_mw_no_ppi_dict(me) for met, value in me.reactions.get_by_id('DNA_replication').metabolites.items(): if met.id != 'ppi_c': dna_mw -= value * dna_mw_no_ppi[met.id.replace('_c', '')] / 1000. # in kDa dna_biomass = coralme.core.component.Constraint('DNA_biomass') dna_replication.add_metabolites({dna_biomass: dna_mw}) if me.global_info.get('flux_of_dna_constituents', False): dna_demand_bound = me.mu else: dna_demand_bound = coralme.builder.dna_replication.return_gr_dependent_dna_demand( me, me.global_info['GC_fraction'], me.global_info['percent_dna_data'], me.global_info['gr_data_doublings_per_hour']) dna_replication.lower_bound = dna_demand_bound dna_replication.upper_bound = dna_demand_bound # ### 9) Save ME-model as a pickle file with open('{:s}/MEModel-step1-{:s}.pkl'.format(out_directory, model), 'wb') as outfile: pickle.dump(me, outfile) coralme.core.extended_classes.ListHandler.print_and_log('ME-model was saved in the {:s} directory as MEModel-step1-{:s}.pkl'.format(out_directory, model)) # ## Part 2: Add metastructures to solving ME-model # set logger log = logging.getLogger() # root logger for hdlr in log.handlers[:]: # remove all old handlers log.removeHandler(hdlr) # Old code works in a separate script; but it works if we remove the old handler logging.basicConfig(filename = '{:s}/MEReconstruction-step2-{:s}.log'.format(log_directory, model), filemode = 'w', level = logging.WARNING, format = log_format) log.addHandler(self.logger['MEReconstruction-step2']) #log.addHandler(logging.StreamHandler(sys.stdout)) logging.captureWarnings(True) # ### 1) Add *GenericData* and its reactions # Multiple entities can perform the same role. To prevent a combinatorial explosion, we create "generic" versions of the components, where any of those entities can fill in. # # This step was moved to the first part of the builder process to be able to use generics in enzymes linked to metabolic reactions. # ### 2) Add DNA polymerase and the ribosome and its rRNAs modifications # ~~This uses the ribosome composition and subreaction definitions in **coralme/ribosome.py**~~ # WARNING: EXPERIMENTAL, not included in the original cobrame paper. # 1) Is the coupling coefficient dependent on the length of the DNA? # 2) Do we need to modify the DNA replication to have one reaction per replicon? # dnapol_id = me.global_info['dnapol_id'] # me.add_metabolites([cobrame.core.component.Complex(dnapol_id)]) # data = cobrame.core.processdata.ComplexData(dnapol_id, me) # data.stoichiometry.update(coralme.preprocess_inputs.dnapolymerase_stoichiometry(df_data)) # data.create_complex_formation(verbose = False) ribosome_stoich = coralme.builder.preprocess_inputs.ribosome_stoichiometry(df_data) ribosome_subreactions = coralme.builder.preprocess_inputs.get_subreactions(df_data, 'Ribosome') df_rrna_mods = df_rna_mods[df_rna_mods['bnum'].isin(['16s_rRNAs', '23s_rRNAs'])] coralme.builder.ribosome.add_ribosome(me, ribosome_stoich, ribosome_subreactions, df_rrna_mods, verbose = True) # ### 3) Add charged tRNA reactions # The tRNA charging reactions were automatically added when loading the genome from the GenBank file. However, the charging reactions still need to be made aware of the tRNA synthetases which are responsible. Generic charged tRNAs are added to translation reactions via *SubreactionData* below. # tRNA synthetases per organelle if hasattr(self, 'org') and len(me.global_info.get('amino_acid_trna_synthetase', {})) == 0: aa_synthetase_dict = me.global_info['amino_acid_trna_synthetase'] elif len(me.global_info.get('amino_acid_trna_synthetase', {})) != 0: aa_synthetase_dict = me.global_info.get('amino_acid_trna_synthetase', {}) else: aa_synthetase_dict = coralme.builder.preprocess_inputs.aa_synthetase_dict(df_data) logging.warning('Association of tRNA synthetases and amino acids was inferred from GenBank annotation. It can be incomplete.') for data in tqdm.tqdm(list(me.tRNA_data), 'Adding tRNA synthetase(s) information into the ME-model...', bar_format = bar_format): data.synthetase = str(aa_synthetase_dict.get(data.amino_acid, 'CPLX_dummy')) # Correct 'translation_stop_dict' if PrfA and/or PrfB homologs were not identified PrfA_mono = me.global_info['peptide_release_factors'].get('UAG', 'PrfA_mono') # In Trans Table 4, UGA encodes Trp, not a stop codon. User might choose not to add 'UGA' in peptide_release_factors in organism.json PrfB_mono = me.global_info['peptide_release_factors'].get('UGA', 'PrfB_mono') generic_RF = me.global_info['peptide_release_factors'].get('UAA', 'generic_RF') if me.metabolites.has_id(PrfA_mono) and not me.metabolites.has_id(PrfB_mono): me.global_info['peptide_release_factors']['UGA'] = PrfA_mono # originally assigned to PrfB_mono me.global_info['peptide_release_factors']['UAA'] = PrfA_mono # originally assigned to generic_RF if not me.metabolites.has_id(PrfA_mono) and me.metabolites.has_id(PrfB_mono): me.global_info['peptide_release_factors']['UAG'] = PrfB_mono # originally assigned to PrfA_mono me.global_info['peptide_release_factors']['UAA'] = PrfB_mono # originally assigned to generic_RF if not me.metabolites.has_id(PrfA_mono) and not me.metabolites.has_id(PrfB_mono): me.global_info['peptide_release_factors']['UAG'] = 'CPLX_dummy' # originally assigned to PrfA_mono me.global_info['peptide_release_factors']['UGA'] = 'CPLX_dummy' # originally assigned to PrfB_mono if me.global_info['peptide_release_factors']['UAG'] == 'CPLX_dummy' and me.global_info['peptide_release_factors']['UGA'] == 'CPLX_dummy': me.global_info['peptide_release_factors']['UAA'] = 'CPLX_dummy' # charged tRNAs for organelle, transl_table in me.global_info['transl_tables'].items(): if len(transl_table) == 0: continue coralme.builder.translation.add_charged_trna_subreactions(me, organelle, transl_table, translation_stop_dict = me.global_info['peptide_release_factors'], selenocysteine_enzymes = me.global_info.get('selenocysteine_enzymes', [])) # ### 4) Add tRNA modifications into the ME-model and associate them with tRNA charging reactions # Add tRNA modifications to ME-model per type of organism if me.global_info['domain'].lower() in ['prokaryote', 'bacteria']: df_trna_mods = df_rna_mods[~df_rna_mods['bnum'].isin(['16s_rRNAs', '23s_rRNAs'])] elif me.global_info['domain'].lower() in ['eukarya', 'eukaryote']: df_trna_mods = df_rna_mods[~df_rna_mods['bnum'].isin(['18S_rRNAs', '25S_rRNAs', '28S_rRNAs'])] else: logging.warning('The \'domain\' property is not valid. A valid value is \'Prokaryote\', \'Bacteria\', \'Eukarya\', or \'Eukaryote\'.') coralme.builder.trna_charging.add_trna_modification_procedures(me, df_trna_mods) # Associate tRNA modifications to tRNAs trna_modifications = coralme.builder.flat_files.get_trna_modification_targets(df_trna_mods) for idx, trna in tqdm.tqdm(list(df_trna_mods.iterrows()), 'Associating tRNA modification enzyme(s) to tRNA(s)...', bar_format = bar_format): for data in me.process_data.query('tRNA_' + trna['bnum']): data.subreactions = trna_modifications[trna['bnum']] # ### 5) Add translation SubReactions into the ME-model and associate them with Translation reactions initiation_subreactions = coralme.builder.preprocess_inputs.get_subreactions(df_data, 'Translation_initiation') elongation_subreactions = coralme.builder.preprocess_inputs.get_subreactions(df_data, 'Translation_elongation') termination_subreactions = coralme.builder.preprocess_inputs.get_subreactions(df_data, 'Translation_termination') #termination_subreactions.update(coralme.builder.preprocess_inputs.get_subreactions(df_data, 'Translation_termination_generic_RF_mediated')) processing_subreactions = coralme.builder.preprocess_inputs.get_subreactions(df_data, 'Protein_processing') for data in tqdm.tqdm(list(me.translation_data), 'Adding SubReactions into TranslationReactions...', bar_format = bar_format): data.add_initiation_subreactions(start_codons = me.global_info['start_codons'], start_subreactions = initiation_subreactions) data.add_elongation_subreactions(elongation_subreactions = elongation_subreactions) data.add_termination_subreactions(translation_terminator_dict = me.global_info['peptide_release_factors']) # ### 6) Add Transcription Metacomplexes: RNA Polymerase(s) rna_polymerases = me.global_info.get('rna_polymerases', {}) # Create polymerase "metabolites" for rnap, components in tqdm.tqdm(rna_polymerases.items(), 'Adding RNA Polymerase(s) into the ME-model...', bar_format = bar_format): if me.metabolites.has_id(components['sigma_factor']) and me.metabolites.has_id(components['polymerase']): rnap_obj = coralme.core.component.RNAP(rnap) me.add_metabolites(rnap_obj) logging.warning('The RNA Polymerase \'{:s}\' was created in the ME-model successfully.'.format(rnap)) else: if not me.metabolites.has_id(components['sigma_factor']): logging.warning('The complex ID \'{:s}\' from \'rna_polymerases\' in the configuration does not exist in the organism-specific matrix. Please check if it is the correct behaviour.'.format(components['sigma_factor'])) if not me.metabolites.has_id(components['polymerase']): logging.warning('The complex ID \'{:s}\' from \'rna_polymerases\' in the configuration does not exist in the organism-specific matrix. Please check if it is the correct behaviour.'.format(components['polymerase'])) # Add polymerase complexes in the model coralme.builder.transcription.add_rna_polymerase_complexes(me, rna_polymerases, verbose = False) # Associate the correct RNA_polymerase and factors to TUs sigma_to_rnap = { v['sigma_factor']:k for k,v in rna_polymerases.items() } for tu_id in tqdm.tqdm(df_tus.index, 'Associating a RNA Polymerase to each Transcriptional Unit...', bar_format = bar_format): if me.process_data.has_id(tu_id): transcription_data = me.process_data.get_by_id(tu_id) transcription_data.RNA_polymerase = sigma_to_rnap.get(df_tus['rnapol'][tu_id], None) else: logging.warning('Transcription Unit \'{:s}\' is missing from ProcessData, likely the associated gene(s) (\'{:s}\') were filtered out before the reconstruction. Check if it is the correct behavior.'.format(tu_id, df_tus['genes'][tu_id])) pass # WARNING: Without a TUs file, the 'most common' polymerase should be an empty string rnap_counter = collections.Counter([ x.RNA_polymerase for x in me.transcription_data ]) # override 'most_common' polymerase if it is empty user_default_sigma = me.global_info.get('default_sigma_factor', '') user_default_rnap = sigma_to_rnap.get(user_default_sigma, None) if user_default_rnap is None: msg = 'Assigning most common RNAP \'{:s}\' to missing polymerase in \'{:s}\'' most_common = max(rnap_counter, key = rnap_counter.get) else: msg = 'Assigning user default RNAP \'{:s}\' to missing polymerase in \'{:s}\'' most_common = user_default_rnap for transcription_data in me.transcription_data: if transcription_data.RNA_polymerase == '' and most_common != '': logging.warning(msg.format(most_common,transcription_data.id)) transcription_data.RNA_polymerase = most_common # ### 7) Add Transcription Metacomplexes: Degradosome (both for RNA degradation and RNA splicing) degradosome_id = me.global_info['degradosome_id'] me.add_metabolites([coralme.core.component.Complex(degradosome_id)]) data = coralme.core.processdata.ComplexData(degradosome_id, me) stoich = coralme.builder.preprocess_inputs.degradosome_stoichiometry(df_data) if stoich is not None: data.stoichiometry.update(stoich) else: data.stoichiometry.update({'CPLX_dummy' : -1}) data.create_complex_formation(verbose = False) # Used for RNA splicing data = coralme.core.processdata.SubreactionData('RNA_degradation_machine', me) data.enzyme = [me.global_info['degradosome_id']] # .25 water equivalent for ATP hydrolysis per nucleotide data = coralme.core.processdata.SubreactionData('RNA_degradation_atp_requirement', me) data.stoichiometry = { 'atp_c': -0.25, 'h2o_c': -0.25, 'adp_c': +0.25, 'h_c': +0.25, 'pi_c': +0.25 } for excision_type in me.global_info['excision_machinery']: stoichiometry = coralme.builder.preprocess_inputs.excision_machinery_stoichiometry(df_data, excision_type) if stoichiometry is not None: coralme.builder.transcription.add_rna_excision_machinery(me, excision_type, stoichiometry) else: coralme.builder.transcription.add_rna_excision_machinery(me, excision_type, {'CPLX_dummy' : +1}) # +1 is correct logging.warning('All the components of the excision complex for \'{:s}\' were not identified from homology and it was assigned to the \'CPLX_dummy\' complex.'.format(excision_type)) # add excision machineries into TranscriptionData # WARNING: subreactions is now a property of the TranscriptionData recalculated when accessed #coralme.builder.transcription.add_rna_splicing(me) # ## Part 3: Add remaining modifications (including iron clusters and lipoate) ## WARNING: This is calculated above (See Building Step 1, subsection 3) ## mods_dct is a dictionary {complex_name_with_mods: {core_enzyme: complex_name, modifications: {stoichiometry}} #mods_dct = coralme.builder.flat_files.get_complex_modifications( #reaction_matrix = df_rmsc, #protein_complexes = df_cplxs, #complex_mods = df_ptms, #compartments = { v:k for k,v in me._compartments.items() }) for complex_id, info in tqdm.tqdm(mods_dct.items(), 'Processing ComplexData in ME-model...', bar_format = bar_format): modifications = {} for mod, value in info['modifications'].items(): # stoichiometry of modification determined in subreaction_data.stoichiometry modifications['mod_' + mod] = abs(value) me.process_data.get_by_id(complex_id).subreactions = modifications # Check if complex modifications are set on any component in the organism-specific matrix # Dipyrromethane # dpm modification never from the free metabolite if me.process_data.has_id('mod_dpm_c'): if me.metabolites.has_id('dpm_c'): me.remove_metabolites([me.metabolites.dpm_c]) # WARNING: use a different ID for spontaneous modification vs enzymatic modification (?) # biotin from the free metabolite in malonate decarboxylase (EC 7.2.4.4); don't remove biotin from the model (EC 4.1.1.88 is biotin-independent) # biotin from the free metabolite in acetyl-CoA carboxylase, but using biotin---[acetyl-CoA-carboxylase] ligase if me.process_data.has_id('mod_btn_c'): coralme.builder.modifications.add_btn_modifications(me) # 2'-(5''-triphosphoribosyl)-3'-dephospho-CoA in CitD catalyzed by CitX # 2'-(5''-triphosphoribosyl)-3'-dephospho-CoA loaded from the free metabolite; don't remove it from the model if me.process_data.has_id('mod_2tpr3dpcoa_c'): coralme.builder.modifications.add_2tpr3dpcoa_modifications(me) # activation of glycyl radical enzymes # glycyl modification never from the free metabolite if me.process_data.has_id('mod_glycyl_c'): coralme.builder.modifications.add_glycyl_modifications(me) if me.metabolites.has_id('glycyl_c'): me.remove_metabolites([me.metabolites.glycyl_c]) # pap4p in AcpP catalyzed by AcpS # pan4p loaded from free CoA, producing pap as byproduct; don't remove it from the model if me.process_data.has_id('mod_pan4p_c'): coralme.builder.modifications.add_pan4p_modifications(me) # https://www.genome.jp/pathway/map00785 # lipoyl is a pseudo metabolite; it rather comes from free lipoate or from octanoate-ACP if me.process_data.has_id('mod_lipoyl_c'): coralme.builder.modifications.add_lipoyl_modifications(me) if me.metabolites.has_id('lipoyl_c'): me.remove_metabolites([me.metabolites.lipoyl_c]) if me.process_data.has_id('mod_3fe4s_c') or me.process_data.has_id('mod_4fe4s_c') or me.process_data.has_id('mod_2fe2s_c'): coralme.builder.modifications.add_iron_sulfur_modifications(me) if me.process_data.has_id('mod_FeFe_cofactor_c') or me.process_data.has_id('mod_NiFe_cofactor_c'): coralme.builder.modifications.add_FeFe_and_NiFe_modifications(me) if me.process_data.has_id('mod_bmocogdp_c'): coralme.builder.modifications.add_bmocogdp_chaperones(me) # add formation reactions for each of the ComplexData for data in tqdm.tqdm(list(me.complex_data), 'Adding ComplexFormation into the ME-model...', bar_format = bar_format): formation = data.formation if formation: formation.update() else: data.create_complex_formation() logging.warning('Added a ComplexFormation reaction for \'{:s}\'.'.format(data.id)) # ## Part 4: Add remaining subreactions # ### 1. Add translation related subreactions # get list of processed proteins from df_data processing_pathways = [ x.replace('Protein_processing_', '') for x in me.global_info['translation_subreactions'].keys() if x.startswith('Protein_processing') ] protein_processing = { k:df_data[~df_data[k].isnull() & df_data[k].str.match('1|TRUE|True|true')]['Gene Locus ID'].tolist() for k in processing_pathways } # Add the translation subreaction data objects to the ME-model coralme.builder.translation.add_subreactions_to_model( me, [initiation_subreactions, elongation_subreactions, termination_subreactions, processing_subreactions]) for data in tqdm.tqdm(list(me.translation_data), 'Adding SubReactions into TranslationReactions...', bar_format = bar_format): for process in protein_processing: if data.id in protein_processing[process]: data.subreactions['Protein_processing_' + process] = 1. # This block was run above, but it should be run again to incorporate any subreactions not added previously data.add_initiation_subreactions(start_codons = me.global_info['start_codons'], start_subreactions = initiation_subreactions) data.add_elongation_subreactions(elongation_subreactions = elongation_subreactions) data.add_termination_subreactions(translation_terminator_dict = me.global_info['peptide_release_factors']) # Add organism specific subreactions associated with peptide processing for subrxn in me.global_info['peptide_processing_subreactions']: data.subreactions[subrxn] = 1. # ### 2) Add transcription related subreactions transcription_subreactions = coralme.builder.preprocess_inputs.get_subreactions(df_data, 'Transcription') coralme.builder.transcription.add_subreactions_to_model(me, [transcription_subreactions]) for transcription_data in tqdm.tqdm(list(me.transcription_data), 'Adding Transcription SubReactions...', bar_format = bar_format): # Assume false if not in tu_df is_rho_dependent = df_tus.rho_dependent.get(transcription_data.id, 'False') rho_dependency = 'dependent' if is_rho_dependent in ['1', 'TRUE', 'True', 'true'] else 'independent' stable = 'stable' if transcription_data.codes_stable_rna else 'normal' if 'Transcription_{:s}_rho_{:s}'.format(stable, rho_dependency) in me.global_info['transcription_subreactions']: transcription_data._subreactions['Transcription_{:s}_rho_{:s}'.format(stable, rho_dependency)] = 1. else: logging.warning('The SubReaction \'Transcription_{:s}_rho_{:s}\' is not defined in the organism-specific matrix.'.format(stable, rho_dependency)) # ## Part 5: Add in Translocation reactions v1 = { 'fixed_keff' : False, 'length_dependent' : True } # default v2 = { 'fixed_keff' : True, 'length_dependent' : False } # only for FtsY in the SRP pathway v3 = { 'fixed_keff' : False, 'length_dependent' : False } # for all the enzymes from the tat, tat_alt, lol and bam pathways for key, value in me.global_info['translocation_pathway'].items(): if 'translocation_pathway_' + key in df_transpaths.index: me.global_info['translocation_pathway'][key]['enzymes'] = { k:(v2 if k == value.get('FtsY', None) else v1 if (key.lower() not in ['tat', 'tat_alt', 'lol', 'bam']) else v3) \ for k in df_transpaths.loc['translocation_pathway_' + key].tolist()[0] } # TO ADD PATHWAYS WITHOUT HOMOLOGS # Check if the user wants to add dummies to the translocation pathways elif bool(me.global_info.get('add_translocases', False)) and value.get('enzymes', None) is None: me.global_info['translocation_pathway'][key]['enzymes'] = { 'CPLX_dummy':(v2 if value.get('FtsY', None) else v1 if (key.lower() not in ['tat', 'tat_alt', 'lol', 'bam']) else v3) } logging.warning('The component \'CPLX_dummy\' was associated to translocation pathways without defined homologs.') dct = { k:v['abbrev'] for k,v in me.global_info['translocation_pathway'].items() } dct = dict([(v, [k + '_translocation' for k,v1 in dct.items() if v1 == v]) for v in set(dct.values())]) dct = { k:(v[0] if len(v) == 1 else v) for k,v in dct.items() } # tat pathways should be a list, but not the others # Check if the user added the reactions for translocation data if not me.process_data.has_id('atp_hydrolysis_sec_pathway'): stoichiometry = {'atp_c': -0.04, 'h2o_c': -0.04, 'adp_c': +0.04, 'h_c': +0.04, 'pi_c' : +0.04} coralme.util.building.add_subreaction_data( me, modification_id = 'atp_hydrolysis_sec_pathway', modification_stoichiometry = stoichiometry, modification_enzyme = 'CPLX_dummy') if not me.process_data.has_id('atp_hydrolysis_secA'): stoichiometry = {'atp_c': -1/75, 'h2o_c': -1/75, 'adp_c': +1/75, 'h_c': +1/75, 'pi_c': +1/75} coralme.util.building.add_subreaction_data( me, modification_id = 'atp_hydrolysis_secA', modification_stoichiometry = stoichiometry, modification_enzyme = 'CPLX_dummy') if not me.process_data.has_id('gtp_hydrolysis_srp_pathway'): stoichiometry = {'gtp_c': -2.0, 'h2o_c': -2.0, 'gdp_c': +2.0, 'h_c': +2.0, 'pi_c': +2.0} coralme.util.building.add_subreaction_data( me, modification_id = 'gtp_hydrolysis_srp_pathway', modification_stoichiometry = stoichiometry, modification_enzyme = 'CPLX_dummy') # for pathway, info in coralme.translocation.pathway.items(): for pathway, info in me.global_info['translocation_pathway'].items(): if me.global_info['translocation_pathway'][pathway].get('enzymes', None) is not None: transloc_data = coralme.core.processdata.TranslocationData(pathway + '_translocation', me) transloc_data.enzyme_dict = info['enzymes'] transloc_data.keff = info['keff'] transloc_data.length_dependent_energy = info['length_dependent_energy'] if me.process_data.has_id(info['stoichiometry']): transloc_data.stoichiometry = me.process_data.get_by_id(info['stoichiometry']).stoichiometry else: transloc_data.stoichiometry = {} # Associate data and add translocation reactions multipliers = collections.defaultdict(dict) for enzyme, value in me.global_info.get('translocation_multipliers', {}).items(): for bnum in value.keys(): multipliers['protein_' + bnum][enzyme] = value[bnum] coralme.builder.translocation.add_translocation_pathways(me, pathways_df = df_protloc, abbreviation_to_pathway = dct, multipliers = multipliers, membrane_constraints = False) # Update stoichiometry of membrane complexes new_stoich = collections.defaultdict(dict) for idx, row in tqdm.tqdm(list(df_protloc.iterrows()), 'Processing StoichiometricData in SubReactionData...', bar_format = bar_format): protID = row['Protein'] protIDLoc = protID + '_' + row['Protein_compartment'] # to use dict.update() later; It replaces removing the key using the dict.pop method new_stoich[idx]['protein_' + protID] = 0 new_stoich[idx]['protein_' + protIDLoc] = float(me.process_data.get_by_id(idx).stoichiometry['protein_' + protID]) for cplx, stoich in new_stoich.items(): complex_data = me.process_data.get_by_id(cplx) complex_data.stoichiometry.update(new_stoich[cplx]) # remove zeroes from complex_data.stoichiometry complex_data.stoichiometry = { k:v for k,v in complex_data.stoichiometry.items() if v != 0 } complex_data.formation.update() # Complex IDs in protein compartment file don't include modifications # Some have multiple alternative modifications so must loop through these for complex_data in me.process_data.query('^{:s}_mod_'.format(cplx)): # WARNING: FeFe and NiFe cofactors reform the formation reactions as follow: # requires a formation -> base_complex + FeFe/NiFe => base_complex_mod_FeFe/_mod_NiFe <- should not have a formation reaction # base_complex_mod_FeFe/_mod_NiFe + other cofactors => final modified complex lst = [ type(me.metabolites.get_by_id(x)) for x in complex_data.stoichiometry.keys() ] if coralme.core.component.Complex in lst: continue complex_data.stoichiometry.update(new_stoich[cplx]) # remove zeroes from complex_data.stoichiometry complex_data.stoichiometry = { k:v for k,v in complex_data.stoichiometry.items() if v != 0 } complex_data.formation.update() # ## Part 6: Add Cell Wall Components # ### 1. Add lipid modification SubreactionData compartment_dict = {} for idx, compartment in df_protloc.set_index('Protein').Protein_compartment.to_dict().items(): compartment_dict[idx] = compartment lipid_modifications = me.global_info.get('lipid_modifications') lipoprotein_precursors = me.global_info.get('lipoprotein_precursors', []) # Step1: assign enzymes to lipid modifications for data in me.process_data.query('^mod_1st'): data.enzyme = me.global_info.get('pg_pe_160', 'CPLX_dummy') for data in me.process_data.query('^mod_2nd'): data.enzyme = me.global_info.get('other_lipids', 'CPLX_dummy') # Step2: add reactions of lipoprotein formation if bool(config.get('add_lipoproteins', False)) and lipoprotein_precursors: coralme.builder.translocation.add_lipoprotein_formation( me, compartment_dict, lipoprotein_precursors, lipid_modifications, membrane_constraints = False, update = True) # ### 2. Correct complex formation IDs if they contain lipoproteins #for gene in tqdm.tqdm(coralme.builder.translocation.lipoprotein_precursors.values()): if bool(config.get('add_lipoproteins', False)) and lipoprotein_precursors: for gene in tqdm.tqdm(lipoprotein_precursors.values(), 'Adding lipid precursors and lipoproteins...', bar_format = bar_format): compartment = compartment_dict.get(gene) if compartment is None: pass else: for rxn in me.metabolites.get_by_id('protein_' + gene + '_' + compartment).reactions: if isinstance(rxn, coralme.core.reaction.ComplexFormation): data = me.process_data.get_by_id(rxn.complex_data_id) value = data.stoichiometry.pop('protein_' + gene + '_' + compartment) data.stoichiometry['protein_' + gene + '_lipoprotein' + '_' + compartment] = value rxn.update() # ### 3. Braun's lipoprotein demand # Metabolites and coefficients as defined in [Liu et al 2014](http://bmcsystbiol.biomedcentral.com/articles/10.1186/s12918-014-0110-6) brauns_lipid_mod = me.global_info['braun\'s_lipid_mod'] if len(me.global_info['braun\'s_lipoproteins']) >= 1: for brauns_lipoprotein in me.global_info['braun\'s_lipoproteins']: # Perform checks before attempt to incorporate the Braun's lipoproteins if not me.metabolites.has_id(brauns_lipid_mod): logging.warning('The metabolite \'{:s}\' is not present in the ME-model and the Braun\'s lipoprotein demand cannot be set. Please check if it is the correct behavior. See http://bigg.ucsd.edu/universal/metabolites/murein5px4p for more information.'.format(brauns_lipid_mod)) continue if not me.reactions.has_id('formation_{:s}'.format(brauns_lipoprotein)): logging.warning('The \'add_lipoproteins\' option is \'False\' or coralme failed to add the \'{:s}\' ComplexFormation reaction and the Braun\'s lipoprotein demand cannot be set. Please check if it is the correct behavior.'.format(brauns_lipoprotein)) continue rxn = coralme.core.reaction.SummaryVariable('core_structural_demand_brauns_{:s}'.format(brauns_lipoprotein)) murein5px4p = me.metabolites.get_by_id(brauns_lipid_mod) murein5px4p_mass = murein5px4p.formula_weight / 1000. # ecolime: 1.0 protein_b1677_lipoprotein_Outer_Membrane --> 1.0 EG10544-MONOMER (brauns_lipoprotein ID) lipoprotein = me.metabolites.get_by_id(brauns_lipoprotein) me.add_reactions([rxn]) # biomass of lipoprotein accounted for in translation and lipid modification rxn.add_metabolites({ murein5px4p : -abs(me.global_info['braun\'s_murein_flux']), lipoprotein : -abs(me.global_info['braun\'s_lpp_flux']), me.metabolites.peptidoglycan_biomass : abs(me.global_info['braun\'s_murein_flux']) * murein5px4p_mass }, combine = False) rxn.lower_bound = me.mu # coralme.util.mu rxn.upper_bound = me.mu # coralme.util.mu else: logging.warning('No Braun\'s lipoprotein (lpp gene) homolog was set. Please check if it is the correct behavior.') # WARNING: Part 7 was originally "set keffs", however, formulas of complexes are corrected later and sasa can be underestimated # ## Part 7: Model updates and corrections # ### 1. Subsystems # Add reaction subsystems from M-model to ME-model for rxn in tqdm.tqdm(me.processed_m_model.reactions, 'Adding reaction subsystems from M-model into the ME-model...', bar_format = bar_format): if rxn.id in me.process_data: data = me.process_data.get_by_id(rxn.id) else: continue for parent_rxn in data.parent_reactions: parent_rxn.subsystem = rxn.subsystem # ### 2. Add enzymatic coupling for "carriers" # These are enzyme complexes that act as metabolites in a metabolic reaction. for data in tqdm.tqdm(list(me.stoichiometric_data), 'Processing StoichiometricData in ME-model...', bar_format = bar_format): if data.id == 'dummy_reaction': continue for met, value in data.stoichiometry.items(): # This will add any complex to the list of enzymes if not isinstance(me.metabolites.get_by_id(met), coralme.core.component.Complex): continue subreaction_id = met + '_carrier_activity' if subreaction_id not in me.process_data: sub = coralme.core.processdata.SubreactionData(subreaction_id, me) sub.enzyme = [met] data.subreactions[subreaction_id] = abs(value) # ### 3. Update ME-model # trick to obtain shadow prices and reduced costs from the optimizer me.reactions.dummy_reaction_FWD_SPONT.objective_coefficient = 1. if update: me.update() # not parallelized # ### 4. Add remaining formulas and compartments to the ME-model for r in tqdm.tqdm(me.reactions.query('^formation_'), 'Updating all FormationReactions...', bar_format = bar_format): r.update() modification_formulas = df_mets[df_mets['type'].str.match('COFACTOR|MOD|MODIFICATION')] modification_formulas = dict(zip(modification_formulas['me_id'], modification_formulas['formula'])) me.global_info['modification_formulas'] = modification_formulas # Correct formula of complexes based on their base complex # This will add the formula to complexes not formed from a complex formation reaction (e.g. CPLX + na2_c -> CPLX_mod_na2(1)) #coralme.builder.formulas.add_remaining_complex_formulas(me, modification_formulas) for met in [ x for x in me.metabolites if '_mod_' in x.id and isinstance(x, coralme.core.component.Complex)]: met.formula = None met.elements = {} base_complex = met.id.split('_mod_')[0] base_complex_elements = collections.Counter(me.metabolites.get_by_id(base_complex).elements) for mod in met.id.split('_mod_')[1:]: #for num in range(int(mod.rstrip(')').split('(')[1])): mod_elements = None mod_name = mod.split('(')[0] if mod_name in modification_formulas: mod_elements = coralme.builder.helper_functions.parse_composition(modification_formulas[mod_name]) # 2fe2s_c and 4fe4s_c appear as free metabolites in reactions and need to have formula for correct mass balance determination if me.metabolites.has_id(mod_name + '_c') and me.metabolites.get_by_id(mod_name + '_c').formula is None: me.metabolites.get_by_id(mod_name + '_c').formula = modification_formulas[mod_name] logging.warning('Formula for \'{:s}\' updated from me_mets.txt file.'.format(mod_name + '_c')) logging.warning('Elemental contribution for \'{:s}\' calculated from me_mets.txt file.'.format(mod_name)) elif me.metabolites.has_id(mod_name + '_c') and me.metabolites.get_by_id(mod_name + '_c').formula is not None: mod_elements = me.metabolites.get_by_id(mod_name + '_c').elements logging.warning('Elemental contribution for \'{:s}\' calculated from metabolite formula.'.format(mod_name)) # WARNING: electron carriers can, assuming they are neutral, transfer also protons # WARNING: Ferredoxins only transfer electrons; thioredoxins and others transfer protons and electrons. elif mod.startswith('Oxidized'): if base_complex in me.global_info['electron_transfers'].get('ferredoxins', []): mod_elements = {'H': 0} logging.warning('Elemental contribution for \'{:s}\' calculated manually.'.format(base_complex)) elif base_complex in me.global_info['electron_transfers'].get('cytochromes', []): mod_elements = {'H': 0} logging.warning('Elemental contribution for \'{:s}\' calculated manually.'.format(base_complex)) elif 'Oxidized(1)' == mod and base_complex not in me.global_info['electron_transfers'].get('flavodoxins', ['FLAVODOXIN']): mod_elements = {'H': -2} logging.warning('Elemental contribution for \'{:s}\' calculated manually.'.format(base_complex)) elif 'Oxidized(2)' == mod and base_complex not in me.global_info['electron_transfers'].get('flavodoxins', ['FLAVODOXIN']): mod_elements = {'H': -4} logging.warning('Elemental contribution for \'{:s}\' calculated manually.'.format(base_complex)) # TODO: is the fmn cofactor in flavodoxin neutral? # WARNING: flavodoxin homologs might have a different base_complex ID compared to the ecolime model elif 'Oxidized(1)' == mod and base_complex in me.global_info['electron_transfers'].get('flavodoxins', ['FLAVODOXIN']): mod_elements = {'H': 0} logging.warning('Elemental contribution for \'{:s}\' calculated manually.'.format(base_complex)) else: logging.warning('Elemental contribution in \'{:s}\' could not be determined. Please check configuration file and add the base complex into the \'electron_transfers\' key.'.format(base_complex)) # WARNING: Negative elemental contributions cannot be set in the metabolites.txt input file elif 'glycyl(1)' == mod: mod_elements = {'H': -1} logging.warning('Elemental contribution for \'{:s}\' calculated manually.'.format(mod_name)) elif 'cosh(1)' == mod: mod_elements = {'H': +1, 'O': -1, 'S': +1} logging.warning('Elemental contribution for \'{:s}\' calculated manually.'.format(mod_name)) else: logging.warning('Elemental contribution for \'{:s}\' could not be determined. Please check me_mets.txt file.'.format(mod_name)) if mod_elements: mod_elements = collections.Counter(mod_elements) mod_elements = { k:v * int(re.findall(r'\((\d+)\)', mod)[0]) for k,v in mod_elements.items() } base_complex_elements.update(mod_elements) else: logging.warning('Attempt to calculate a corrected formula for \'{:s}\' failed. Please check if it is the correct behaviour, or if the modification \'{:s}_c\' exists as a metabolite in the ME-model or a formula is included in the me_mets.txt file.'.format(met.id, mod_name)) complex_elements = { k:base_complex_elements[k] for k in sorted(base_complex_elements) if base_complex_elements[k] != 0 } met.formula = ''.join([ '{:s}{:d}'.format(k, v) for k,v in complex_elements.items() ]) met.elements = coralme.builder.helper_functions.parse_composition(met.formula) logging.warning('Setting new formula for \'{:s}\' to \'{:s}\' successfully.'.format(met.id, met.formula)) # Update a second time to incorporate all of the metabolite formulas correctly for data in tqdm.tqdm(me.subreaction_data.query(r'(?!^\w\w\w_addition_at_\w\w\w$)'), 'Recalculation of the elemental contribution in SubReactions...', bar_format = bar_format): data._element_contribution = data.calculate_element_contribution() # Update reactions affected by formula update for r in tqdm.tqdm(me.reactions.query('^formation_'), 'Updating all FormationReactions...', bar_format = bar_format): r.update() for r in tqdm.tqdm(me.reactions.query('_mod_lipoyl'), 'Updating FormationReactions involving a lipoyl prosthetic group...', bar_format = bar_format): r.update() for r in tqdm.tqdm(me.reactions.query('_mod_glycyl'), 'Updating FormationReactions involving a glycyl radical...', bar_format = bar_format): r.update() # Update biomass_constituent_demand reaction(s) with components that previously has no formula for idx, row in biomass_constituents.iterrows(): constituent_mass = sum(me.metabolites.get_by_id(c).formula_weight / 1000. * abs(v) for c,v in row.items() if isinstance(v, (float, int))) name = 'biomass_constituent_demand' if idx == 'biomass_constituent_demand' else 'biomass_constituent_demand_' + idx rxn = me.reactions.get_by_id(name) rxn.add_metabolites({ k:-(abs(v)) for k,v in row.items() if isinstance(v, (float, int)) }, combine = False) rxn.add_metabolites({me.metabolites.get_by_id('constituent_biomass'): constituent_mass}, combine = False) # ## Part 8: Set keffs # Step 1. Determine SASA and median SASA if bool(config.get('estimate_keffs', True)): sasa_dct = { x.id:( x.formula_weight ** (3. / 4.) if x.formula_weight else 0, x.id if not x.formula_weight else False ) for x in me.metabolites if type(x) == coralme.core.component.Complex } for met in [ v[1] for k,v in sasa_dct.items() ]: if met: logging.warning('The complex \'{:s}\' has no valid formula to determine its molecular weight.'.format(met)) logging.warning('Please, set a value in the keff input file for reactions associated to the \'{:s}\' complex.'.format(met)) median_sasa = numpy.median([ v[0] for k,v in sasa_dct.items() if v[0] != 0 ]) me.global_info['median_sasa'] = median_sasa me.global_info['sasa_estimation'] = { k:v[0] for k,v in sasa_dct.items() if v[0] != 0 } me.global_info['sasa_estimation_missing'] = [ v[1] for k,v in sasa_dct.items() if v[0] == 0 ] # Step 2: Estimate keff for all the reactions in the model #if "complex" not in df_keffs.columns: #df_keffs.empty: # The if True avoids the estimation if the user uses an "incomplete" input reaction_ids = [ rxn for rxn in me.reactions if isinstance(rxn, coralme.core.reaction.MetabolicReaction) if rxn.id not in [ 'dummy_reaction_FWD_SPONT', 'dummy_reaction_REV_SPONT' ] if rxn._complex_data is not None ] #if 'complex' in df_keffs.columns: # user provided a file with keffs # This is always True because w/o a user file, df_keffs is empty with open('{:s}/building_data/reaction_median_keffs.txt'.format(me.global_info['out_directory']), 'r') as infile: reaction_median_keffs = pandas.read_csv(infile, sep = '\t').set_index('reaction') mapped_keffs = {} for rxn in tqdm.tqdm(reaction_ids, 'Estimating effective turnover rates for reactions using the SASA method...', bar_format = bar_format): logging.warning('Estimating effective turnover rates for reaction \'{:s}\''.format(rxn.id)) base_id = rxn._stoichiometric_data.id cplx_id = me.metabolites.get_by_id(rxn._complex_data.id).id if base_id not in reaction_median_keffs.index: continue median_keff = reaction_median_keffs.T[base_id]['keff'] sasa = sasa_dct[cplx_id][0] keff = sasa * median_keff / median_sasa mapped_keffs[rxn] = 3000. if keff > 3000. else 0.01 if keff < 0.01 else keff # WARNING: keff won't be deprecated by coupling_coefficient_enzyme or coupling_coefficient_subreaction properties # WARNING: keff maps back to the value used to determine coupling_coefficient_enzyme or coupling_coefficient_subreaction # dictionary of reaction IDs : coralme.core.reaction objects rxns_to_map = { x.id:x for x in me.subreaction_data + me.reactions if hasattr(x, 'keff') } # Step 3: Replace user values if they match for idx, row in tqdm.tqdm(list(df_keffs.iterrows()), 'Mapping effective turnover rates from user input...', bar_format = bar_format): if row['direction'] == '' and row['complex'] == '' and row['mods'] == '': # subreactions have ID = reaction_name idx = row['reaction'] else: # metabolic reactions have ID = reaction_name + direction + complex idx = '{:s}_{:s}_{:s}'.format(row['reaction'], row['direction'], row['complex']) if row['mods'] != '': idx = '{:s}_mod_{:s}'.format(idx, '_mod_'.join(row['mods'].split(' AND '))) if idx in rxns_to_map.keys(): mapped_keffs[rxns_to_map[idx]] = 3000. if float(row['keff']) > 3000. else 0.01 if float(row['keff']) < 0.01 else row['keff'] logging.warning('Mapping of the effective turnover rate for \'{:}\' with a user provided value.'.format(idx)) else: logging.warning('Mapping of the effective turnover rate for \'{:}\' reaction failed. Please check if the reaction or subreaction is in the ME-model.'.format(idx)) # Step 4: Set keffs if mapped_keffs: for rxn, keff in tqdm.tqdm(sorted(mapped_keffs.items(), key = lambda x: x[0].id), 'Setting the effective turnover rates using user input...', bar_format = bar_format): if isinstance(rxn, coralme.core.reaction.MetabolicReaction): rxn.coupling_coefficient_enzyme = float(keff) * me.unit_registry.parse_units('1 per second') if isinstance(rxn, coralme.core.processdata.SubreactionData): rxn.coupling_coefficient_subreaction = float(keff) * me.unit_registry.parse_units('1 per second') # update() will convert the coupling coefficient into mu / value / 3600. if hasattr(rxn, 'update'): # subreactions has no update attribute rxn.update() logging.warning('Setting the effective turnover rate for \'{:s}\' in {:f} successfully.'.format(rxn.id, float(keff))) # ### 5. Add metabolite compartments coralme.builder.compartments.add_compartments_to_model(me) # ### 6. Prune reactions from ME-model # WARNING: Do it recursively to reduce further the size of the ME-model. if prune: rnum = len(me.reactions) delta = 1 while delta > 0: me.prune(skip=skip) delta = rnum - len(me.reactions) rnum = len(me.reactions) # Part 9. Save and report with open('{:s}/MEModel-step2-{:s}.pkl'.format(out_directory, model), 'wb') as outfile: pickle.dump(me, outfile) coralme.core.extended_classes.ListHandler.print_and_log('ME-model was saved in the {:s} directory as MEModel-step2-{:s}.pkl'.format(out_directory, model)) n_mets = len(me.metabolites) new_mets = n_mets * 100. / len(me.gem.metabolites) - 100 n_rxns = len(me.reactions) new_rxns = n_rxns * 100. / len(me.gem.reactions) - 100 n_genes = len(me.metabolites.query(re.compile('^RNA_(?!biomass|dummy|degradosome)'))) new_genes = n_genes * 100. / len(me.gem.genes) - 100 coralme.core.extended_classes.ListHandler.print_and_log('ME-model reconstruction is done.') coralme.core.extended_classes.ListHandler.print_and_log('Number of metabolites in the ME-model is {:d} (+{:.2f}%, from {:d})'.format(n_mets, new_mets, len(me.gem.metabolites))) coralme.core.extended_classes.ListHandler.print_and_log('Number of reactions in the ME-model is {:d} (+{:.2f}%, from {:d})'.format(n_rxns, new_rxns, len(me.gem.reactions))) coralme.core.extended_classes.ListHandler.print_and_log('Number of genes in the ME-model is {:d} (+{:.2f}%, from {:d})'.format(n_genes, new_genes, len(me.gem.genes))) if hasattr(self, 'ref'): coralme.core.extended_classes.ListHandler.print_and_log('Number of missing genes from reconstruction with homology, but no function is {:d}. Check the curation notes for more details.'.format(coralme.builder.helper_functions.check_me_coverage(self))) else: coralme.core.extended_classes.ListHandler.print_and_log('Number of missing genes from reconstruction cannot be determined.') logging.shutdown() coralme.builder.notes.save_curation_notes( self.curation_notes, self.configuration['out_directory'] + '/curation_notes.json' ) coralme.builder.notes.publish_curation_notes( self.curation_notes, self.configuration['out_directory']+ '/curation_notes.txt' ) with open('{:s}/MEReconstruction-{:s}.log'.format(log_directory, model), 'w') as outfile: for filename in [ '{:s}/MEReconstruction-step1-{:s}.log'.format(log_directory, model), '{:s}/MEReconstruction-step2-{:s}.log'.format(log_directory, model) ]: try: pathlib.Path(filename).unlink(missing_ok = True) # python>=3.8 except: if pathlib.Path(filename).exists(): pathlib.Path(filename).unlink() # python==3.7 logger = self.logger['MEReconstruction-step1'].log_list logger += self.logger['MEReconstruction-step2'].log_list debug = self.logger['MEReconstruction-step1'].debug coralme.core.extended_classes.ListHandler.log_to_file(logger, debug, outfile)
class METroubleshooter(object): """ METroubleshooter class for troubleshooting growth in a ME-model This class contains methods to identify gaps and obtain a feasible ME-model. Parameters ---------- MEBuilder : coralme.builder.main.MEBuilder """ def __init__(self, builder): self.logger = builder.logger self.me_model = builder.me_model self.configuration = builder.configuration self.curation_notes = builder.curation_notes def troubleshoot(self, growth_key_and_value = None, skip = set(), guesses = [], met_types = [], platform = None, solver = 'qminos', savefile = None, gapfill_cofactors = False): """Performs the Gap-finding step of the reconstruction. This function will iterate through different parts of the M- and E-matrices, looking for a minimal set of sinks that allows for growth. Parameters ---------- growth_key_and_value : dict A dictionary of Sympy.Symbol and value(s) to evaluate. It defines the parameters for the feasibility checks in each iteration. skip : set A set of ME-components to not evaluate met_types: list Any combination of 'ME-Deadends', 'Cofactors', 'All-Deadends', 'Metabolite', 'GenerictRNA', 'Complex', 'TranscribedGene', 'TranslatedGene', 'ProcessedProtein', and/or 'GenericComponent'. solver: str Solver to use. Values: 'qminos', 'gurobi' or 'cplex' """ types = { 'M-matrix' : [ 'ME-Deadends', 'Cofactors', 'All-Deadends', 'Metabolite' ], 'E-matrix' : [ 'GenerictRNA', 'Complex', 'TranscribedGene', 'TranslatedGene', 'ProcessedProtein', 'GenericComponent' ] } if hasattr(self, 'notes') and self.notes.get('from cobra', False): met_types = [ ('M-matrix', 'Metabolite') ] elif len(met_types) > 0: met_types = [ ('M-matrix', x) if x in types['M-matrix'] else ('E-matrix', x) if x in types['E-matrix'] else None for x in set(met_types) ] met_types = [ x for x in met_types if x is not None ] else: logging.warning('Metabolite types valid values are {:s}. The predefined order of metabolites will be tested.\n'.format(', '.join(types['M-matrix'] + types['E-matrix']))) met_types = [] for x, y in types.items(): for met in y: met_types.append((x, met)) if not hasattr(self, 'me_model'): me = self self = coralme.builder.main.MEBuilder(**{'out_directory' : '.'}) self.me_model = me self.configuration['out_directory'] = './' self.configuration['log_directory'] = './' self.me_model.troubleshooting = True if solver in ['gurobi', 'cplex']: self.me_model.get_solution = self.me_model.optimize_windows self.me_model.check_feasibility = self.me_model.feas_windows(solver = solver) elif solver == "qminos": self.me_model.get_solution = self.me_model.optimize self.me_model.check_feasibility = self.me_model.feasibility print('The MINOS and quad MINOS solvers are a courtesy of Prof Michael A. Saunders. Please cite Ma, D., Yang, L., Fleming, R. et al. Reliable and efficient solution of genome-scale models of Metabolism and macromolecular Expression. Sci Rep 7, 40863 (2017). https://doi.org/10.1038/srep40863\n') else: raise ValueError('Solver not supported.') config = self.configuration model = config.get('ME-Model-ID', 'coralME') out_directory = config.get('out_directory', '.') log_directory = config.get('log_directory', '.') # clean restart if troubleshooter is killed if not hasattr(self.me_model, "troubleshooted") or self.me_model.troubleshooted: pass # do not remove TS reactions else: rxns = self.me_model.reactions.query('^TS_') self.me_model.remove_reactions(rxns) # set logger log = logging.getLogger() # root logger for hdlr in log.handlers[:]: # remove all old handlers log.removeHandler(hdlr) # Old code works in a separate script; but it works if we remove the old handler logging.basicConfig(filename = '{:s}/METroubleshooter-{:s}.log'.format(log_directory, model), filemode = 'w', level = logging.WARNING, format = log_format) log.addHandler(self.logger['METroubleshooter']) log.addHandler(logging.StreamHandler(sys.stdout)) logging.captureWarnings(True) if growth_key_and_value is None: growth_key_and_value = { self.me_model.mu : 0.001 } growth_key, growth_value = zip(*growth_key_and_value.items()) logging.warning('~ '*1 + 'Troubleshooting started...') if gapfill_cofactors: # Ensure cofactors can be produced logging.warning(' '*1 + 'Ensuring the ME-model can produce all cofactors') cofactors = coralme.builder.troubleshooting.get_cofactors_in_me_model(self.me_model) ts_cofactors = coralme.builder.troubleshooting.add_exchange_reactions(self.me_model, cofactors, prefix = 'COFACTOR_TS_') for ts in ts_cofactors: ts.bounds = (1e-6,1000) # Step 1. Test if current ME-model is feasible logging.warning(' '*1 + 'Checking if the ME-model can simulate growth without gapfilling reactions...') if self.me_model.check_feasibility(keys = growth_key_and_value): logging.warning(' '*1 + 'Original ME-model is feasible with a tested growth rate of {:f} 1/h'.format(list(growth_value)[0])) works = True else: logging.warning(' '*1 + 'Original ME-model is not feasible with a tested growth rate of {:f} 1/h'.format(list(growth_value)[0])) works = False # Step 2. Test different sets of MEComponents if len(guesses) > 0: guesses = [ x for x in guesses if self.me_model.metabolites.has_id(x) ] if len(guesses) > 0: met_types.insert(0, (guesses, 'User guesses')) e_gaps = [] history = dict() if works == False: #logging.warning('~ '*1 + 'Step 3. Attempt gapfilling different groups of E-matrix components.') for idx, met_type in enumerate(met_types): logging.warning(' '*1 + 'Step {}. Gapfill reactions to provide components of type \'{:s}\' using brute force.'.format(idx + 1, met_type[1])) if met_type[0] == 'E-matrix': logging.warning(' '*5 + 'Relaxing bounds for E-matrix gap-fill') self.me_model.relax_bounds() self.me_model.reactions.protein_biomass_to_biomass.lower_bound = growth_value[0]/100 # Needed to enforce protein production if met_type[1] == 'User guesses': history, output = coralme.builder.troubleshooting.brute_check(self.me_model, growth_key_and_value, met_type, skip = skip, history = history,solver=solver) else: history, output = coralme.builder.troubleshooting.brute_check(self.me_model, growth_key_and_value, met_type[1], skip = skip, history = history,solver=solver) bf_gaps, no_gaps, works = output # close sink reactions that are not gaps if no_gaps: self.me_model.remove_reactions(no_gaps) if works: e_gaps = bf_gaps break if works: # Meaning it can grow in any case # Save warnings if isinstance(e_gaps, list) and e_gaps: self.curation_notes['troubleshoot'].append({ 'msg':'Some metabolites are necessary for growth', 'triggered_by':e_gaps,#[ x for y in e_gaps for x in y ], 'importance':'critical', 'to_do':'Fix the gaps by adding reactions or solving other warnings. If some items are from the E-matrix, fix these first!'}) # delete added sink reactions with lb == 0 and ub == 0 sinks = [] for rxn in self.me_model.reactions.query('^TS_'): sinks.append(rxn.id) #f = self.me_model.solution.fluxes[rxn.id] if rxn.lower_bound == 0 and rxn.upper_bound == 0:# or f == 0: self.me_model.remove_reactions([rxn]) if sinks: logging.warning('~ '*1 + 'Troubleshooter added the following sinks: {:s}.'.format(', '.join(sinks))) logging.warning('~ '*1 + 'Final step. Fully optimizing with precision 1e-6 and save solution into the ME-model...') # Delete demand reactions for cofactor gapfilling if gapfill_cofactors: rxns = self.me_model.reactions.query('^COFACTOR_TS_') self.me_model.remove_reactions(rxns) # final optimization if self.me_model.get_solution(max_mu = 3.0, precision = 1e-6, verbose = False): if hasattr(self.me_model, 'gem'): logging.warning(' '*1 + 'Gapfilled ME-model is feasible with growth rate {:f} (M-model: {:f}).'.format(self.me_model.solution.objective_value, self.me_model.gem.optimize().objective_value)) else: logging.warning(' '*1 + 'Gapfilled ME-model is feasible with growth rate {:f}.'.format(self.me_model.solution.objective_value)) else: logging.warning(' '*1 + 'Error: Gapfilled ME-model is not feasible ?') # save model as a pickle file if savefile is None: savefile = '{:s}/MEModel-step3-{:s}-TS.pkl'.format(out_directory, self.me_model.id) message = 'ME-model was saved in the {:s} directory as MEModel-step3-{:s}-TS.pkl'.format(out_directory, self.me_model.id) elif pathlib.Path(savefile).parent.exists(): message = 'ME-model was saved to {:s}.'.format(savefile) else: message = False logging.warning('Model was not saved. Please do it manually.') self.me_model.troubleshooted = True if message: with open(savefile, 'wb') as outfile: pickle.dump(self.me_model, outfile) logging.warning(message) else: logging.warning('~ '*1 + 'METroubleshooter failed to determine a set of problematic metabolites.') self.me_model.troubleshooted = False logging.shutdown() coralme.builder.notes.save_curation_notes( self.curation_notes, self.configuration['out_directory'] + '/curation_notes.json' ) coralme.builder.notes.publish_curation_notes( self.curation_notes, self.configuration['out_directory']+ '/curation_notes.txt' ) # We will remove duplicates entries in the log output with open('{:s}/METroubleshooter-{:s}.log'.format(log_directory, model), 'w') as outfile: logger = self.logger['METroubleshooter'] coralme.core.extended_classes.ListHandler.log_to_file(logger.log_list, logger.debug, outfile) self.me_model.troubleshooting = False