Source code for coralme.builder.main

# python imports
import io
import os
import re
import sys
import pickle
import shutil
import pathlib
import warnings
import collections
import subprocess
import logging

# third party imports
import tqdm
import sympy
import numpy
import pandas
import anyconfig

import cobra
import coralme

# configuration
from coralme.core.extended_classes import log_format
bar_format = '{desc:<75}: {percentage:.1f}%|{bar:10}| {n_fmt:>5}/{total_fmt:>5} [{elapsed}<{remaining}]'
try:
	warnings.simplefilter(action = 'ignore', category = pandas.errors.SettingWithCopyWarning)
except:
	warnings.warn("This pandas version does not allow for correct warning handling. Pandas >=1.5.1 is suggested.")


[docs]
class MEBuilder(object):
	"""
	MEBuilder class to coordinate the reconstruction of ME-models.

	Parameters
	----------
	*args:
		Positional arguments are passed as paths to JSON files that
		update the configuration of the parent class.
	**kwargs:
		Further keyword arguments are passed on as dictionaries
		to update the configuration of the parent class.

	"""
	def __init__(self, *args,
			m_model_path = False,
			genbank_path = False,
			locus_tag = 'locus_tag',
			blast_threads = None, # if zero, no run_bbh_blast
			df_gene_cplxs_mods_rxns = 'automated-org-with-refs.xlsx',
			df_TranscriptionalUnits = '',
			df_matrix_stoichiometry = '',
			df_matrix_subrxn_stoich = '',
			df_metadata_orphan_rxns = '',
			df_metadata_metabolites = '',
			out_directory = './',
			log_directory = './',
			debug = False, **kwargs):
		"""
		keyword arguments
			ME-Model-ID, str
			m-model-path, JSON or SBML file
			genbank-path, GenBank file only

			df_gene_cplxs_mods_rxns, path to output, must end in '.xlsx'
			df_TranscriptionalUnits
			df_matrix_stoichiometry
			df_matrix_subrxn_stoich
			df_metadata_orphan_rxns
			df_metadata_metabolites

			log_directory, str
			out_directory, str

			dev_reference, default True
			user_reference, path. If 'user_reference' is defined, dev_reference is False
			add_lipoproteins, default True
			estimate_keffs, default True
		"""

		self.available_reference_models = {
				'iJL1678b' : 'locus_tag', # E. coli, gram negative
				'iJT964'   : 'old_locus_tag', # B. subtilis, gram positive
				}

		run_bbh_blast = True
		if blast_threads < 0:
			run_bbh_blast = False

		if blast_threads is None:
			blast_threads = os.cpu_count()-1

		config = {
			'ME-Model-ID' : 'coralME',
			'm-model-path' : m_model_path,
			'genbank-path' : genbank_path,

			'df_gene_cplxs_mods_rxns' : '{:s}/{:s}'.format(out_directory, df_gene_cplxs_mods_rxns),
			'df_TranscriptionalUnits' : df_TranscriptionalUnits,
			'df_matrix_stoichiometry' : df_matrix_stoichiometry,
			'df_matrix_subrxn_stoich' : df_matrix_subrxn_stoich,
			'df_metadata_orphan_rxns' : df_metadata_orphan_rxns,
			'df_metadata_metabolites' : df_metadata_metabolites,

			'log_directory' : log_directory,
			'out_directory' : out_directory,

			'locus_tag' : locus_tag,
			'run_bbh_blast' : run_bbh_blast,
			'blast_threads' : blast_threads,
			'dev_reference' : True,
			'add_lipoproteins' : True,
			# 'add_translocases' : False,
			'estimate_keffs' : True,

			'defer_to_rxn_matrix' : [],

			'percent_dna_data' : [0.0592, 0.0512, 0.0330, 0.0252, 0.0222, 0.0208], # E. coli ME-model
			'gr_data_doublings_per_hour' : [0, 0.6, 1.0, 1.5, 2.0, 2.5], # E. coli ME-model

			'complex_cofactors' : {
				'fes_transfers' : [],
				'fes_chaperones' : {},
				'bmocogdp_chaperones' : {},

				'acps_subreactions': {'mod_pan4p_c': ['acpP_activation']},
				'biotin_subreactions' : { 'mod_btn_c' : [ 'biotin_ligase' ] },
				'citx_subreactions': {'mod_2tpr3dpcoa_c': ['citx_transfer_to_citd']},
				'glycyl_subreactions': {'mod_glycyl_c': ['gre_activation']},
				'lipoate_subreactions' : { 'mod_lipoyl_c' : [ 'lipoyl_denovo', 'lipoyl_scavenging' ] },

				'FeFe/NiFe' : { 'mod_FeFe_cofactor_c' : '', 'mod_NiFe_cofactor_c' : '' }
				},
			}

		for input_file in args:
			with open(input_file, 'r') as infile:
				config.update(anyconfig.load(infile))

		if kwargs:
			config.update(kwargs)

		# User must provide values for m-model-path and genbank-path
		if not config.get('m-model-path', False):
			raise ValueError('Configuration must supply a \'m-model-path\' key and value.')
		if not config.get('genbank-path', False):
			raise ValueError('Configuration must supply a \'genbank-path\' key and value.')

		self.configuration = config
		self.me_model = coralme.core.model.MEModel(id_or_model = self.configuration.get('ME-Model-ID', 'coralME'),
												   name = self.configuration.get('ME-Model-ID', 'coralME'),
												   mu = self.configuration.get('growth_key', 'mu'))
		self.curation_notes = coralme.builder.notes.load_curation_notes(
			self.configuration['out_directory'] + '/curation_notes.json'
		)
		self.logger = {
			'MEBuilder' : coralme.core.extended_classes.ListHandler([]),
			'MEReconstruction-step1' : coralme.core.extended_classes.ListHandler([]),
			'MEReconstruction-step2' : coralme.core.extended_classes.ListHandler([]),
			'METroubleshooter' : coralme.core.extended_classes.ListHandler([])
			}

		data = \
			'code,interpretation,gram\n' \
			'CCI-CW-BAC-POS-GP,Cell_Wall,pos\n' \
			'CCI-OUTER-MEM-GN,Outer_Membrane,neg\n' \
			'CCI-PM-BAC-NEG-GN,Inner_Membrane,neg\n' \
			'CCI-PM-BAC-POS-GP,Plasma_Membrane,pos\n' \
			'CCO-MEMBRANE,Membrane,'

		self.location_interpreter = pandas.read_csv(io.StringIO(data), index_col = 0)

		# check user options
		exists = []
		for filename in [
			"m-model-path",
			"genbank-path",

			"df_TranscriptionalUnits",
			"df_matrix_stoichiometry",
			"df_matrix_subrxn_stoich",
			"df_metadata_orphan_rxns",
			"df_metadata_metabolites",
			"df_reaction_keff_consts",

			"biocyc.genes",
			"biocyc.prots",
			"biocyc.TUs",
			"biocyc.RNAs",
			"biocyc.seqs",
			]:

			if config.get(filename, None) is None:
				pass
			elif config.get(filename, '') == '':
				pass
			else:
				exists.append([config[filename], pathlib.Path(config[filename]).exists()])

		if not all([ y for x,y in exists ]):
			raise FileNotFoundError('Check the path to the {:s} file(s).'.format(', '.join([ x for x,y in exists if y == False ])))

		for option in [ "out_directory", "log_directory" ]:
			if config.get(option, '.') == '':
				self.configuration[option] = config.get('ME-Model-ID', 'coralME')

		# debug data
		self.debug_data = {}

	def generate_files(self, overwrite = True):
		"""Performs the Synchronize and Complement steps of the reconstruction.

		This function will read the Organism and the Reference. It will
		synchronize the input files, complement them, and finally build
		the OSM for the Organism.

		Parameters
		----------
		overwrite : bool
			If True, overwrite the OSM using the defined path in the configuration.
		"""
		config = self.configuration
		model = config.get('ME-Model-ID', 'coralME')
		directory = config.get('log_directory', '.')
		#if overwrite and os.path.exists(directory):
			#shutil.rmtree(directory + '/blast_files_and_results')
			#shutil.rmtree(directory + '/building_data')

		if not os.path.exists(directory):
			os.mkdir(directory)

		log = logging.getLogger() # root logger
		for hdlr in log.handlers[:]: # remove all old handlers
			log.removeHandler(hdlr)

		logging.basicConfig(filename = '{:s}/MEBuilder-{:s}.log'.format(directory, model), filemode = 'w', level = logging.WARNING, format = log_format)
		log.addHandler(self.logger['MEBuilder'])
		#log.addHandler(logging.StreamHandler(sys.stdout))
		logging.captureWarnings(True)

		sep = ''
		coralme.core.extended_classes.ListHandler.print_and_log("{}Initiating file processing...".format(sep))

		# Read organism
		self.org = coralme.builder.organism.Organism(config, is_reference = False)
		self.org.get_organism()
		self.curation_notes = self.org.curation_notes
		# self.org.rpod = ''
		# self.org.get_rna_polymerase(force_RNAP_as='')

		logging.warning("Modifying and preparing M-model")
# 		self.curate()
		self.prepare_model()

		# ## Homology with reference
		# Curation note: Check which locus tag field in the genbank agrees with those in the biocyc files.
		# Make sure you have specified that field in the beginning of this notebook.
		# Reference
		if bool(config.get('dev_reference', False)) or bool(config.get('user_reference', False)):
			logging.warning("Reading reference")

			self.ref = coralme.builder.organism.Organism(config.copy(), is_reference = True, available_reference_models = self.available_reference_models)
			self.ref.get_organism()

			folder = self.org.blast_directory
			if bool(config.get('run_bbh_blast', True)):
				blast_threads = config.get('blast_threads', 1)
				coralme.core.extended_classes.ListHandler.print_and_log("~ Running BLAST with {} threads...".format(blast_threads))
				self.org.gb_to_faa('org', element_types = {'CDS'}, outdir = self.org.blast_directory)
				self.ref.gb_to_faa('ref', element_types = {'CDS'}, outdir = self.org.blast_directory)

				def execute(cmd):
					if os.name == 'nt':
						cmd = 'cmd /c {:s}'.format(cmd)
					cmd = re.findall(r'(?:[^\s,"]|"+(?:=|\\.|[^"])*"+)+', cmd)
					out, err = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.PIPE).communicate()

				# make blast databases
				execute('makeblastdb -in {:s}/org.faa -dbtype prot -out {:s}/org'.format(folder, folder))
				execute('makeblastdb -in {:s}/ref.faa -dbtype prot -out {:s}/ref'.format(folder, folder))

				# bidirectional blast
				execute('blastp -db {:s}/org -query {:s}/ref.faa -num_threads {} -out {:s}/org_as_db.txt -outfmt 6'.format(folder, folder, blast_threads, folder))
				execute('blastp -db {:s}/ref -query {:s}/org.faa -num_threads {} -out {:s}/ref_as_db.txt -outfmt 6'.format(folder, folder, blast_threads, folder))

				#os.system('{}/auto_blast.sh {}'.format(self.directory,self.org.directory))
				coralme.core.extended_classes.ListHandler.print_and_log('BLAST done.')

			# #### Reciprocal hits
			logging.warning("Getting homologs")

			self.get_homology(evalue = self.org.config.get("e_value_cutoff", 1e-10))
			self.homology.mutual_hits_df.to_csv('{:s}/mutual_hits.txt'.format(folder))
			#self.homology.mutual_hits_df.to_csv(self.org.directory+'{:s}/mutual_hits.txt'.format(folder))

			# #### Get enzyme homology
			self.homology.get_complex_homology()

			# #### Update model info with homology
			logging.warning("Updating from homology")
			self.update_from_homology()

		filename = self.org.config.get('df_TranscriptionalUnits', self.org.directory + "TUs_from_biocyc.txt")
		filename = self.org.directory + "TUs_from_biocyc.txt" if filename == '' else filename

		df = self.org.TU_df
		df = df.sort_index(inplace = False)

		if overwrite:
			with open(filename, 'w') as outfile:
				self.org.TU_df.to_csv(outfile, sep = '\t')
				logging.warning('The BioCyc transcriptional data file was processed and overwritten into the {:s} file.'.format(filename))
		else:
			if pathlib.Path(filename).exists():
				logging.warning('Set \'overwrite = True\' to overwrite the {:s} file.'.format(filename))
			else:
				with open(filename, 'w') as outfile:
					self.org.TU_df.to_csv(outfile, sep = '\t')
					logging.warning('The BioCyc transcriptional data file was saved to the ./{:s} file.'.format(filename))
		self.configuration['df_TranscriptionalUnits'] = filename

		filename = self.org.directory + "subreaction_matrix.txt"
		if overwrite:
			with open(filename, 'w') as outfile:
				self.org.subreaction_matrix.to_csv(outfile, sep = '\t')
				logging.warning('The subreaction data file was processed and overwritten into the {:s} file.'.format(filename))
		else:
			if pathlib.Path(filename).exists():
				logging.warning('Set \'overwrite = True\' to overwrite the {:s} file.'.format(filename))
			else:
				with open(filename, 'w') as outfile:
					self.org.subreaction_matrix.to_csv(outfile, sep = '\t')
					logging.warning('The subreaction data file was saved to the ./{:s} file.'.format(filename))

		filename = self.org.directory + "me_metabolites.txt"
		if overwrite:
			with open(filename, 'w') as outfile:
				self.org.me_mets.to_csv(outfile, sep = '\t')
				logging.warning('The M to ME metabolite mapping file was processed and overwritten into the {:s} file.'.format(filename))
		else:
			if pathlib.Path(filename).exists():
				logging.warning('Set \'overwrite = True\' to overwrite the {:s} file.'.format(filename))
			else:
				with open(filename, 'w') as outfile:
					self.org.me_mets.to_csv(outfile, sep = '\t')
					logging.warning('The M to ME metabolite mapping file was saved to the ./{:s} file.'.format(filename))

		logging.warning("Updating enzyme-reaction association")
		self.update_enzyme_reaction_association()
		logging.warning("Processing RNA modifications")
		self.org.process_rna_modifications()
		logging.warning("Getting tRNA to codon information")
		self.get_trna_to_codon()

		# #### Biomass constituents
		self.org.biomass_constituents = config.get('flux_of_biomass_constituents', {})

		# Fill builder with dummy
		logging.warning("Filling files with CPLX_dummy")
		self.fill()
		# Final checks of builder
		logging.warning("Performing final checks of files")
		self.check()

		# Update manual curation files for user reference
		logging.warning("Generating filled manual curation files")
		self.org.manual_curation.save()
		self.org.complexes_df.to_csv(self.org.directory + "reference_files/complexes.txt")
		self.org.protein_mod.to_csv(self.org.directory + "reference_files/protein_modification.txt")

		# Update notes
		logging.warning("Generating curation notes")
		coralme.builder.notes.save_curation_notes(
				self.curation_notes,
				self.configuration['out_directory'] + '/curation_notes.json'
			)
		coralme.builder.notes.publish_curation_notes(
				self.curation_notes,
				self.configuration['out_directory']+ '/curation_notes.txt'
			)

		logging.warning("Saving modified M-model")
		filename = '{:s}/building_data/m_model_modified.json'.format(config.get('out_directory', './'))
		cobra.io.save_json_model(self.org.m_model, filename)
		config['m-model-path'] = filename

		logging.warning("Generating new configuration file")
		self.input_data(self.org.m_model, overwrite)
		coralme.core.extended_classes.ListHandler.print_and_log("{}File processing done.".format(sep))

		logging.shutdown()

		# We will remove duplicates entries in the log output
		with open('{:s}/MEBuilder-{:s}.log'.format(config.get('log_directory', '.'), config.get('ME-Model-ID', 'coralME')), 'w') as outfile:
			logger = self.logger['MEBuilder']
			coralme.core.extended_classes.ListHandler.log_to_file(logger.log_list, logger.debug, outfile)

	def prepare_model(self):
		"""Performs initial preparation of the M-model.

		This function will fix some known issues that M-models can

		Parameters
		----------
		overwrite : bool
			If True, overwrite the OSM using the defined path in the configuration.
		"""
		m_model = self.org.m_model
		target_compartments = {"c": "Cytosol", "e": "Extra-organism", "p": "Periplasm"}
		new_dict = {}
		warn_compartments = []
		for k,v in tqdm.tqdm(m_model.compartments.items(),
						'Gathering M-model compartments...',
						bar_format = bar_format,
						total=len(m_model.compartments)):
			if k in target_compartments:
				new_dict[k] = target_compartments[k]
			else:
				warn_compartments.append(k)
				new_dict[k] = k
		m_model.compartments = new_dict
		gene_list = []
		for m in tqdm.tqdm(m_model.metabolites,
						'Fixing compartments in M-model metabolites...',
						bar_format = bar_format):
			if not m.compartment:
				logging.warning("Fixing compartment for metabolite {}".format(m.id))
				m.compartment = m.id[-1]
		for r in tqdm.tqdm(m_model.reactions,
						'Fixing missing names in M-model reactions...',
						bar_format = bar_format):
			if not r.name or isinstance(r.name, float):
				logging.warning("Fixing name for reaction {}".format(r.id))
				r.name = r.id

		# Solve m_model
		solution = m_model.optimize()
		if not solution or solution.objective_value < 1e-6:
			self.org.curation_notes['prepare_model'].append({
				'msg':'M-model cannot grow',
				'importance':'critical',
				'to_do':'Check that the model you provided works'})

		try:
			self.org.biomass = str(m_model.objective.expression.as_two_terms()[0]).split('*')[1]
			biomass_rxn = m_model.reactions.get_by_id(self.org.biomass)
			logging.warning('{} was identified as the biomass reaction'.format(biomass_rxn.id))
		except:
			self.org.biomass = None
			biomass_rxn = None
			logging.warning('Could not identify biomass reaction')

		self.org.GAM = self.configuration.get('gam',None)
		# Get GAM
		if self.org.GAM is None:
			adp = m_model.metabolites.adp_c
			if biomass_rxn is not None and adp in biomass_rxn.metabolites:
				self.org.GAM = biomass_rxn.metabolites[adp]
				logging.warning('GAM identified with value {}'.format(self.org.GAM))
			else:
				self.org.GAM = self.configuration.get('gam', self.me_model._gam)
				self.org.curation_notes['prepare_model'].append({
					'msg':'GAM could not be identified from biomass reaction, setting a standard value of {:g}. adp_c is not present as a product.'.format(self.me_model._gam),
					'importance':'high',
					'to_do':'Check whether the biomass reaction was read or defined correctly. You can define GAM with builder.configuration[\'gam\'] = GAM_value'})
		self.org.NGAM = self.configuration.get('ngam',None)
		if self.org.NGAM is None:
			# Get NGAM
			NGAMs = ['NGAM','ATPM']
			for r in NGAMs:
				if r in m_model.reactions:
					rxn = m_model.reactions.get_by_id(r)
					if rxn.lower_bound <= 0:
						continue
					self.org.NGAM = rxn.lower_bound
					logging.warning('{} was identified as NGAM with value {}'.format(r,self.org.NGAM))
					break
		if self.org.NGAM is None:
			self.org.NGAM = self.configuration.get('ngam', self.me_model._ngam)
			self.org.curation_notes['prepare_model'].append({
				'msg':'NGAM could not be identified in M-model, setting a standard value of {:g}.'.format(self.me_model._ngam),
				'importance':'high',
				'to_do':'Manually define NGAM with builder.configuration[\'ngam\'] = NGAM_value. Check if a reaction with identifier NGAM or ATPM has a zero or negative lower bound.'})
		elif self.org.NGAM == 0:
			self.org.NGAM = self.configuration.get('ngam', self.me_model._ngam)
			self.org.curation_notes['prepare_model'].append({
				'msg':'NGAM was identified from reaction {}, but its lower bound is 0. NGAM set to a standard value of {:g}.'.format(rxn.id, self.me_model._ngam),
				'importance':'high',
				'to_do':'Manually define NGAM with builder.configuration[\'ngam\'] = NGAM_value'})
		# Warnings
		if warn_compartments:
			self.org.curation_notes['prepare_model'].append({
				'msg':'Some compartments in m_model are unknown',
				'triggered_by':warn_compartments,
				'importance':'medium',
				'to_do':'Check whether the compartment is correct. If not, change it in the reaction ID in the m_model.'})

	def get_homology(self, evalue=1e-10):
		"""Calculates homology between Organism and Reference.

		Parameters
		----------
		evalue : float, default 1e-10
			Sets the E-value cutoff for calling protein
			homologs using BLAST.
		"""
		self.homology = coralme.builder.homology.Homology(self.org, self.ref, evalue = evalue)

	def get_trna_to_codon(self):
		"""Gets tRNA to codon association from the Genome.
		"""
		import Bio
		from Bio import SeqIO, Seq, SeqFeature, SeqUtils

		me_model = self.me_model
		contigs = self.org.contigs

		# Dictionary of tRNA locus ID to the model.metabolite object. It accounts for misacylation
		trna_to_aa = {}

		# Dictionary of tRNA locus ID to the model.metabolite object. It accounts for misacylation
		trna_misacylation = self.configuration.get('trna_misacylation',dict())

		# Dictionary of tRNA locus ID to amino acid, one dict of tRNAs per organelle type
		# aa2trna does not account for misacylation
		aa2trna = { 'c' : {} } # prokaryotes and eukaryotes
		if self.configuration.get('domain','prokaryote').lower() not in ['prokaryote', 'bacteria']:
			aa2trna.update({'m' : {}, 'h' : {}}) # mitochondria and plastids

		# Translation tables, one table per organelle type
		transl_tables = { 'c' : set() } # prokaryotes and eukaryotes
		if self.configuration.get('domain','prokaryote') not in ['prokaryote', 'bacteria']:
			transl_tables.update({'m' : set(), 'h' : set()}) # mitochondria and plastids

		# Messages
		msg1 = 'From the tRNA misacylation dictionary, the {:s} gene [tRNA({:s})] is loaded and converted into {:s}-tRNA({:s}). Make sure a MetabolicReaction to convert a {:s}-tRNA({:s}) into a {:s}-tRNA({:s}) is present in the ME-model.'
		msg2 = 'From the tRNA misacylation dictionary, the {:s} gene [tRNA({:s})] is loaded and converted into {:s}-tRNA({:s}). No further modification needs to take place.'

		canonical_aas = [
			'Ala', 'Arg', 'Asn', 'Asp', 'Cys', 'Gln', 'Glu', 'Gly', 'His', 'Ile',
			'Leu', 'Lys', 'Met', 'Phe', 'Pro', 'Ser', 'Thr', 'Trp', 'Tyr', 'Val'
			]

		for contig in contigs:
			iterator = tqdm.tqdm(contig.features, 'Getting tRNA to codon dictionary from {}'.format(contig.id), bar_format = bar_format) if len(contigs) < 10 else contig.features
			for feature in iterator:
				# Find organelle in source
				if feature.type in ['source', 'region']: # genbanks from gff+fna do not contain a 'source' feature
					organelle = feature.qualifiers.get('organelle', [None])[0]
					continue
				seq = feature.extract(contig).seq.replace('-', '')
				if feature.type == 'CDS':
					# Add the translation table
					prot = feature.qualifiers.get('translation', [''])[0]
					transl_table = feature.qualifiers.get('transl_table', ['1'])[0]

					# Add the translation table per organelle
					if organelle is None:
						transl_tables['c'].add(int(transl_table))
						if me_model.global_info['domain'].lower() not in ['prokaryote', 'bacteria']:
							logging.warning('Contig \'{:s}\' does not report an organelle type.'.format(contig.id))
					elif organelle.lower() in ['mitochondria', 'mitochondrion']:
						transl_tables['m'].add(int(transl_table))
					elif organelle.lower() in ['chloroplast', 'plastid']:
						transl_tables['h'].add(int(transl_table))
					else:
						continue

				if not feature.type == 'tRNA':
					continue

				bnum = self.org._get_feature_locus_tag(feature)
				if bnum is None:
					continue
				aa = feature.qualifiers.get('product', ['tRNA-None'])[0].split('-')[1]
				if aa in canonical_aas + ['Asx', 'Glx', 'fMet', 'Sec']:
					pass
				else:
					logging.warning('The tRNA \'{:s}\' is not associated to a valid product name (tRNA-Amino acid 3 letters code)'.format(bnum))
					continue
				msg = 'The tRNA \'{:s}\' is associated to two amino acids. The \'trna_misacylation\' dictionary was modified to attempt load the correct amino acid.'
				# Special tRNA(Asx) that can be loaded with Asn (EC 6.1.1.22) or Asp (EC 6.1.1.12)
				if aa == 'Asx':
					trna_misacylation['Asx'] = 'Asp'
					logging.warning(msg.format(bnum))
				# Special tRNA(Glx) that can be loaded with Gln (EC 6.1.1.18) or Glu (EC 6.1.1.17)
				if aa == 'Glx':
					trna_misacylation['Glx'] = 'Glu'
					logging.warning(msg.format(bnum))

				if aa in trna_misacylation.keys():
					# misacylation only in mitochondria and chloroplasts
					filter1a = me_model.global_info['domain'].lower() in ['eukarya', 'eukaryote']
					filter1b = str(organelle).lower() in ['mitochondria', 'mitochondrion', 'chloroplast', 'plastid']
					# misacylation in the cytoplasm of Gram-positive eubacteria (and other bacteria such as cyanobacteria)
					filter2 = me_model.global_info['domain'].lower() in ['bacteria', 'prokaryote']

					if filter1a and filter1b or filter2:
						trna_to_aa[bnum] = trna_misacylation[aa]
						if aa.endswith('x'):
							logging.warning(msg2.format(bnum, aa, trna_misacylation[aa], aa))
						else:
							logging.warning(msg1.format(bnum, aa, trna_misacylation[aa], aa, trna_misacylation[aa], aa, aa, aa))
					else:
						# misacylation is not valid in the compartment and domain
						trna_to_aa[bnum] = aa
				else:
					trna_to_aa[bnum] = aa

				if organelle is None:
					aa2trna['c'][bnum] = aa
				elif organelle.lower() in ['mitochondria', 'mitochondrion']:
					aa2trna['m'][bnum] = aa
				elif organelle.lower() in ['chloroplast', 'plastid']:
					aa2trna['h'][bnum] = aa

		# trna_to_codon does not account for misacylation: { 'tRNA ID' : 'Amino acid to load into the tRNA' }
		trna_to_aa = { k:v.replace('fMet', 'Met') for k,v in trna_to_aa.items() }

		for organelle, aa2trna_dct in aa2trna.items():
			aa2trna_dct = { k:v.capitalize().split('_')[0] if 'fMet' not in v else 'fMet' for k,v in aa2trna_dct.items() }
			aa2trna_df = pandas.DataFrame(data = [aa2trna_dct.values(), aa2trna_dct.keys()]).T
			aa2trna_df = aa2trna_df.groupby(0).agg({1: lambda x: x.tolist()})
			if 'fMet' in aa2trna_df.index:
				aa2trna_df.loc['Met'] = aa2trna_df.loc['fMet'] + aa2trna_df.loc['Met']

			aa2trna[organelle] = aa2trna_df

			if not aa2trna_df.empty:
				# assign START tRNAs to every fMet-tRNA (Met-tRNA if not) and check if at least one tRNA was identified
				if 'fMet' in aa2trna_df.index:
					if len(me_model.global_info['START_tRNA']) == 0:
						me_model.global_info['START_tRNA'] = list(aa2trna_df.loc['fMet'])[0]

				if 'Met' in aa2trna_df.index:
					if len(me_model.global_info['START_tRNA']) == 0:
						me_model.global_info['START_tRNA'] = list(aa2trna_df.loc['Met'])[0]

				# final check
				if len(me_model.global_info['START_tRNA']) == 0:
					logging.warning('Unable to identify at least one \'tRNA-Met\' or \'tRNA-fMet\' annotation from the \'Definition\' column in the organism-specific matrix.')
			else:
				logging.warning('No tRNA genes were identified from their locus tags.')

		# DataFrame mapping tRNAs (list) and the encoded amino acid (index), per organelle
		# aa2trna derives from trna_to_aa, so it also accounts for misacylation: { 'organelle ID' : 'DataFrame of amino acid to load into the tRNA' }
		aa2codon = {}
		trna_to_codon = {}
		for organelle, transl_table in transl_tables.items():
			if len(transl_table) == 0:
				continue

			codon_table = Bio.Data.CodonTable.generic_by_id[list(transl_table)[0]]

			dct = { k.replace('T', 'U'):SeqUtils.seq3(v) for k,v in codon_table.forward_table.items() if 'U' not in k }
			aa2codons = pandas.DataFrame(data = [dct.keys(), dct.values()]).T.groupby(1).agg({0: lambda x: x.tolist()})
			# aa2codons derives from the translation table and maps amino acids to the codon
			aa2codon[organelle] = aa2codons
			if list(transl_table)[0] == 11:
				aa2codons.loc['Sec'] = [['UGA']] # an internal UGA encodes selenocysteine

			df = pandas.concat([aa2codons, aa2trna[organelle]], axis = 1).dropna(how = 'any').explode(1)

			# Check amino acids
			warn_trnas = []
			for aa in canonical_aas:
				if aa in aa2trna[organelle].index:
					pass
				else:
					warn_trnas.append(aa)
			if warn_trnas:
				self.org.curation_notes['get_trna_to_codon'].append({
					'msg':'Some tRNAs could be missing in the files.',
					'triggered_by':warn_trnas,
					'importance':'critical',
					'to_do':'Identify the respective missing tRNAs and add them to a file (RNAs.txt or genome.gb)'})

			trna_to_codon_organelle = { k:v + ['START'] if k in me_model.global_info['START_tRNA'] else v for k,v in zip(df[1].values, df[0].values) }
			trna_to_codon[organelle] = trna_to_codon_organelle

		me_model.global_info['trna_to_aa'] = trna_to_aa
		me_model.global_info['trna_to_codon'] = trna_to_codon

		me_model.global_info['trna_misacylation'] = trna_misacylation

		return None

	def update_enzyme_stoichiometry(self):
		complexes_df = self.org.complexes_df
		org_cplx_homolog = self.homology.org_cplx_homolog
		ref_complexes_df = self.ref.complexes_df
		mutual_hits = self.homology.mutual_hits

		cplx_stoich = {}
		for c, rc in org_cplx_homolog.items():
			if "generic" in c:
				continue
			if rc not in ref_complexes_df.index:
				continue
			for rg in ref_complexes_df["genes"][rc].split(" AND "):
				rg_id = re.findall(r'.*(?=\(\d*\))', rg)[0]
				coeff = re.findall(r"(?<=\()[0-9]{1,}", rg)
				if coeff:
					coeff = coeff[0]
				else:
					coeff = ""
				if c not in cplx_stoich:
					cplx_stoich[c] = {}
				cplx_stoich[c][mutual_hits[rg_id]] = coeff

		for c, stoich in cplx_stoich.items():
			complexes_df.at[c, "genes"] = " AND ".join(
				[g + "({})".format(coeff) for g, coeff in stoich.items()]
			)
		self.org.complexes_df = complexes_df

	def update_protein_modification(self):
		cplx_homolog = self.homology.org_cplx_homolog
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		complexes_df = self.org.complexes_df
		ref_mod_df = self.ref.protein_mod.reset_index().set_index("Core_enzyme")
		cplx_cofactor_dict = {}
		protein_mod_dict = {}
		for c, row in complexes_df.iterrows():
			if c in cplx_homolog:
				rc = cplx_homolog[c]
				if rc in ref_mod_df.index:
					if c not in cplx_cofactor_dict:
						cplx_cofactor_dict[c] = {}
					ref_mods = ref_mod_df.loc[[rc]]
					for _,row in ref_mods.iterrows():
						mods = row["Modifications"].split(" AND ")
						cplx = c
						cofs = []
						coeffs = []
						for mod in mods:
							cof = re.findall(r".*(?=\()", mod)[0]
							coeff = re.findall(r"(?<=\()[0-9]{1,}", mod)
							if coeff:
								coeff = coeff[0]
								cplx += "_mod_{}({})".format(cof, coeff)
							else:
								cplx += "_mod_{}".format(cof)
								coeff = ""
							cofs.append(cof)
							coeffs.append(coeff)
						ref_cplx_homolog[row["Modified_enzyme"]] = cplx
						cplx_homolog[cplx] = row["Modified_enzyme"]
						if cplx in self.org.protein_mod.index:
							continue
						protein_mod_dict[cplx] = {}
						protein_mod_dict[cplx]["Core_enzyme"] = c
						protein_mod_dict[cplx]["Modifications"] = " AND ".join(
							"{}({})".format(cof, coeff)
							for cof, coeff in zip(cofs, coeffs)
						)
						protein_mod_dict[cplx]["Source"] = "Homology"

		protein_mod = pandas.DataFrame.from_dict(protein_mod_dict).T
		protein_mod.index.name = "Modified_enzyme"
		self.org.protein_mod = pandas.concat([self.org.protein_mod,protein_mod])

	def update_enzyme_reaction_association(self):
		enz_rxn_assoc_df = self.org.enz_rxn_assoc_df
		protein_mod = self.org.protein_mod
		for r,row in tqdm.tqdm(enz_rxn_assoc_df.iterrows(),
					'Updating enzyme reaction association...',
					bar_format = bar_format,
					total=enz_rxn_assoc_df.shape[0]):
			if r in self.org.manual_curation.enz_rxn_assoc_df.data.index:
				# Only update those not in manual curation
				continue
			cplxs = row['Complexes']
			reaction_cplx_list = []
			for cplx_id in cplxs.split(" OR "):
				cplx_id = cplx_id.split("_mod_")[0]
				if cplx_id in protein_mod["Core_enzyme"].values:
					# Use modifications
					cplx_mods = protein_mod[
						protein_mod["Core_enzyme"].eq(cplx_id)
					].index
					for cplx_id in cplx_mods:
						if "Oxidized" in cplx_id:
							cplx_id = cplx_id.split("_mod_Oxidized")[0]
						reaction_cplx_list.append(cplx_id)
				else:
					reaction_cplx_list.append(cplx_id)
			enz_rxn_assoc_df.at[r,'Complexes'] = " OR ".join(reaction_cplx_list)

	def update_TU_df(self):
		return NotImplemented

	def protein_location_from_homology(self):
		protein_location = self.org.protein_location
		complexes_df = self.org.complexes_df
		org_proteins_df = self.org.proteins_df
		org_cplx_homolog = self.homology.org_cplx_homolog
		mutual_hits = self.homology.mutual_hits
		ref_protein_location = self.ref.protein_location
		if not isinstance(ref_protein_location, pandas.DataFrame):
			return
		for rc,row in tqdm.tqdm(ref_protein_location.iterrows(),
					'Updating protein location from homology...',
					bar_format = bar_format,
					total=ref_protein_location.shape[0]):
			ref_gene = re.findall(r'.*(?=\(.*\))', row['Protein'])[0]
			if ref_gene not in mutual_hits:
								continue
			org_gene = mutual_hits[ref_gene]
			ref_info = ref_protein_location[ref_protein_location['Protein'].str.contains(ref_gene)]
			gene_string = r'{}\('.format(org_gene)
			org_cplxs = complexes_df[complexes_df['genes'].str.contains(gene_string)].index
			for org_cplx in org_cplxs:
				if protein_location.any().any() and \
					org_cplx in protein_location.index and \
					protein_location.loc[[org_cplx]]['Protein'].str.contains(gene_string).any().any():
							# Check if already in protein location, if not add.
					continue
				tmp = pandas.DataFrame.from_dict(
					{ org_cplx: {
						"Complex_compartment": ref_info["Complex_compartment"].values[0],
						"Protein": '{}()'.format(org_gene),
						"Protein_compartment": ref_info["Protein_compartment"].values[0],
						"translocase_pathway": ref_info["translocase_pathway"].values[0],
									}
								}).T
				protein_location = pandas.concat([protein_location, tmp], axis = 0, join = 'outer')
		protein_location.index.name = 'Complex'
		self.org.protein_location = protein_location

	def update_translocation_multipliers(self):
		ref_multipliers = self.ref.translocation_multipliers
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		mutual_hits = self.homology.mutual_hits
		multipliers = {}
		for ref_cplx, genes in tqdm.tqdm(ref_multipliers.items(),
					'Updating translocation multipliers from homology...',
					bar_format = bar_format,
					total=len(ref_multipliers)):
			if ref_cplx not in ref_cplx_homolog:
				continue
			org_cplx = ref_cplx_homolog[ref_cplx]
			multipliers[org_cplx] = {}
			for g, value in genes.items():
				if not value:
					continue
				if g not in mutual_hits:
					continue
				org_g = mutual_hits[g]
				multipliers[org_cplx][org_g] = value
		self.org.translocation_multipliers = multipliers

	def update_lipoprotein_precursors(self):
		ref_lipoprotein_precursors = self.ref.lipoprotein_precursors
		mutual_hits = self.homology.mutual_hits
		lipoprotein_precursors = {}
		for c, g in tqdm.tqdm(ref_lipoprotein_precursors.items(),
					'Updating lipoprotein precursors from homology...',
					bar_format = bar_format,
					total=len(ref_lipoprotein_precursors)):
			if g in mutual_hits:
				og = mutual_hits[g]
				lipoprotein_precursors[c] = og
		self.org.lipoprotein_precursors = lipoprotein_precursors

	def update_cleaved_methionine(self):
		cleaved_methionine = self.org.cleaved_methionine
		ref_cleaved_methionine = self.ref.cleaved_methionine
		mutual_hits = self.homology.mutual_hits
		for g in tqdm.tqdm(ref_cleaved_methionine,
					'Updating cleaved-methionine proteins from homology...',
					bar_format = bar_format):
			if g not in mutual_hits:
				continue
			cleaved_methionine.append(mutual_hits[g])
		self.org.cleaved_methionine = cleaved_methionine

	def update_me_mets(self):
		ref_me_mets = self.ref.me_mets
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		protein_mod = self.org.protein_mod.reset_index().set_index("Core_enzyme")
		me_mets = self.org.me_mets
		m_model = self.org.m_model
		d = {}
		warn_skip = []
		warn_found = []
		for ref_m, row in tqdm.tqdm(ref_me_mets.iterrows(),
					'Mapping M-metabolites to E-metabolites...',
					bar_format = bar_format,
					total=ref_me_mets.shape[0]):
			if ref_m not in m_model.metabolites:
				if "__" in ref_m:
					ref_m = ref_m.replace("__", "_")
					if ref_m not in m_model.metabolites:
						warn_skip.append(ref_m)
						continue
					else:
						warn_found.append(ref_m)
			if ref_m in me_mets.index:
				if me_mets.loc[ref_m]['type'] != 'CURATE':
					continue
				me_mets.drop(ref_m,inplace=True)
			ref_me = row["me_id"]
			ref_changetype = row['type']
			d[ref_m] = {}
			me_id = ''
			if ref_me in ref_cplx_homolog:
				org_me = ref_cplx_homolog[ref_me]
				me_id = org_me
				changetype = "REPLACE"
			elif ref_changetype == "REMOVE":
				changetype = "REMOVE"
			else:
				changetype = "CURATE"
			d[ref_m]["me_id"] = me_id
			d[ref_m]["type"] = changetype
		if d:
			df = pandas.DataFrame.from_dict(d).T
			df.index.name = "id"
			me_mets = pandas.concat([me_mets, df], axis = 0, join = 'outer')
			self.org.me_mets = me_mets.fillna('')
		if warn_skip:
			self.org.curation_notes['update_me_mets'].append({
				'msg':'Some metabolites in me_metabolites.txt are not in m_model, so they were skipped.',
				'triggered_by':warn_skip,
				'importance':'medium',
				'to_do':'Confirm these metabolites are correctly defined in me_metabolites.txt'})
		if warn_found:
			self.org.curation_notes['update_me_mets'].append({
				'msg':'Some metabolites in me_metabolites.txt were found in reference m_model after replacing __ with _',
				'triggered_by':warn_found,
				'importance':'medium',
				'to_do':'Confirm these metabolites are correctly defined in me_metabolites.txt'})

	def update_generics_from_homology(self):
		generic_dict = self.org.generic_dict
		ref_generic_dict = self.ref.generic_dict
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		for k, v in tqdm.tqdm(ref_generic_dict.items(),
					'Updating generics from homology...',
					bar_format = bar_format,
					total=len(ref_generic_dict)):
			if k not in generic_dict:
				generic_dict[k] = {"enzymes":[]}
				#continue
			if generic_dict[k]['enzymes']:
				continue
			ref_cplxs = v['enzymes']
			for i in ref_cplxs:
				if i in ref_cplx_homolog:
					homolog = ref_cplx_homolog[i]
					if homolog not in generic_dict[k]['enzymes']:
						generic_dict[k]['enzymes'].append(homolog)

	def update_folding_dict_from_homology(self):
		org_folding_dict = self.org.folding_dict
		ref_folding_dict = self.ref.folding_dict
		mutual_hits = self.homology.mutual_hits
		for k, v in tqdm.tqdm(ref_folding_dict.items(),
					'Updating folding from homology...',
					bar_format = bar_format,
					total=len(ref_folding_dict)):
			ref_cplxs = v['enzymes']
			for i in ref_cplxs:
				if i in mutual_hits:
					homolog = mutual_hits[i]
					if homolog not in org_folding_dict[k]['enzymes']:
						org_folding_dict[k]['enzymes'].append(homolog)

	def update_ribosome_subreactions_from_homology(self):
		ref_ribosome_subreactions = self.ref.ribosome_subreactions
		org_ribosome_subreactions = self.org.ribosome_subreactions
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		warn_proteins = []
		for k, v in tqdm.tqdm(ref_ribosome_subreactions.items(),
					'Updating ribosome subreaction machinery from homology...',
					bar_format = bar_format,
					total=len(ref_ribosome_subreactions)):
			ref_cplx = v["enzyme"]
			if ref_cplx in ref_cplx_homolog:
				org_cplx = ref_cplx_homolog[v["enzyme"]]
				if k not in org_ribosome_subreactions:
					org_ribosome_subreactions[k] = {"enzyme":""}
				defined_cplx = org_ribosome_subreactions[k]["enzyme"]
				if defined_cplx: continue
# 				if not defined_cplx or defined_cplx in org_cplx or 'CPLX_dummy' in defined_cplx:
				org_ribosome_subreactions[k]["enzyme"] = org_cplx
# 				else:
				warn_proteins.append({
					'subreaction':k,
					'defined_complex':defined_cplx,
					'inferred_complex':org_cplx
				})
		# Warnings
		if warn_proteins:
			self.org.curation_notes['update_ribosome_subreactions_from_homology'].append({
				'msg':'Some enzymes defined in me_builder.org.ribosome_subreactions are different from the ones inferred from homology',
				'triggered_by':warn_proteins,
				'importance':'medium',
				'to_do':'Confirm whether the definitions or homology calls are correct in me_builder.org.ribosome_subreactions. Curate the inputs in ribosome_subreactions.txt accordingly.'})

# 	def update_rrna_modifications_from_homology(self):
# 		ref_rrna_modifications = self.ref.rrna_modifications
# 		org_rrna_modifications = self.org.rrna_modifications
# 		ref_cplx_homolog = self.homology.ref_cplx_homolog
# 		warn_proteins = []
# 		for k, v in tqdm.tqdm(ref_rrna_modifications.items(),
# 					'Updating rRNA modifications from homology...',
# 					bar_format = bar_format,
# 					total=len(ref_rrna_modifications)):
# 			ref_cplx = v["machine"]
# 			if ref_cplx in ref_cplx_homolog:
# 				org_cplx = ref_cplx_homolog[v["machine"]]
# 				defined_cplx = org_rrna_modifications[k]["machine"]
# 				if not defined_cplx or defined_cplx in org_cplx or 'CPLX_dummy' in defined_cplx:
# 					org_rrna_modifications[k]["machine"] = org_cplx
# 				else:
# 					warn_proteins.append({
# 						'subreaction':k,
# 						'defined_complex':defined_cplx,
# 						'inferred_complex':org_cplx
# 					})
# 		# Warnings
# 		if warn_proteins:
# 			self.org.curation_notes['update_rrna_modifications_from_homology'].append({
# 				'msg':'Some enzymes defined in me_builder.org.rrna_modifications are different from the ones inferred from homology',
# 				'triggered_by':warn_proteins,
# 				'importance':'medium',
# 				'to_do':'Confirm whether the definitions or homology calls are correct in me_builder.org.rrna_modifications. Curate the inputs in rrna_modifications.txt accordingly.'})

	def update_amino_acid_trna_synthetases_from_homology(self):
		ref_amino_acid_trna_synthetase = self.ref.amino_acid_trna_synthetase
		org_amino_acid_trna_synthetase = self.org.amino_acid_trna_synthetase
		manual_curation = self.org.manual_curation.amino_acid_trna_synthetase.data
		protein_mod = self.org.protein_mod
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		warn_proteins = []
		for k, v in tqdm.tqdm(ref_amino_acid_trna_synthetase.items(),
					'Updating tRNA synthetases from homology...',
					bar_format = bar_format,
					total=len(ref_amino_acid_trna_synthetase)):
			ref_cplx = v
			if ref_cplx in ref_cplx_homolog:
				org_cplx = ref_cplx_homolog[v] # From homology
				defined_cplx = manual_curation[k] # From manual
				if defined_cplx:
					# If has been manually curated, do not override
					continue
				inferred_cplx = org_amino_acid_trna_synthetase[k] # From regex
				if inferred_cplx:
					# Catch whether the inferred complex has a modification
					mod = protein_mod[protein_mod["Core_enzyme"]==inferred_cplx].index
					if not mod.empty:
						inferred_cplx = mod[0]
					if inferred_cplx != org_cplx:
						warn_proteins.append({
							'amino_acid':k,
							'inferred_ligase':inferred_cplx,
							'homology_ligase':org_cplx
						})
					org_cplx = inferred_cplx # Use inferred complex instead of homology
				# Update the complex
				org_amino_acid_trna_synthetase[k] = org_cplx
		# Warnings
		if warn_proteins:
			self.org.curation_notes['update_amino_acid_trna_synthetases_from_homology'].append({
				'msg':'Some enzymes identified in me_builder.org.amino_acid_trna_synthetase are different from the ones inferred from homology',
				'triggered_by':warn_proteins,
				'importance':'medium',
				'to_do':'Confirm whether the definitions or homology calls are correct in me_builder.org.amino_acid_trna_synthetase. Curate the inputs in amino_acid_trna_synthetase.txt accordingly.'})

	def update_peptide_release_factors_from_homology(self):
		ref_peptide_release_factors = self.ref.peptide_release_factors
		org_peptide_release_factors = self.org.peptide_release_factors
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		warn_proteins = []
		for k, v in tqdm.tqdm(ref_peptide_release_factors.items(),
					'Updating peptide release factors from homology...',
					bar_format = bar_format,
					total=len(ref_peptide_release_factors)):
			ref_cplx = v['enzyme']
			if ref_cplx in ref_cplx_homolog:
				org_cplx = ref_cplx_homolog[ref_cplx]
				defined_cplx = org_peptide_release_factors[k]['enzyme']
				if defined_cplx: continue
# 				if not defined_cplx or defined_cplx in org_cplx or 'CPLX_dummy' in defined_cplx:
				org_peptide_release_factors[k]['enzyme'] = org_cplx
# 				else:
				warn_proteins.append({
					'subreaction':k,
					'defined_complex':defined_cplx,
					'inferred_complex':org_cplx
				})
		# Warnings
		if warn_proteins:
			self.org.curation_notes['update_peptide_release_factors_from_homology'].append({
				'msg':'Some enzymes defined in me_builder.org.peptide_release_factors are different from the ones inferred from homology',
				'triggered_by':warn_proteins,
				'importance':'medium',
				'to_do':'Confirm whether the definitions or homology calls are correct in me_builder.org.peptide_release_factors. Curate the inputs in peptide_release_factors.txt accordingly.'})

	def update_initiation_subreactions_from_homology(self):
		ref_initiation_subreactions = self.ref.initiation_subreactions
		org_initiation_subreactions = self.org.initiation_subreactions
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		for k, v in tqdm.tqdm(ref_initiation_subreactions.items(),
					'Updating translation initiation subreactions from homology...',
					bar_format = bar_format,
					total=len(ref_initiation_subreactions)):
			ref_cplxs = v["enzymes"]
			defined_cplxs = org_initiation_subreactions[k]["enzymes"]
			if defined_cplxs: continue
			org_cplxs = [
				ref_cplx_homolog[i] for i in ref_cplxs if i in ref_cplx_homolog
			]
			for i in org_cplxs:
				if i in defined_cplxs:
					continue
				defined_cplxs.append(i)

	def update_elongation_subreactions_from_homology(self):
		ref_elongation_subreactions = self.ref.elongation_subreactions
		org_elongation_subreactions = self.org.elongation_subreactions
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		for k, v in tqdm.tqdm(ref_elongation_subreactions.items(),
					'Updating translation elongation subreactions from homology...',
					bar_format = bar_format,
					total=len(ref_elongation_subreactions)):
			ref_cplxs = v["enzymes"]
			defined_cplxs = org_elongation_subreactions[k]["enzymes"]
			if defined_cplxs: continue
			org_cplxs = [
				ref_cplx_homolog[i] for i in ref_cplxs if i in ref_cplx_homolog
			]
			for i in org_cplxs:
				if i in defined_cplxs:
					continue
				defined_cplxs.append(i)

	def update_termination_subreactions_from_homology(self):
		ref_termination_subreactions = self.ref.termination_subreactions
		org_termination_subreactions = self.org.termination_subreactions
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		for k, v in tqdm.tqdm(ref_termination_subreactions.items(),
					'Updating translation termination subreactions from homology...',
					bar_format = bar_format,
					total=len(ref_termination_subreactions)):
			ref_cplxs = v["enzymes"]
			defined_cplxs = org_termination_subreactions[k]["enzymes"]
			if defined_cplxs: continue
			org_cplxs = [
				ref_cplx_homolog[i] for i in ref_cplxs if i in ref_cplx_homolog
			]
			for i in org_cplxs:
				if i in defined_cplxs:
					continue
				defined_cplxs.append(i)

	def update_special_trna_subreactions_from_homology(self):
		ref_special_trna_subreactions = self.ref.special_trna_subreactions
		org_special_trna_subreactions = self.org.special_trna_subreactions
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		for k, v in tqdm.tqdm(ref_special_trna_subreactions.items(),
					'Updating special tRNA subreactions from homology...',
					bar_format = bar_format,
					total=len(ref_special_trna_subreactions)):
			ref_cplxs = v["enzymes"]
			defined_cplxs = org_special_trna_subreactions[k]["enzymes"]
			if defined_cplxs: continue
			org_cplxs = [
				ref_cplx_homolog[i] for i in ref_cplxs if i in ref_cplx_homolog
			]
			for i in org_cplxs:
				if i not in defined_cplxs:
					defined_cplxs.append(i)

	def update_rna_degradosome_from_homology(self):
		ref_rna_degradosome = self.ref.rna_degradosome
		org_rna_degradosome = self.org.rna_degradosome
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		for k, v in tqdm.tqdm(ref_rna_degradosome.items(),
					'Updating RNA degradosome composition from homology...',
					bar_format = bar_format,
					total=len(ref_rna_degradosome)):
			ref_cplxs = v['enzymes']
			defined_cplxs = org_rna_degradosome[k]['enzymes']
			if defined_cplxs: continue
			org_cplxs = [
				ref_cplx_homolog[i] for i in ref_cplxs if i in ref_cplx_homolog
			]
			for i in org_cplxs:
				if i in defined_cplxs:
					continue
				defined_cplxs.append(i)

	def update_excision_machinery_from_homology(self):
		ref_excision_machinery = self.ref.excision_machinery
		org_excision_machinery = self.org.excision_machinery
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		for k, v in tqdm.tqdm(ref_excision_machinery.items(),
					'Updating excision machinery from homology...',
					bar_format = bar_format,
					total=len(ref_excision_machinery)):
			ref_cplxs = v['enzymes']
			defined_cplxs = org_excision_machinery[k]['enzymes']
			if defined_cplxs: continue
			org_cplxs = [
				ref_cplx_homolog[i] for i in ref_cplxs if i in ref_cplx_homolog
			]
			for i in org_cplxs:
				if self._is_base_complex_in_list(i,defined_cplxs):
					continue
				defined_cplxs.append(i)

	def update_special_modifications_from_homology(self):
		ref_special_trna_subreactions = self.ref.special_modifications
		org_special_trna_subreactions = self.org.special_modifications
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		for k, v in tqdm.tqdm(ref_special_trna_subreactions.items(),
					'Updating tRNA subreactions from homology...',
					bar_format = bar_format,
					total=len(ref_special_trna_subreactions)):
			ref_cplxs = v["enzymes"]
			defined_cplxs = org_special_trna_subreactions[k]["enzymes"]
			if defined_cplxs: continue
			org_cplxs = [
				ref_cplx_homolog[i] for i in ref_cplxs if i in ref_cplx_homolog
			]
			for i in org_cplxs:
				# WARNING: stoichiometry of special modifications is in subreactions.txt
				# if v["stoich"]:
				# 	org_special_trna_subreactions[k]["stoich"] = v["stoich"]
				if self._is_base_complex_in_list(i,defined_cplxs):
					continue
				defined_cplxs.append(i)

	def _is_base_complex_in_list(self,cplx,lst):
		return cplx in set(i.split('_mod_')[0] for i in lst)

	def update_rna_modification_from_homology(self):
		ref_rna_modification = self.ref.rna_modification_df
		org_rna_modification = self.org.rna_modification_df
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		for mod,row in ref_rna_modification.iterrows():
			enzymes = set(row['enzymes'].split('AND'))
			positions = row['positions'].split(',')
			if mod in org_rna_modification.index:
				df = org_rna_modification.loc[[mod]]
				mod_type = row['type']
				if df['type'].str.contains(mod_type).any():
					continue
			hits = enzymes.intersection(set(ref_cplx_homolog.keys()))
			if len(hits) == len(enzymes):
				row = row.copy()
				row['enzymes'] = ' AND '.join([ref_cplx_homolog[i] for i in enzymes])
				row['positions'] = ','.join(positions)
				row['source'] = 'Homology'
				df = pandas.DataFrame(row).T
				org_rna_modification = pandas.concat([org_rna_modification,df])
		org_rna_modification.index.name = 'modification'
		self.org.rna_modification_df = org_rna_modification

# 	def update_rna_modification_from_homology(self):
# 		ref_rna_modification = self.ref.rna_modification
# 		org_rna_modification = self.org.rna_modification
# 		ref_cplx_homolog = self.homology.ref_cplx_homolog
# 		defined_mods = set()
# 		for _,v in org_rna_modification.items():
# 			for i in v:
# 				defined_mods.add(i)
# 		for k, v in tqdm.tqdm(ref_rna_modification.items(),
# 					'Updating RNA modification machinery from homology...',
# 					bar_format = bar_format,
# 					total=len(ref_rna_modification)):
# 			if k not in ref_cplx_homolog: continue
# 			org_cplx = ref_cplx_homolog[k]
# 			if self._is_base_complex_in_list(org_cplx,list(org_rna_modification.keys())):
# 				continue
# 			if org_cplx not in org_rna_modification:
# 				org_rna_modification[org_cplx] = []
# 			org_rna_modification[org_cplx] += [i for i in v.copy() if i not in defined_mods]
# 			org_rna_modification[org_cplx] = list(set(org_rna_modification[org_cplx]))
# 		org_rna_modification = {k:v for k,v in org_rna_modification.items() if v}

	def update_lipid_modifications_from_homology(self):
		ref_lipid_modifications = self.ref.lipid_modifications
		org_lipid_modifications = self.org.lipid_modifications
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		for k, v in tqdm.tqdm(ref_lipid_modifications.items(),
					'Updating lipid modification machinery from homology...',
					bar_format = bar_format,
					total=len(ref_lipid_modifications)):
			if k not in org_lipid_modifications:
				org_lipid_modifications[k] = []
			for i in v:
				if i not in ref_cplx_homolog: continue
				org_cplx = ref_cplx_homolog[i]
# 				if org_cplx not in org_lipid_modifications[k]:
				if self._is_base_complex_in_list(org_cplx,org_lipid_modifications[k]):
					continue
				org_lipid_modifications[k].append(org_cplx)

	def update_transcription_subreactions_from_homology(self):
		ref_transcription_subreactions = self.ref.transcription_subreactions
		org_transcription_subreactions = self.org.transcription_subreactions
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		for k, v in tqdm.tqdm(ref_transcription_subreactions.items(),
					'Updating transcription subreactions machinery from homology...',
					bar_format = bar_format,
					total=len(ref_transcription_subreactions)):
			ref_cplxs = v["enzymes"]
			defined_cplxs = org_transcription_subreactions[k]["enzymes"]
			if defined_cplxs:
				continue
			org_cplxs = [
				ref_cplx_homolog[i] for i in ref_cplxs if i in ref_cplx_homolog
			]
			for i in org_cplxs:
				if self._is_base_complex_in_list(i,defined_cplxs):
					continue
				defined_cplxs.append(i)

	def update_translocation_pathways_from_homology(self):
		ref_translocation_pathways = self.ref.translocation_pathways
		org_translocation_pathways = self.org.translocation_pathways
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		for k, v in tqdm.tqdm(ref_translocation_pathways.items(),
					'Updating translocation machinery from homology...',
					bar_format = bar_format,
					total=len(ref_translocation_pathways)):
			ref_cplxs = v["enzymes"]
			if k not in org_translocation_pathways:
				org_translocation_pathways[k] = {"enzymes":[]}#v.copy()
			defined_cplxs = org_translocation_pathways[k]["enzymes"]
			if defined_cplxs:
				continue
			org_cplxs = [
				ref_cplx_homolog[i] for i in ref_cplxs if i in ref_cplx_homolog
			]
			for i in org_cplxs:
				if self._is_base_complex_in_list(i,defined_cplxs):
					continue
				defined_cplxs.append(i)

	def update_m_model(self):
		org_model = self.org.m_model
		ref_model = self.ref.m_model

		for m in tqdm.tqdm(org_model.metabolites,
					'Fixing M-model metabolites with homology...',
					bar_format = bar_format):
			if m.id not in ref_model.metabolites:
				continue
			ref_m = ref_model.metabolites.get_by_id(m.id)
			if not m.formula:
				m.formula = ref_m.formula

	def update_subreaction_matrix(self):
		org_subreaction_matrix = self.org.subreaction_matrix
		if not org_subreaction_matrix.empty:return
		ref_subreaction_matrix = self.ref.subreaction_matrix
		org_model = self.org.m_model
		ref_model = self.ref.m_model
		ref_cplx_homolog = self.homology.ref_cplx_homolog
		warn_mets = []
		warn_cplxs = []
		for subrxn,row in tqdm.tqdm(ref_subreaction_matrix.iterrows(),
					'Updating subreaction matrix with homology...',
					bar_format = bar_format,
					total=ref_subreaction_matrix.shape[0]):
			d = {}
			d[subrxn] = {'Metabolites':'','Stoichiometry':''}
			met = row['Metabolites'].split('_mod_')[0]
			if met in self.ref.complexes_df.index:
				if met in ref_cplx_homolog:
					if 'mod' in row['Metabolites']:
						mods = '_mod_' + '_mod_'.join(row['Metabolites'].split('_mod_')[1:])
					else:
						mods = ''
					d[subrxn]['Metabolites'] = ref_cplx_homolog[met] + mods
				else:
					warn_cplxs = []
					d[subrxn]['Metabolites'] = 'CPLX_dummy'
				d[subrxn]['Stoichiometry'] = row['Stoichiometry']
			else:
				if not org_model.metabolites.has_id(met):
					warn_mets.append(met)
				d[subrxn]['Metabolites'] = met
				d[subrxn]['Stoichiometry'] = row['Stoichiometry']
			org_subreaction_matrix = \
				pandas.concat([org_subreaction_matrix,
							  pandas.DataFrame.from_dict(d).T],
							 axis = 0, join = 'outer')
		self.org.subreaction_matrix = org_subreaction_matrix
		self.org.subreaction_matrix.index.name = 'Reaction'
# 		self.org.subreaction_matrix.to_csv(self.org.directory + 'subreaction_matrix.txt')

		if warn_mets:
			self.curation_notes['update_subreaction_matrix'].append({
				'msg':'Some metabolites in subreaction_matrix were added from reference but are not in M-model',
				'triggered_by':warn_mets,
				'importance':'high',
				'to_do':'Map these metabolites or replace the subreaction'})
		if warn_cplxs:
			self.curation_notes['update_subreaction_matrix'].append({
				'msg':'Some complexes in subreaction_matrix of reference could not be mapped',
				'triggered_by':warn_cplxs,
				'importance':'high',
				'to_do':'Map these complexes or replace the subreaction'})

	def update_from_homology(self):
		self.update_enzyme_stoichiometry()
		self.update_protein_modification()
		self.update_TU_df()
		self.update_translocation_pathways_from_homology()
		self.protein_location_from_homology()
		self.update_translocation_multipliers()
		self.update_lipoprotein_precursors()
		self.update_cleaved_methionine()
		self.update_me_mets()
		self.update_generics_from_homology()
		self.update_folding_dict_from_homology()
		self.update_ribosome_subreactions_from_homology()
# 		self.update_rrna_modifications_from_homology()
		self.update_amino_acid_trna_synthetases_from_homology()
		self.update_peptide_release_factors_from_homology()
		self.update_transcription_subreactions_from_homology()
		self.update_initiation_subreactions_from_homology()
		self.update_elongation_subreactions_from_homology()
		self.update_termination_subreactions_from_homology()
		self.update_special_trna_subreactions_from_homology()
		self.update_rna_degradosome_from_homology()
		self.update_excision_machinery_from_homology()
		self.update_special_modifications_from_homology()
		self.update_rna_modification_from_homology()
		self.update_lipid_modifications_from_homology()
		self.update_m_model()
		self.update_subreaction_matrix()

	def fill(self, fill_with='CPLX_dummy'):
		coralme.builder.troubleshooting.fill_builder(self,fill_with='CPLX_dummy')

	def check(self):
		t_pathways = self.org.protein_location['translocase_pathway'].unique()

		file_t_paths = set()
		for t in t_pathways:
			for i in t:
				file_t_paths.add(i)

		defined_t_paths = set()
		for t in tqdm.tqdm(self.org.translocation_pathways.keys(),
					'Checking defined translocation pathways...',
					bar_format = bar_format):
			defined_t_paths.add(coralme.builder.dictionaries.pathway_to_abbreviation[t])
		missing_pathways = file_t_paths - defined_t_paths
		if missing_pathways:
			self.org.curation_notes['check'].append({
									'msg':'Some translocase pathways in org.protein_location are not defined in org.translocation_pathways.',
									'triggered_by':list(missing_pathways),
									'importance':'high',
									'to_do':'Fill in translocation pathways in org.translocation_pathways or in translocation_pathways.txt'
				})

		me_mets = self.org.me_mets
		warn_mets = list(me_mets[me_mets['type'] == 'CURATE'].index)
		# Warnings
		if warn_mets:
			self.org.curation_notes['check'].append({
				'msg':'Some metabolites in me_metabolites.txt need curation',
				'triggered_by':warn_mets,
				'importance':'medium',
				'to_do':'Map or remove these metabolites in me_metabolites.txt'})

	def load(self, directory):
		with open(directory, "rb") as f:
			tmp = pickle.load(f)
			return tmp

	def save(self, directory=False):
		if not directory:
			directory = self.org.directory + "builder.pickle"
		with open(directory, "wb") as f:
			pickle.dump(self, f)

# 	def load_me(self,filename='me_model.pickle'):
# 		with open(self.org.directory + '/'+filename, "rb") as f:
# 			return pickle.load(f)

	def save_builder_info(self):
		include = [float,int,str,pandas.DataFrame,dict]
		floats = {}
		dataframes = {}
		for i in dir(self.org):
			if i[0] == '_':
				continue
			attr = getattr(self.org,i)
			for c in include:
				if isinstance(attr,c):
					if c == float:
						floats[i] = attr
					elif c == dict:
						dataframes[i] = pandas.DataFrame.from_dict({'value':attr})
					elif c == pandas.DataFrame:
						dataframes[i] = attr
					break
		dataframes['parameters'] = pandas.DataFrame.from_dict({'value':floats})

		directory = self.org.directory + 'builder_info/'
		if not os.path.exists(directory):
			os.mkdir(directory)
		for k,v in dataframes.items():
			v.to_csv(directory + k + '.txt')

	# shortcuts to methods in the MECurator class
	def find_issue(self,query):
		coralme.builder.curation.MECurator(self.org).find_issue_with_query(query)

	# shortcuts to methods in the MEReconstruction and METroubleshooter classes
	def build_me_model(self, update = True, prune = True, overwrite = False, skip = None):
		coralme.builder.main.MEReconstruction(self).build_me_model(update = update, prune = prune, overwrite = overwrite, skip = skip)

	def troubleshoot(self, growth_key_and_value = None, skip = set(), guesses = set(), met_types = set(), platform = None, solver = 'qminos', savefile = None, gapfill_cofactors = False):
		"""
		growth_key_and_value:
			dictionary of Sympy.Symbol and value to replace

		skip:
			set of ME-components to not evaluate during gapfilling

		guesses:
			set of ME-components to try first before any other set of components

		solver:
			'qminos', 'gurobi' or 'cplex'

		savefile:
			file path (absolute or relative) to save the ME-model as a pickle file
		"""

		coralme.builder.main.METroubleshooter(self).troubleshoot(growth_key_and_value, skip = skip, guesses = guesses, platform = platform, solver = solver, savefile = savefile,gapfill_cofactors=gapfill_cofactors)
		coralme.builder.notes.save_curation_notes(
				self.curation_notes,
				self.configuration['out_directory'] + '/curation_notes.json'
			)
		coralme.builder.notes.publish_curation_notes(
				self.curation_notes,
				self.configuration['out_directory']+ '/curation_notes.txt'
			)

	def input_data(self, gem, overwrite):
		tmp1, tmp2 = coralme.builder.main.MEReconstruction(self).input_data(gem, overwrite)
		self.df_tus, self.df_rmsc, self.df_subs, self.df_mets, self.df_keffs = tmp1
		self.df_data, self.df_rxns, self.df_cplxs, self.df_ptms, self.df_enz2rxn, self.df_rna_mods, self.df_protloc, self.df_transpaths = tmp2
		return tmp1, tmp2



[docs]
class MEReconstruction(MEBuilder):
	"""
	MEReconstruction class for reconstructing a ME-model from user/automated input

	Parameters
	----------
	MEBuilder : coralme.builder.main.MEBuilder

	"""
	def __init__(self, builder):
		# only if builder.generate_files() was run before builder.build_me_model()
		if hasattr(builder, 'org'):
			self.org = builder.org
		if hasattr(builder, 'ref'):
			self.ref = builder.ref
		if hasattr(builder, 'homology'):
			self.homology = builder.homology
		if hasattr(builder, 'df_data'):
			self.df_data = builder.df_data
			self.df_tus = builder.df_tus
			self.df_rmsc = builder.df_rmsc
			self.df_subs = builder.df_subs
			self.df_mets = builder.df_mets
			self.df_keffs = builder.df_keffs
			self.df_rxns = builder.df_rxns
			self.df_cplxs = builder.df_cplxs
			self.df_ptms = builder.df_ptms
			self.df_enz2rxn = builder.df_enz2rxn
			self.df_rna_mods = builder.df_rna_mods
			self.df_protloc = builder.df_protloc
			self.df_transpaths = builder.df_transpaths
		if hasattr(builder, 'debug_data'):
			self.debug_data = builder.debug_data

		self.logger = builder.logger
		self.configuration = builder.configuration
		# reboot me_model
		if len(builder.me_model.reactions) == 1 and len(builder.me_model.metabolites):
			self.me_model = builder.me_model
		else:
			self.me_model = coralme.core.model.MEModel(name = self.configuration.get('ME-Model-ID', 'coralME'), mu = self.configuration.get('growth_key', 'mu'))
		self.curation_notes = builder.curation_notes

	def input_data(self, m_model, overwrite = False):
		if hasattr(self, 'df_data'):
			return (self.df_tus, self.df_rmsc, self.df_subs, self.df_mets, self.df_keffs), (self.df_data, self.df_rxns, self.df_cplxs, self.df_ptms, self.df_enz2rxn, self.df_rna_mods, self.df_protloc, self.df_transpaths)

		config = self.configuration

		# Inferred information
		if hasattr(self, 'org'):
			config['selenocysteine_enzymes'] = self.org.special_trna_subreactions['sec_addition_at_UGA']['enzymes']
			logging.warning('The selenocysteine complex SelAB was set from homology data.')

			config['pg_pe_160'] = self.org.lipid_modifications.get('pg_pe_160', 'CPLX_dummy')
			logging.warning('The prolipoprotein diacylglyceryl transferase and the signal peptidase homologs were set from homology data.')

			config['other_lipids'] = self.org.lipid_modifications.get('other_lipids', 'CPLX_dummy')
			logging.warning('The apolipoprotein N-acyltransferase homolog was set from homology data.')

			lst = self.org.generic_dict.get('generic_fes_transfers_complex', {'enzymes' : ['CPLX_dummy']})['enzymes'] # ecolime = ['CPLX0-7617', 'CPLX0-7824', 'IscA_tetra']
# 			if len(lst) != 3:
# 				lst = lst + ['CPLX_dummy'] * (3 - len(lst))
			config['complex_cofactors']['fes_transfers'] = lst
			logging.warning('The iron-sulfur cluster insertion homologs were set from homology data.')

			self.org.get_reaction_keffs() # saves a file to <out_directory>/building_data/reaction_median_keffs.txt
# 			config['df_reaction_keff_consts'] = config.get('out_directory', '.') + '/building_data/reaction_median_keffs.txt'

		# include rna_polymerases, lipids and lipoproteins from automated info and save new configuration file
		if config.get('rna_polymerases', None) is None or config.get('rna_polymerases') == {}:
			if hasattr(self, 'org'):
				config['default_sigma_factor'] = self.org.rpod
				config['rna_polymerases'] = self.org.rna_polymerase_id_by_sigma_factor
				logging.warning('The RNA Polymerases (core enzyme and sigma factors) information was set from homology data.')

			## replace IDs
			#for name, rnap in config['rna_polymerases'].items():
				#if hasattr(self, 'homology'):
					#for key, value in rnap.items():
						#config['rna_polymerases'][name][key] = self.homology.org_cplx_homolog.get(value, value)
						#config['rna_polymerases'][name][key] = self.homology.org_cplx_homolog.get(value, value.replace('-MONOMER', '_MONOMER'))

		if config.get('lipid_modifications', None) is None or len(config.get('lipid_modifications')) == 0:
			if hasattr(self, 'org'):
				config['lipid_modifications'] = [ x for x in self.org.lipids if x.endswith('_p') and x.startswith('pg') and not x.startswith('pgp') ]
				logging.warning('The lipid modifications were set from M-model metabolites.')

		if config.get('lipoprotein_precursors', None) is None or len(config.get('lipoprotein_precursors')) == 0:
			if hasattr(self, 'org'):
				config['lipoprotein_precursors'] = self.org.lipoprotein_precursors
				logging.warning('The lipoprotein precursors were set from homology data.')

		if config.get('ngam', None) is None:
			if hasattr(self, 'org'):
				config['ngam'] = self.org.NGAM
				logging.warning('The ATPM value (ATP requirement for maintenance; also NGAM) was set from the M-model.')

		if config.get('gam', None) is None:
			if hasattr(self, 'org'):
				config['gam'] = self.org.GAM
				logging.warning('The GAM value (ATP requirement for growth) was set from the M-model or default value.')

		# modify options
		#config['create_files'] = False
		#config['run_bbh_blast'] = False
		#config['dev_reference'] = False

		if hasattr(self, 'org') and len(config.get('translocation_multipliers', {})) == 0:
			config['translocation_multipliers'] = { k:{ k:v for k,v in v.items() if v != 0 } for k,v in self.org.translocation_multipliers.items() }
			logging.warning('The translocation multipliers for yidC and tat homologs were set from homology data.')

		if hasattr(self, 'org') and len(config.get('amino_acid_trna_synthetase', {})) == 0:
			config['amino_acid_trna_synthetase'] = self.org.amino_acid_trna_synthetase
			logging.warning('The tRNA synthetases were set from homology data.')

		if hasattr(self, 'org') and len(config.get('defer_to_rxn_matrix', [])) == 0:
			config['defer_to_rxn_matrix'] = [self.org.biomass] if self.org.biomass is not None else []
			logging.warning('The biomass reaction will be skipped during the ME reconstruction steps.')

		if hasattr(self, 'org') and len(config.get('peptide_release_factors', [])) == 0:
			config['peptide_release_factors'] = { k:v['enzyme'] for k,v in self.org.peptide_release_factors.items() }
			logging.warning('The peptide release factors were set from homology data.')

		if not 'FMETTRS' in config.get('defer_to_rxn_matrix', []):
			config['defer_to_rxn_matrix'].append('FMETTRS')
			logging.warning('The FMETTRS reaction from the M-model will be replaced by a SubReaction during the ME-model reconstruction steps.')

		if not 'ATPM' in config.get('defer_to_rxn_matrix', []):
			config['defer_to_rxn_matrix'].append('ATPM')
			logging.warning('The ATPM reaction from the M-model will be replaced by a SummaryVariable during the ME-model reconstruction steps.')

		if hasattr(self, 'org') and len(config.get('braun\'s_lipoproteins', [])) == 0:
			lst = [ k.split('_mod_')[0] for k,v in self.org.protein_mod.to_dict()['Modifications'].items() if 'palmitate' in v ]
			config['braun\'s_lipoproteins'] = lst if isinstance(lst, list) else [lst]
			if len(lst) != 0:
				logging.warning('The Braun\'s lipoprotein homologs list was set to \'{:s}\'.'.format(', '.join(lst)))

		def read(filecode, input_type, default, columns = []):
			filename = config.get(filecode, '')
			if pathlib.Path(filename).is_file():
				file_to_read = filename
			else:
				#return pandas.DataFrame(columns = columns)
				if filecode == 'df_reaction_keff_consts':
					return pandas.DataFrame(columns = columns)
				file_to_read = '{:}/building_data/{:s}'.format(config.get('out_directory', '.'), default)

			df = coralme.builder.flat_files.read(file_to_read)
			if set(columns).issubset(set(df.columns)):
				return df
			else:
				#logging.warning('Column names in \'{:s}\' does not comply default values: {:s}.'.format(filename, ', '.join(columns)))
				raise Exception('Column names in \'{:s}\' does not comply default values: {:s}.'.format(filename, ', '.join(columns)))

		# User inputs
		# Transcriptional Units
		cols = ['TU_id', 'replicon', 'genes', 'start', 'stop', 'tss', 'strand', 'rho_dependent', 'rnapol']
		df_tus = read('df_TranscriptionalUnits', 'transcriptional units data', 'TUs_from_biocyc.txt', cols)
		df_tus = df_tus.set_index('TU_id', inplace = False)

		# Reaction Matrix: reactions, metabolites, compartments, stoichiometric coefficients
		cols = ['Reaction', 'Metabolites', 'Stoichiometry']
		df_rmsc = read('df_matrix_stoichiometry', 'reaction stoichiometry data', 'reaction_matrix.txt', cols)

		# SubReaction Matrix: subreactions, metabolites, compartments, stoichiometric coefficients
		cols = ['Reaction', 'Metabolites', 'Stoichiometry']
		df_subs = read('df_matrix_subrxn_stoich', 'subreaction stoichiometry data', 'subreaction_matrix.txt', cols)

		# Orphan and Spontaneous reaction metadata
		cols = ['name', 'description', 'subsystems', 'is_reversible', 'is_spontaneous']
		df_rxns = read('df_metadata_orphan_rxns', 'new reactions metadata', 'orphan_and_spont_reactions.txt', cols)
		df_rxns = df_rxns.set_index('name', inplace = False)

		# Metabolites metadata
		cols = ['id', 'me_id', 'name', 'formula', 'compartment', 'type']
		df_mets = read('df_metadata_metabolites', 'new metabolites metadata', 'me_metabolites.txt', cols)
		df_mets = df_mets.set_index('id', inplace = False)

		# Effective turnover rates
		cols = ['reaction', 'direction', 'complex', 'mods', 'keff']
		df_keffs = read('df_reaction_keff_consts', 'effective turnover rates', 'reaction_median_keffs.txt', cols)

		# set new options in the MEBuilder object
		self.configuration.update(config)

		# detect if the genbank file was modified using biocyc data
		gb = '{:s}/building_data/genome_modified.gb'.format(config.get('out_directory', '.'))
		config['genbank-path'] = gb if pathlib.Path(gb).exists() else config['genbank-path']

		if overwrite:
			new = config.get('new_config_file', 'coralme-config.yaml')
			yaml = new if new.endswith('.yaml') else '{:s}.yaml'.format(new)
			with open('{:s}/{:s}'.format(config.get('out_directory', '.'), yaml), 'w') as outfile:
				anyconfig.dump(config, outfile)
			logging.warning('New configuration file \'{:s}\' was written with inferred options.'.format(yaml))
			#with open('{:s}/{:s}'.format(config.get('out_directory', '.'), new), 'w') as outfile:
				#anyconfig.dump(config, outfile)
			#with open('{:s}/{:s}'.format(config.get('out_directory', '.'), new), 'w') as outfile:
				#anyconfig.dump(config, outfile)

		# Drop-in replacement of input files:
		# step1: reactions, complexes, modification of complexes, enzyme-to-reaction mapping
		# step2a: generics, dnap stoichiometry, ribosome stoichiometry, degradosome stoichiometry, tRNA ligases, RNA modifications
		# step2b: folding pathways (DnaK, GroEL), N-terminal Methionine Processing, translocation pathways
		config['df_gene_cplxs_mods_rxns'] = './automated-org-with-refs.xlsx' if config.get('df_gene_cplxs_mods_rxns', '') == '' else config['df_gene_cplxs_mods_rxns']
		filename = config['df_gene_cplxs_mods_rxns']
		if overwrite:
			try:
				pathlib.Path(filename).unlink(missing_ok = True) # python>=3.8
			except:
				if pathlib.Path(filename).exists():
					pathlib.Path(filename).unlink() # python==3.7

		if pathlib.Path(filename).is_file() and filename.endswith('.xlsx'):
			df_data = pandas.read_excel(filename, dtype = str).dropna(how = 'all')
		elif pathlib.Path(filename).is_file() and filename.endswith('.txt'):
			df_data = pandas.read_csv(filename, sep = '\t', header = 0, dtype = str).dropna(how = 'all')
		else:
			# detect if the genbank file was modified using biocyc data
			gb = '{:s}/building_data/genome_modified.gb'.format(config.get('out_directory', '.'))
			gb = gb if pathlib.Path(gb).exists() else config['genbank-path']
			coralme.core.extended_classes.ListHandler.print_and_log('Writting the Organism-Specific Matrix to {:s}...'.format(config['df_gene_cplxs_mods_rxns']))
			# generate a minimal dataframe from the genbank and m-model files
			df_data = coralme.builder.preprocess_inputs.generate_organism_specific_matrix(gb, config.get('locus_tag', 'locus_tag'), model = m_model)
			# complete minimal dataframe with automated info from homology
			df_data = coralme.builder.preprocess_inputs.complete_organism_specific_matrix(self, df_data, model = m_model, output = filename)
			coralme.core.extended_classes.ListHandler.print_and_log('Organism-Specific Matrix saved to {:s} file.'.format(filename))

		# All other inputs and remove unnecessary genes from df_data
		return (df_tus, df_rmsc, df_subs, df_mets, df_keffs), coralme.builder.preprocess_inputs.get_df_input_from_excel(df_data, df_rxns)

	def build_me_model(self, update = True, prune = True, overwrite = False, skip = None):
		"""Performs the Build step of the reconstruction.

		This function will read the synchronized and complemented information
		in the OSM and build a ME-model.

		Parameters
		----------
		update : bool
			If True, runs the update method of all reactions after building.
		prune : bool
			If True, prunes unused reactions and metabolites after building.
		overwrite : bool
			If True, overwrites the OSM and other configuration files.

		"""
		config = self.configuration
		model = config.get('ME-Model-ID', 'coralME')
		out_directory = config.get('out_directory', '.')
		if not os.path.exists(out_directory):
			os.mkdir(out_directory)

		log_directory = config.get('log_directory', '.')
		if not os.path.exists(log_directory):
			os.mkdir(log_directory)

		# ## Part 1: Create a minimum solvable ME-model
		# set logger
		log = logging.getLogger() # root logger
		for hdlr in log.handlers[:]: # remove all old handlers
			log.removeHandler(hdlr)

		# Old code works in a separate script; but it works if we remove the old handler
		logging.basicConfig(filename = '{:s}/MEReconstruction-step1-{:s}.log'.format(log_directory, model), filemode = 'w', level = logging.WARNING, format = log_format)
		log.addHandler(self.logger['MEReconstruction-step1'])
		#log.addHandler(logging.StreamHandler(sys.stdout))
		logging.captureWarnings(True)

		coralme.core.extended_classes.ListHandler.print_and_log("Initiating ME-model reconstruction...")

		# This will include the bare minimum representations of that still produce a working ME-model:
		# - Metabolic Reactions
		# - Transcription Reactions
		# - Translation Reactions
		# - Complex Formation Reactions
		#
		# ### 1) Create a MEModel object and populate its global info
		# This includes important parameters that are used to calculate coupling constraints as well as organism-specific information such as compartments, GC fraction, etc.

		me = self.me_model

		# update default options with user-defined values
		me.global_info.update(self.configuration)

		# Define the types of biomass components that will be synthesized in the model
		me.add_biomass_constraints_to_model(me.global_info['biomass_constraints'])

		# Define ME-model compartments
		me._compartments = me.global_info.get('compartments', {})
		if 'mc' not in me._compartments.keys():
			me._compartments['mc'] = 'ME-model Constraint'
			logging.warning('Pseudo-compartment \'mc\' (\'ME-model Constraint\') was added into the ME-model.')

		# Define M-model
		if hasattr(self, 'org'):
			me.gem = self.org.m_model
		else:
			if me.global_info['m-model-path'].endswith('.json'):
				me.gem = cobra.io.load_json_model(me.global_info['m-model-path'])
			else:
				me.gem = cobra.io.read_sbml_model(me.global_info['m-model-path'])

		# remove unused genes, reactions, and metabolites
		cobra.manipulation.delete.remove_genes(me.gem, [ x for x in me.gem.genes if len(x.reactions) == 0 ], remove_reactions = False)
		coralme.core.model.MEModel.prune_unused_reactions(me.gem) # reactions without metabolites
		# coralme.core.model.MEModel.prune_unused_metabolites(me.gem) # orphan metabolites

		# update default options with missing, automated-defined values
		me.global_info.update(self.configuration)

		# Read user inputs
		tmp1, tmp2 = coralme.builder.main.MEReconstruction.input_data(self, me.gem, overwrite)
		(df_tus, df_rmsc, df_subs, df_mets, df_keffs), (df_data, df_rxns, df_cplxs, df_ptms, df_enz2rxn, df_rna_mods, df_protloc, df_transpaths) = tmp1, tmp2

		#TODO: Iterate tmp1 and tmp2 and warn empty dataframes

		me.internal_data = {}
		for key in ['df_tus', 'df_rmsc', 'df_subs', 'df_mets', 'df_keffs', 'df_data', 'df_rxns', 'df_cplxs', 'df_ptms', 'df_enz2rxn', 'df_rna_mods', 'df_protloc', 'df_transpaths']:
			exec('me.internal_data[\'{:s}\'] = {:s}'.format(key, key))

		# Remove default ME-model SubReactions from global_info that are not mapped in the organism-specific matrix
		subrxns = set(df_data[df_data['ME-model SubReaction'].notnull()]['ME-model SubReaction'])

		# list of subreactions
		for key in ['peptide_processing_subreactions']:
			me.global_info[key] = list(set(me.global_info[key]).intersection(subrxns))

		# dictionary mapping subreactions and stoichiometry
		for key in ['transcription_subreactions', 'translation_subreactions']:
			me.global_info[key] = { k:v for k,v in me.global_info[key].items() if k in subrxns }

		# ### 2) Load metabolites and build Metabolic reactions
		#
		# It creates a new M-model, then incorporates it into the ME-model using the *add_m_model_content* function. Reactions are added directly and metabolites are added as *StoichiometricData* (*me.stoichiometric_data*).
		#
		# Different metabolite types have different properties in a ME-model, so complexes are added to the model as a *ComplexData*, not as a *Metabolite*. Components in the M-model that are actually *Complexes* are compiled in the *cplx_lst* variable.

		# Modify M-model
		m_model = coralme.builder.flat_files.process_m_model(
			m_model = me.gem,
			rxns_data = df_rxns, # metadata of new reactions
			mets_data = df_mets, # metadata of new metabolites
			cplx_data = df_cplxs, # metadata of prot-prot/prot-rna complexes
			reaction_matrix = df_rmsc, # stoichiometric coefficients of new! reactions

			me_compartments = { v:k for k,v in me._compartments.items() }, repair = True,
			defer_to_rxn_matrix = me.global_info['defer_to_rxn_matrix'])

		me.processed_m_model = m_model

		# Some of the 'metabolites' in the M-model are actually complexes.
		# We pass those in so they get created as complexes, not metabolites.
		cplx_dct = coralme.builder.flat_files.get_complex_subunit_stoichiometry(df_cplxs)
		cplx_lst = [ i.id for i in m_model.metabolites if i.id.split('_mod_')[0] in cplx_dct.keys() ]

		# Complexes in `cplx_lst` are added as a coralme.core.component.Complex object
		# Other `metabolites` are added as a coralme.core.component.Metabolite object
		coralme.util.building.add_m_model_content(me, m_model, complex_metabolite_ids = set(cplx_lst))

		# NEW! Add metabolic subreactions (e.g. 10-Formyltetrahydrofolate:L-methionyl-tRNA N-formyltransferase)
		rxn_to_cplx_dict = coralme.builder.flat_files.get_reaction_to_complex(m_model, df_enz2rxn)

		reaction_matrix_dict = coralme.builder.flat_files.process_reaction_matrix_dict(
			df_subs, cplx_data = df_cplxs, me_compartments = { v:k for k,v in me._compartments.items() })

		for subreaction, stoichiometry in reaction_matrix_dict.items():
			coralme.util.building.add_subreaction_data(me, subreaction, stoichiometry, rxn_to_cplx_dict[subreaction])

		# ### 3) Add Transcription and Translation reactions
		#
		# To construct the bare minimum components of a transcription and translation reactions.
		# For example, transcription reactions at this point include nucleotides and the synthesized RNAs.

		# RNA and protein names are prefixed in the ME-model following then the locus tag
		lst = set(df_data['Gene Locus ID'].str.replace('^protein_', '', regex = True).str.replace('^RNA_', '', regex = True).tolist())

		# detect if the genbank file was modified using biocyc data
		gb = '{:s}/building_data/genome_modified.gb'.format(out_directory)
		gb = gb if pathlib.Path(gb).exists() else config['genbank-path']

		coralme.util.building.build_reactions_from_genbank(
			#me_model = me, gb_filename = me.global_info['genbank-path'],
			me_model = me, gb_filename = gb,
			tu_frame = df_tus, genes_to_add = lst, update = True, verbose = True,
			feature_types = me.global_info['feature_types'],
			trna_misacylation = me.global_info['trna_misacylation'],
			genome_mods = me.global_info['genome_mods'],
			knockouts = me.global_info['knockouts'])

		# ### 4) Add in ComplexFormation reactions without modifications (for now)

		# RNA components different from tRNAs and from 5S, 16S and 23S rRNAs
		rna_components = set(me.global_info['rna_components']) # in order: RNase_P_RNA, SRP_RNA, 6S RNA

		# cplx_dct is a dictionary {complex_name: {locus_tag/generic_name/subcomplex_name: count}}
		cplx_dct = coralme.builder.flat_files.get_complex_subunit_stoichiometry(df_cplxs, rna_components)

		# mods_dct is a dictionary {complex_name_with_mods: {core_enzyme: complex_name, modifications: {cofactor: stoichiometry}}
		mods_dct = coralme.builder.flat_files.get_complex_modifications(
			reaction_matrix = df_rmsc,
			protein_complexes = df_cplxs,
			complex_mods = df_ptms,
			compartments = { v:k for k,v in me._compartments.items() })

		# Add complexes into the ME-model as coralme.core.processdata.ComplexData objects.
		# Modifications are added as SubReactions
		coralme.util.building.add_model_complexes(me, cplx_dct, mods_dct)

		# Remove modifications. They will be added back in later (See Building Step 2, subsection 3).
		for data in tqdm.tqdm(list(me.complex_data), 'Removing SubReactions from ComplexData...', bar_format = bar_format):
			data.subreactions = {}

		# Add ComplexFormation reactions for each of the ComplexData
		for data in tqdm.tqdm(list(me.complex_data), 'Adding ComplexFormation into the ME-model...', bar_format = bar_format):
			formation = data.formation
			if formation:
				formation.update()
			elif data.id != 'CPLX_dummy':
				data.create_complex_formation()
				logging.warning('Added ComplexFormation for \'{:s}\'.'.format(data.id))
			else:
				pass

		# ### 5) Add *GenericData* and its reactions
		# Multiple entities can perform the same role. To prevent a combinatorial explosion, we create "generic" versions of the components, where any of those entities can fill in.
		#
		# This step was originally in the second part of the builder process, and was moved here to be able to use generics in enzymes associated to metabolic reactions.

		generics = coralme.builder.preprocess_inputs.get_generics(df_data)
		for generic, components in tqdm.tqdm(generics, 'Adding Generic(s) into the ME-model...', bar_format = bar_format):
			if 'generic_fes_transfers_complex' in generic:
				continue
			coralme.core.processdata.GenericData(generic, me, components).create_reactions()

		# ### 6) Add dummy reactions to model and the *unmodeled_protein_fraction* constraint
		#
		# This includes the Transcription, Translation, ComplexFormation, and Metabolic reactions for a dummy RNA/protein/complex. Sequence for *dummy RNA* is based on the prevalence of each codon found in the genbank file.

		coralme.util.building.add_dummy_reactions(me, me.global_info['transl_tables']['c'], update = True)

		# The 'dummy protein' is associated to orphan reactions.
		# This ensures that orphan reactions will not become favored to fulfil unmodeled protein fraction requirement.
		rxn = coralme.core.reaction.SummaryVariable('dummy_protein_to_mass')
		me.add_reactions([rxn])
		mass = me.metabolites.protein_dummy.formula_weight / 1000. # in kDa
		met = coralme.core.component.Constraint('unmodeled_protein_biomass')
		rxn.add_metabolites({'protein_biomass': -mass, 'protein_dummy': -1, met: mass})

		# WARNING: DO NOT REMOVE. This is needed if troubleshooter adds sink reactions for 4fe4s/2fe2s
		# Add CPLX_dummy_mod_2fe2s(1) and CPLX_dummy_mod_4fe4s(1) if fes_transfers is {'CPLX_dummy'}
		if 'CPLX_dummy' in me.global_info['complex_cofactors']['fes_transfers']:
			for fes in ['2fe2s', '4fe4s']:
				coralme.util.building.add_complex_to_model(me, 'CPLX_dummy_mod_{:s}(1)'.format(fes), { 'protein_dummy' : 1.0, fes + '_c': 1.0})
			# WARNING: do not change escape characters
			me.complex_data.query(r'CPLX_dummy_mod_2fe2s\(1\)')[0].create_complex_formation()
			me.complex_data.query(r'CPLX_dummy_mod_4fe4s\(1\)')[0].create_complex_formation()

		# ### 7) Associate Complexes to Metabolic reactions and build the ME-model metabolic network

		# Associate a reaction id with the ME-model complex id (including modifications)
		rxn_to_cplx_dict = coralme.builder.flat_files.get_reaction_to_complex(m_model, df_enz2rxn)
		spontaneous_rxns = [me.global_info['dummy_rxn_id']] + list(df_rxns[df_rxns['is_spontaneous'].str.match('1|TRUE|True|true')].index.values)
		# Correct rxn_to_cplx_dict with spontaneous reactions
		for key in spontaneous_rxns:
			if rxn_to_cplx_dict.get(key, None) is None:
				rxn_to_cplx_dict[key] = {None}
			else:
				rxn_to_cplx_dict[key].add(None)

		coralme.util.building.add_reactions_from_stoichiometric_data(
			me, rxn_to_cplx_dict, is_spontaneous = spontaneous_rxns, update = True)

		# correct the enzyme list of a subreaction if it matches the component_list of a generic
		# WARNING: This allows the use of generics in subreactions
		# e.g.: acpP_activation is associated to EG12221-MONOMER OR HOLO-ACP-SYNTH-CPLX_mod_mg2(1) OR HOLO-ACP-SYNTH-CPLX_mod_mn2(1)
		# e.g.: EG12221-MONOMER OR HOLO-ACP-SYNTH-CPLX_mod_mg2(1) OR HOLO-ACP-SYNTH-CPLX_mod_mn2(1) is associated to generic_acp_synthase
		for data in list(me.subreaction_data):
			for generic in list(me.generic_data):
				if data.enzyme == set(generic.component_list):
					data.enzyme = set([generic.id])

		# ### 8) Incorporate remaining biomass constituents
		# ### 1. General Demand Requirements
		# There are leftover components from the biomass equation that either:
		# 1. they have no mechanistic function in the model (e.g., *glycogen*)
		# 2. they are cofactors that are regenerated (e.g., *nad*)
		#
		# Applies demands and coefficients from the biomass objective function from the M-model.

		# set values from configuration as properties of the ME-model
		for key in [ 'gam', 'ngam', 'unmodeled_protein_fraction' ]:
			if key in me.global_info:
				setattr(me, key, me.global_info[key])

		# if only one biomass reaction, default ID is 'biomass_constituent_demand'
		me.global_info['biomass_reactions'] = me.global_info.get('biomass_reactions', ['biomass_constituent_demand'])
		biomass_constituents = me.global_info.get('flux_of_biomass_constituents', {})

		try:
			biomass_constituents = pandas.DataFrame(biomass_constituents)
			for biomass_reaction in me.global_info['biomass_reactions']:
				if not biomass_reaction in biomass_constituents.index:
					biomass_constituents.loc[biomass_reaction] = None # add missing conditions
			if 'All' in biomass_constituents.index: # 'All' contains a value that applies to all the missing conditions
				for column in biomass_constituents:
					# get conditions with NaN values
					empty_value_in_conditions = biomass_constituents[biomass_constituents[column].isna()].index
					for idx in empty_value_in_conditions:
						biomass_constituents.loc[idx, column] = biomass_constituents.loc['All', column]
		except:
			biomass_constituents = pandas.DataFrame(biomass_constituents, index = me.global_info['biomass_reactions'])
		biomass_constituents = biomass_constituents.fillna('')
		me.global_info['biomass_constituents'] = biomass_constituents

		for idx, row in biomass_constituents.iterrows():
		# replace IDs. New metabolites x types are created during the processing of the m_model
		# old ID (M-model) : new ID (ME-model)
			dct = df_mets[df_mets['type'].str.contains('REPLACE')].to_dict()['me_id']
			for key in list(row.keys()):
				if key in dct:
					row[dct[key]] = row.pop(key)
					logging.warning('Metabolite \'{:s}\' was replaced with \'{:s}\' in MetabolicReaction \'{:s}\'.'.format(key, dct[key], 'biomass_constituent_demand'))

			# remove metabolites not in the model or without molecular weight
			row = { k:v for k,v in row.items() if me.metabolites.has_id(k) and me.metabolites.get_by_id(k).formula_weight }

			problems = list(set(me.global_info.get('flux_of_biomass_constituents', {})).difference(row))
			if problems:
				logging.warning('The following biomass constituents, biomass \'{:s}\', are not in the ME-model or have no formula: {:s}.'.format(idx, ', '.join(problems)))
				logging.warning('A second attempt to add biomass constituents will be perform after update of formulas.')

			name = 'biomass_constituent_demand' if idx == 'biomass_constituent_demand' else 'biomass_constituent_demand_' + idx
			rxn = coralme.core.reaction.SummaryVariable(name)
			me.add_reactions([rxn])
			# An old bug in the YAML Resolver? YAML files should be written and read by the same version of anyconfig
			# https://stackoverflow.com/questions/30458977/yaml-loads-5e-6-as-string-and-not-a-number
			rxn.add_metabolites({ k:-(abs(float(v))) for k,v in row.items() if isinstance(me.metabolites.get_by_id(k), coralme.core.component.Metabolite) and isinstance(v, (float, int)) })
			rxn.lower_bound = 0. # me.mu # coralme.util.mu
			rxn.upper_bound = 0. # me.mu # coralme.util.mu
			constituent_mass = sum([me.metabolites.get_by_id(c).formula_weight / 1000. * abs(float(v)) for c,v in row.items() if isinstance(v, (float, int)) ])
			rxn.add_metabolites({me.metabolites.get_by_id('constituent_biomass'): constituent_mass})

		# ### 2. Lipid Demand Requirements
		# Metabolites and coefficients from biomass objective function

		lipid_demand = me.global_info.get('flux_of_lipid_constituents', {})
		#for key, value in me.global_info.get('flux_of_lipid_constituents', {}).items():
			#lipid_demand[key] = abs(value)

		try:
			lipid_demand = pandas.DataFrame(lipid_demand)
			for biomass_reaction in me.global_info['biomass_reactions']:
				if not biomass_reaction in lipid_demand.index:
					lipid_demand.loc[biomass_reaction] = None # add missing conditions
			if 'All' in lipid_demand.index: # 'All' contains a value that applies to all the missing conditions
				for column in lipid_demand:
					# get conditions with NaN values
					empty_value_in_conditions = lipid_demand[lipid_demand[column].isna()].index
					for idx in empty_value_in_conditions:
						lipid_demand.loc[idx, column] = lipid_demand.loc['All', column]
		except:
			lipid_demand = pandas.DataFrame(lipid_demand, index = me.global_info['biomass_reactions'])
		lipid_demand = lipid_demand.fillna('')

		if not lipid_demand.empty:
			me.global_info['lipid_demand_per_condition'] = {}
			for met, requirement in lipid_demand.items():
				for condition, value in requirement.items():
					me.global_info['lipid_demand_per_condition'].setdefault(condition, [])
					try:
						component_mass = me.metabolites.get_by_id(met).formula_weight / 1000.
						me.global_info['lipid_demand_per_condition'][condition].append('DM_' + met + '_' + condition)
						rxn = coralme.core.reaction.SummaryVariable('DM_' + met + '_' + condition)
						me.add_reactions([rxn])
						rxn.add_metabolites({met: -1 * abs(value), 'lipid_biomass': component_mass * abs(value)})
						rxn.lower_bound = 0. # me.mu # coralme.util.mu
						rxn.upper_bound = 0. # me.mu # originally 1000.
					except:
						msg = 'Metabolite \'{:s}\' lacks a formula. Please correct it in the M-model or the \'metabolites.txt\' metadata file.'
						logging.warning(msg.format(met))
		else:
			logging.warning('All metabolites in the \'flux_of_lipid_constituents\' configuration key lack their formulae.')

		# set active biomass reaction and lipid reactions
		if me.global_info.get('active_biomass_reaction', False):
			me.active_biomass_reaction = me.global_info['active_biomass_reaction']
		else:
			me.active_biomass_reaction = 'biomass_constituent_demand'

		# ### 3. DNA Demand Requirements
		# Added based on growth rate dependent DNA levels as in [O'brien EJ et al 2013](https://www.ncbi.nlm.nih.gov/pubmed/24084808) (*E. coli* data)
		if me.global_info.get('flux_of_dna_constituents', False):
			# Fraction of each nucleotide in DNA, based on M-model's BOF
			dna_demand_stoich = {
				'datp_c': -abs(me.global_info['flux_of_dna_constituents']['datp_c']),
				'dctp_c': -abs(me.global_info['flux_of_dna_constituents']['dctp_c']),
				'dgtp_c': -abs(me.global_info['flux_of_dna_constituents']['dgtp_c']),
				'dttp_c': -abs(me.global_info['flux_of_dna_constituents']['dttp_c']),
				}
			dna_demand_stoich['ppi_c'] = abs(me.global_info['flux_of_dna_constituents'].get('ppi_c', sum([ v for k,v in dna_demand_stoich.items() if k != 'ppi_c'])))
		else:
			# Fraction of each nucleotide in DNA, based on gc_fraction
			dna_demand_stoich = {
				'datp_c': -((1 - me.global_info['GC_fraction']) / 2),
				'dctp_c': -(me.global_info['GC_fraction'] / 2),
				'dgtp_c': -(me.global_info['GC_fraction'] / 2),
				'dttp_c': -((1 - me.global_info['GC_fraction']) / 2),
				'ppi_c': 1
				}

		dna_replication = coralme.core.reaction.SummaryVariable('DNA_replication')
		me.add_reactions([dna_replication])
		dna_replication.add_metabolites(dna_demand_stoich)

		dna_mw = 0
		dna_mw_no_ppi = coralme.builder.dna_replication.get_dna_mw_no_ppi_dict(me)
		for met, value in me.reactions.get_by_id('DNA_replication').metabolites.items():
			if met.id != 'ppi_c':
				dna_mw -= value * dna_mw_no_ppi[met.id.replace('_c', '')] / 1000. # in kDa

		dna_biomass = coralme.core.component.Constraint('DNA_biomass')
		dna_replication.add_metabolites({dna_biomass: dna_mw})

		if me.global_info.get('flux_of_dna_constituents', False):
			dna_demand_bound = me.mu
		else:
			dna_demand_bound = coralme.builder.dna_replication.return_gr_dependent_dna_demand(
				me, me.global_info['GC_fraction'], me.global_info['percent_dna_data'], me.global_info['gr_data_doublings_per_hour'])

		dna_replication.lower_bound = dna_demand_bound
		dna_replication.upper_bound = dna_demand_bound

		# ### 9) Save ME-model as a pickle file
		with open('{:s}/MEModel-step1-{:s}.pkl'.format(out_directory, model), 'wb') as outfile:
			pickle.dump(me, outfile)

		coralme.core.extended_classes.ListHandler.print_and_log('ME-model was saved in the {:s} directory as MEModel-step1-{:s}.pkl'.format(out_directory, model))

		# ## Part 2: Add metastructures to solving ME-model
		# set logger
		log = logging.getLogger() # root logger
		for hdlr in log.handlers[:]: # remove all old handlers
			log.removeHandler(hdlr)

		# Old code works in a separate script; but it works if we remove the old handler
		logging.basicConfig(filename = '{:s}/MEReconstruction-step2-{:s}.log'.format(log_directory, model), filemode = 'w', level = logging.WARNING, format = log_format)
		log.addHandler(self.logger['MEReconstruction-step2'])
		#log.addHandler(logging.StreamHandler(sys.stdout))
		logging.captureWarnings(True)

		# ### 1) Add *GenericData* and its reactions
		# Multiple entities can perform the same role. To prevent a combinatorial explosion, we create "generic" versions of the components, where any of those entities can fill in.
		#
		# This step was moved to the first part of the builder process to be able to use generics in enzymes linked to metabolic reactions.

		# ### 2) Add DNA polymerase and the ribosome and its rRNAs modifications
		# ~~This uses the ribosome composition and subreaction definitions in **coralme/ribosome.py**~~

		# WARNING: EXPERIMENTAL, not included in the original cobrame paper.
		# 1) Is the coupling coefficient dependent on the length of the DNA?
		# 2) Do we need to modify the DNA replication to have one reaction per replicon?
		# dnapol_id = me.global_info['dnapol_id']

		# me.add_metabolites([cobrame.core.component.Complex(dnapol_id)])
		# data = cobrame.core.processdata.ComplexData(dnapol_id, me)
		# data.stoichiometry.update(coralme.preprocess_inputs.dnapolymerase_stoichiometry(df_data))
		# data.create_complex_formation(verbose = False)

		ribosome_stoich = coralme.builder.preprocess_inputs.ribosome_stoichiometry(df_data)
		ribosome_subreactions = coralme.builder.preprocess_inputs.get_subreactions(df_data, 'Ribosome')
		df_rrna_mods = df_rna_mods[df_rna_mods['bnum'].isin(['16s_rRNAs', '23s_rRNAs'])]
		coralme.builder.ribosome.add_ribosome(me, ribosome_stoich, ribosome_subreactions, df_rrna_mods, verbose = True)

		# ### 3) Add charged tRNA reactions

		# The tRNA charging reactions were automatically added when loading the genome from the GenBank file. However, the charging reactions still need to be made aware of the tRNA synthetases which are responsible. Generic charged tRNAs are added to translation reactions via *SubreactionData* below.

		# tRNA synthetases per organelle
		if hasattr(self, 'org') and len(me.global_info.get('amino_acid_trna_synthetase', {})) == 0:
			aa_synthetase_dict = me.global_info['amino_acid_trna_synthetase']
		elif len(me.global_info.get('amino_acid_trna_synthetase', {})) != 0:
			aa_synthetase_dict = me.global_info.get('amino_acid_trna_synthetase', {})
		else:
			aa_synthetase_dict = coralme.builder.preprocess_inputs.aa_synthetase_dict(df_data)
			logging.warning('Association of tRNA synthetases and amino acids was inferred from GenBank annotation. It can be incomplete.')

		for data in tqdm.tqdm(list(me.tRNA_data), 'Adding tRNA synthetase(s) information into the ME-model...', bar_format = bar_format):
			data.synthetase = str(aa_synthetase_dict.get(data.amino_acid, 'CPLX_dummy'))

		# Correct 'translation_stop_dict' if PrfA and/or PrfB homologs were not identified
		PrfA_mono = me.global_info['peptide_release_factors'].get('UAG', 'PrfA_mono')
		# In Trans Table 4, UGA encodes Trp, not a stop codon. User might choose not to add 'UGA' in peptide_release_factors in organism.json
		PrfB_mono = me.global_info['peptide_release_factors'].get('UGA', 'PrfB_mono')
		generic_RF = me.global_info['peptide_release_factors'].get('UAA', 'generic_RF')

		if     me.metabolites.has_id(PrfA_mono) and not me.metabolites.has_id(PrfB_mono):
			me.global_info['peptide_release_factors']['UGA'] = PrfA_mono # originally assigned to PrfB_mono
			me.global_info['peptide_release_factors']['UAA'] = PrfA_mono # originally assigned to generic_RF
		if not me.metabolites.has_id(PrfA_mono) and     me.metabolites.has_id(PrfB_mono):
			me.global_info['peptide_release_factors']['UAG'] = PrfB_mono # originally assigned to PrfA_mono
			me.global_info['peptide_release_factors']['UAA'] = PrfB_mono # originally assigned to generic_RF
		if not me.metabolites.has_id(PrfA_mono) and not me.metabolites.has_id(PrfB_mono):
			me.global_info['peptide_release_factors']['UAG'] = 'CPLX_dummy' # originally assigned to PrfA_mono
			me.global_info['peptide_release_factors']['UGA'] = 'CPLX_dummy' # originally assigned to PrfB_mono

		if me.global_info['peptide_release_factors']['UAG'] == 'CPLX_dummy' and me.global_info['peptide_release_factors']['UGA'] == 'CPLX_dummy':
			me.global_info['peptide_release_factors']['UAA'] = 'CPLX_dummy'

		# charged tRNAs
		for organelle, transl_table in me.global_info['transl_tables'].items():
			if len(transl_table) == 0:
				continue

			coralme.builder.translation.add_charged_trna_subreactions(me, organelle, transl_table, translation_stop_dict = me.global_info['peptide_release_factors'], selenocysteine_enzymes = me.global_info.get('selenocysteine_enzymes', []))

		# ### 4) Add tRNA modifications into the ME-model and associate them with tRNA charging reactions

		# Add tRNA modifications to ME-model per type of organism
		if me.global_info['domain'].lower() in ['prokaryote', 'bacteria']:
			df_trna_mods = df_rna_mods[~df_rna_mods['bnum'].isin(['16s_rRNAs', '23s_rRNAs'])]
		elif me.global_info['domain'].lower() in ['eukarya', 'eukaryote']:
			df_trna_mods = df_rna_mods[~df_rna_mods['bnum'].isin(['18S_rRNAs', '25S_rRNAs', '28S_rRNAs'])]
		else:
			logging.warning('The \'domain\' property is not valid. A valid value is \'Prokaryote\', \'Bacteria\', \'Eukarya\', or \'Eukaryote\'.')

		coralme.builder.trna_charging.add_trna_modification_procedures(me, df_trna_mods)
		# Associate tRNA modifications to tRNAs
		trna_modifications = coralme.builder.flat_files.get_trna_modification_targets(df_trna_mods)
		for idx, trna in tqdm.tqdm(list(df_trna_mods.iterrows()), 'Associating tRNA modification enzyme(s) to tRNA(s)...', bar_format = bar_format):
			for data in me.process_data.query('tRNA_' + trna['bnum']):
				data.subreactions = trna_modifications[trna['bnum']]

		# ### 5) Add translation SubReactions into the ME-model and associate them with Translation reactions

		initiation_subreactions = coralme.builder.preprocess_inputs.get_subreactions(df_data, 'Translation_initiation')
		elongation_subreactions = coralme.builder.preprocess_inputs.get_subreactions(df_data, 'Translation_elongation')
		termination_subreactions = coralme.builder.preprocess_inputs.get_subreactions(df_data, 'Translation_termination')
		#termination_subreactions.update(coralme.builder.preprocess_inputs.get_subreactions(df_data, 'Translation_termination_generic_RF_mediated'))
		processing_subreactions = coralme.builder.preprocess_inputs.get_subreactions(df_data, 'Protein_processing')

		for data in tqdm.tqdm(list(me.translation_data), 'Adding SubReactions into TranslationReactions...', bar_format = bar_format):
			data.add_initiation_subreactions(start_codons = me.global_info['start_codons'], start_subreactions = initiation_subreactions)
			data.add_elongation_subreactions(elongation_subreactions = elongation_subreactions)
			data.add_termination_subreactions(translation_terminator_dict = me.global_info['peptide_release_factors'])

		# ### 6) Add Transcription Metacomplexes: RNA Polymerase(s)

		rna_polymerases = me.global_info.get('rna_polymerases', {})

		# Create polymerase "metabolites"
		for rnap, components in tqdm.tqdm(rna_polymerases.items(), 'Adding RNA Polymerase(s) into the ME-model...', bar_format = bar_format):
			if me.metabolites.has_id(components['sigma_factor']) and me.metabolites.has_id(components['polymerase']):
				rnap_obj = coralme.core.component.RNAP(rnap)
				me.add_metabolites(rnap_obj)
				logging.warning('The RNA Polymerase \'{:s}\' was created in the ME-model successfully.'.format(rnap))
			else:
				if not me.metabolites.has_id(components['sigma_factor']):
					logging.warning('The complex ID \'{:s}\' from \'rna_polymerases\' in the configuration does not exist in the organism-specific matrix. Please check if it is the correct behaviour.'.format(components['sigma_factor']))
				if not me.metabolites.has_id(components['polymerase']):
					logging.warning('The complex ID \'{:s}\' from \'rna_polymerases\' in the configuration does not exist in the organism-specific matrix. Please check if it is the correct behaviour.'.format(components['polymerase']))

		# Add polymerase complexes in the model
		coralme.builder.transcription.add_rna_polymerase_complexes(me, rna_polymerases, verbose = False)

		# Associate the correct RNA_polymerase and factors to TUs
		sigma_to_rnap = { v['sigma_factor']:k for k,v in rna_polymerases.items() }
		for tu_id in tqdm.tqdm(df_tus.index, 'Associating a RNA Polymerase to each Transcriptional Unit...', bar_format = bar_format):
			if me.process_data.has_id(tu_id):
				transcription_data = me.process_data.get_by_id(tu_id)
				transcription_data.RNA_polymerase = sigma_to_rnap.get(df_tus['rnapol'][tu_id], None)
			else:
				logging.warning('Transcription Unit \'{:s}\' is missing from ProcessData, likely the associated gene(s) (\'{:s}\') were filtered out before the reconstruction. Check if it is the correct behavior.'.format(tu_id, df_tus['genes'][tu_id]))
				pass

		# WARNING: Without a TUs file, the 'most common' polymerase should be an empty string
		rnap_counter = collections.Counter([ x.RNA_polymerase for x in me.transcription_data ])
		# override 'most_common' polymerase if it is empty
		user_default_sigma = me.global_info.get('default_sigma_factor', '')
		user_default_rnap = sigma_to_rnap.get(user_default_sigma, None)

		if user_default_rnap is None:
			msg = 'Assigning most common RNAP \'{:s}\' to missing polymerase in \'{:s}\''
			most_common = max(rnap_counter, key = rnap_counter.get)
		else:
			msg = 'Assigning user default RNAP \'{:s}\' to missing polymerase in \'{:s}\''
			most_common = user_default_rnap

		for transcription_data in me.transcription_data:
			if transcription_data.RNA_polymerase == '' and most_common != '':
				logging.warning(msg.format(most_common,transcription_data.id))
				transcription_data.RNA_polymerase = most_common

		# ### 7) Add Transcription Metacomplexes: Degradosome (both for RNA degradation and RNA splicing)
		degradosome_id = me.global_info['degradosome_id']

		me.add_metabolites([coralme.core.component.Complex(degradosome_id)])
		data = coralme.core.processdata.ComplexData(degradosome_id, me)
		stoich = coralme.builder.preprocess_inputs.degradosome_stoichiometry(df_data)
		if stoich is not None:
			data.stoichiometry.update(stoich)
		else:
			data.stoichiometry.update({'CPLX_dummy' : -1})
		data.create_complex_formation(verbose = False)

		# Used for RNA splicing
		data = coralme.core.processdata.SubreactionData('RNA_degradation_machine', me)
		data.enzyme = [me.global_info['degradosome_id']]

		# .25 water equivalent for ATP hydrolysis per nucleotide
		data = coralme.core.processdata.SubreactionData('RNA_degradation_atp_requirement', me)
		data.stoichiometry = { 'atp_c': -0.25, 'h2o_c': -0.25, 'adp_c': +0.25, 'h_c': +0.25, 'pi_c': +0.25 }

		for excision_type in me.global_info['excision_machinery']:
			stoichiometry = coralme.builder.preprocess_inputs.excision_machinery_stoichiometry(df_data, excision_type)
			if stoichiometry is not None:
				coralme.builder.transcription.add_rna_excision_machinery(me, excision_type, stoichiometry)
			else:
				coralme.builder.transcription.add_rna_excision_machinery(me, excision_type, {'CPLX_dummy' : +1}) # +1 is correct
				logging.warning('All the components of the excision complex for \'{:s}\' were not identified from homology and it was assigned to the \'CPLX_dummy\' complex.'.format(excision_type))

		# add excision machineries into TranscriptionData
		# WARNING: subreactions is now a property of the TranscriptionData recalculated when accessed
		#coralme.builder.transcription.add_rna_splicing(me)

		# ## Part 3: Add remaining modifications (including iron clusters and lipoate)

		## WARNING: This is calculated above (See Building Step 1, subsection 3)
		## mods_dct is a dictionary {complex_name_with_mods: {core_enzyme: complex_name, modifications: {stoichiometry}}
		#mods_dct = coralme.builder.flat_files.get_complex_modifications(
			#reaction_matrix = df_rmsc,
			#protein_complexes = df_cplxs,
			#complex_mods = df_ptms,
			#compartments = { v:k for k,v in me._compartments.items() })

		for complex_id, info in tqdm.tqdm(mods_dct.items(), 'Processing ComplexData in ME-model...', bar_format = bar_format):
			modifications = {}
			for mod, value in info['modifications'].items():
				# stoichiometry of modification determined in subreaction_data.stoichiometry
				modifications['mod_' + mod] = abs(value)
			me.process_data.get_by_id(complex_id).subreactions = modifications

		# Check if complex modifications are set on any component in the organism-specific matrix
		# Dipyrromethane
		# dpm modification never from the free metabolite
		if me.process_data.has_id('mod_dpm_c'):
			if me.metabolites.has_id('dpm_c'):
				me.remove_metabolites([me.metabolites.dpm_c])

		# WARNING: use a different ID for spontaneous modification vs enzymatic modification (?)
		# biotin from the free metabolite in malonate decarboxylase (EC 7.2.4.4); don't remove biotin from the model (EC 4.1.1.88 is biotin-independent)
		# biotin from the free metabolite in acetyl-CoA carboxylase, but using biotin---[acetyl-CoA-carboxylase] ligase
		if me.process_data.has_id('mod_btn_c'):
			coralme.builder.modifications.add_btn_modifications(me)

		# 2'-(5''-triphosphoribosyl)-3'-dephospho-CoA in CitD catalyzed by CitX
		# 2'-(5''-triphosphoribosyl)-3'-dephospho-CoA loaded from the free metabolite; don't remove it from the model
		if me.process_data.has_id('mod_2tpr3dpcoa_c'):
			coralme.builder.modifications.add_2tpr3dpcoa_modifications(me)

		# activation of glycyl radical enzymes
		# glycyl modification never from the free metabolite
		if me.process_data.has_id('mod_glycyl_c'):
			coralme.builder.modifications.add_glycyl_modifications(me)
			if me.metabolites.has_id('glycyl_c'):
				me.remove_metabolites([me.metabolites.glycyl_c])

		# pap4p in AcpP catalyzed by AcpS
		# pan4p loaded from free CoA, producing pap as byproduct; don't remove it from the model
		if me.process_data.has_id('mod_pan4p_c'):
			coralme.builder.modifications.add_pan4p_modifications(me)

		# https://www.genome.jp/pathway/map00785
		# lipoyl is a pseudo metabolite; it rather comes from free lipoate or from octanoate-ACP
		if me.process_data.has_id('mod_lipoyl_c'):
			coralme.builder.modifications.add_lipoyl_modifications(me)
			if me.metabolites.has_id('lipoyl_c'):
				me.remove_metabolites([me.metabolites.lipoyl_c])

		if me.process_data.has_id('mod_3fe4s_c') or me.process_data.has_id('mod_4fe4s_c') or me.process_data.has_id('mod_2fe2s_c'):
			coralme.builder.modifications.add_iron_sulfur_modifications(me)
		if me.process_data.has_id('mod_FeFe_cofactor_c') or me.process_data.has_id('mod_NiFe_cofactor_c'):
			coralme.builder.modifications.add_FeFe_and_NiFe_modifications(me)
		if me.process_data.has_id('mod_bmocogdp_c'):
			coralme.builder.modifications.add_bmocogdp_chaperones(me)

		# add formation reactions for each of the ComplexData
		for data in tqdm.tqdm(list(me.complex_data), 'Adding ComplexFormation into the ME-model...', bar_format = bar_format):
			formation = data.formation
			if formation:
				formation.update()
			else:
				data.create_complex_formation()
				logging.warning('Added a ComplexFormation reaction for \'{:s}\'.'.format(data.id))

		# ## Part 4: Add remaining subreactions
		# ### 1. Add translation related subreactions

		# get list of processed proteins from df_data
		processing_pathways = [
			x.replace('Protein_processing_', '')
			for x in me.global_info['translation_subreactions'].keys()
			if x.startswith('Protein_processing')
			]
		protein_processing = { k:df_data[~df_data[k].isnull() & df_data[k].str.match('1|TRUE|True|true')]['Gene Locus ID'].tolist() for k in processing_pathways }

		# Add the translation subreaction data objects to the ME-model
		coralme.builder.translation.add_subreactions_to_model(
			me, [initiation_subreactions, elongation_subreactions, termination_subreactions, processing_subreactions])

		for data in tqdm.tqdm(list(me.translation_data), 'Adding SubReactions into TranslationReactions...', bar_format = bar_format):
			for process in protein_processing:
				if data.id in protein_processing[process]:
					data.subreactions['Protein_processing_' + process] = 1.

			# This block was run above, but it should be run again to incorporate any subreactions not added previously
			data.add_initiation_subreactions(start_codons = me.global_info['start_codons'], start_subreactions = initiation_subreactions)
			data.add_elongation_subreactions(elongation_subreactions = elongation_subreactions)
			data.add_termination_subreactions(translation_terminator_dict = me.global_info['peptide_release_factors'])

			# Add organism specific subreactions associated with peptide processing
			for subrxn in me.global_info['peptide_processing_subreactions']:
				data.subreactions[subrxn] = 1.

		# ### 2) Add transcription related subreactions
		transcription_subreactions = coralme.builder.preprocess_inputs.get_subreactions(df_data, 'Transcription')
		coralme.builder.transcription.add_subreactions_to_model(me, [transcription_subreactions])

		for transcription_data in tqdm.tqdm(list(me.transcription_data), 'Adding Transcription SubReactions...', bar_format = bar_format):
			# Assume false if not in tu_df
			is_rho_dependent = df_tus.rho_dependent.get(transcription_data.id, 'False')
			rho_dependency = 'dependent' if is_rho_dependent in ['1', 'TRUE', 'True', 'true'] else 'independent'
			stable = 'stable' if transcription_data.codes_stable_rna else 'normal'
			if 'Transcription_{:s}_rho_{:s}'.format(stable, rho_dependency) in me.global_info['transcription_subreactions']:
				transcription_data._subreactions['Transcription_{:s}_rho_{:s}'.format(stable, rho_dependency)] = 1.
			else:
				logging.warning('The SubReaction \'Transcription_{:s}_rho_{:s}\' is not defined in the organism-specific matrix.'.format(stable, rho_dependency))

		# ## Part 5: Add in Translocation reactions

		v1 = { 'fixed_keff' : False, 'length_dependent' : True } # default
		v2 = { 'fixed_keff' : True,  'length_dependent' : False } # only for FtsY in the SRP pathway
		v3 = { 'fixed_keff' : False, 'length_dependent' : False } # for all the enzymes from the tat, tat_alt, lol and bam pathways

		for key, value in me.global_info['translocation_pathway'].items():
			if 'translocation_pathway_' + key in df_transpaths.index:
				me.global_info['translocation_pathway'][key]['enzymes'] = {
					k:(v2 if k == value.get('FtsY', None) else v1 if (key.lower() not in ['tat', 'tat_alt', 'lol', 'bam']) else v3) \
						for k in df_transpaths.loc['translocation_pathway_' + key].tolist()[0] }

			# TO ADD PATHWAYS WITHOUT HOMOLOGS
			# Check if the user wants to add dummies to the translocation pathways
			elif bool(me.global_info.get('add_translocases', False)) and value.get('enzymes', None) is None:
				me.global_info['translocation_pathway'][key]['enzymes'] = { 'CPLX_dummy':(v2 if value.get('FtsY', None) else v1 if (key.lower() not in ['tat', 'tat_alt', 'lol', 'bam']) else v3) }
				logging.warning('The component \'CPLX_dummy\' was associated to translocation pathways without defined homologs.')

		dct = { k:v['abbrev'] for k,v in me.global_info['translocation_pathway'].items() }
		dct = dict([(v, [k + '_translocation' for k,v1 in dct.items() if v1 == v]) for v in set(dct.values())])
		dct = { k:(v[0] if len(v) == 1 else v) for k,v in dct.items() } # tat pathways should be a list, but not the others

		# Check if the user added the reactions for translocation data
		if not me.process_data.has_id('atp_hydrolysis_sec_pathway'):
			stoichiometry = {'atp_c': -0.04, 'h2o_c': -0.04, 'adp_c': +0.04, 'h_c': +0.04, 'pi_c' : +0.04}
			coralme.util.building.add_subreaction_data(
				me, modification_id = 'atp_hydrolysis_sec_pathway', modification_stoichiometry = stoichiometry, modification_enzyme = 'CPLX_dummy')
		if not me.process_data.has_id('atp_hydrolysis_secA'):
			stoichiometry = {'atp_c': -1/75, 'h2o_c': -1/75, 'adp_c': +1/75, 'h_c': +1/75, 'pi_c': +1/75}
			coralme.util.building.add_subreaction_data(
				me, modification_id = 'atp_hydrolysis_secA', modification_stoichiometry = stoichiometry, modification_enzyme = 'CPLX_dummy')
		if not me.process_data.has_id('gtp_hydrolysis_srp_pathway'):
			stoichiometry = {'gtp_c': -2.0, 'h2o_c': -2.0, 'gdp_c': +2.0, 'h_c': +2.0, 'pi_c': +2.0}
			coralme.util.building.add_subreaction_data(
				me, modification_id = 'gtp_hydrolysis_srp_pathway', modification_stoichiometry = stoichiometry, modification_enzyme = 'CPLX_dummy')

		# for pathway, info in coralme.translocation.pathway.items():
		for pathway, info in me.global_info['translocation_pathway'].items():
			if me.global_info['translocation_pathway'][pathway].get('enzymes', None) is not None:
				transloc_data = coralme.core.processdata.TranslocationData(pathway + '_translocation', me)
				transloc_data.enzyme_dict = info['enzymes']
				transloc_data.keff = info['keff']
				transloc_data.length_dependent_energy = info['length_dependent_energy']
				if me.process_data.has_id(info['stoichiometry']):
					transloc_data.stoichiometry = me.process_data.get_by_id(info['stoichiometry']).stoichiometry
				else:
					transloc_data.stoichiometry = {}

		# Associate data and add translocation reactions
		multipliers = collections.defaultdict(dict)
		for enzyme, value in me.global_info.get('translocation_multipliers', {}).items():
			for bnum in value.keys():
				multipliers['protein_' + bnum][enzyme] = value[bnum]

		coralme.builder.translocation.add_translocation_pathways(me, pathways_df = df_protloc, abbreviation_to_pathway = dct, multipliers = multipliers, membrane_constraints = False)

		# Update stoichiometry of membrane complexes
		new_stoich = collections.defaultdict(dict)

		for idx, row in tqdm.tqdm(list(df_protloc.iterrows()), 'Processing StoichiometricData in SubReactionData...', bar_format = bar_format):
			protID = row['Protein']
			protIDLoc = protID + '_' + row['Protein_compartment']
			# to use dict.update() later; It replaces removing the key using the dict.pop method
			new_stoich[idx]['protein_' + protID] = 0
			new_stoich[idx]['protein_' + protIDLoc] = float(me.process_data.get_by_id(idx).stoichiometry['protein_' + protID])

		for cplx, stoich in new_stoich.items():
			complex_data = me.process_data.get_by_id(cplx)
			complex_data.stoichiometry.update(new_stoich[cplx])
			# remove zeroes from complex_data.stoichiometry
			complex_data.stoichiometry = { k:v for k,v in complex_data.stoichiometry.items() if v != 0 }
			complex_data.formation.update()

			# Complex IDs in protein compartment file don't include modifications
			# Some have multiple alternative modifications so must loop through these
			for complex_data in me.process_data.query('^{:s}_mod_'.format(cplx)):
				# WARNING: FeFe and NiFe cofactors reform the formation reactions as follow:
				# requires a formation -> base_complex + FeFe/NiFe => base_complex_mod_FeFe/_mod_NiFe <- should not have a formation reaction
				# base_complex_mod_FeFe/_mod_NiFe + other cofactors => final modified complex
				lst = [ type(me.metabolites.get_by_id(x)) for x in complex_data.stoichiometry.keys() ]
				if coralme.core.component.Complex in lst:
					continue

				complex_data.stoichiometry.update(new_stoich[cplx])
				# remove zeroes from complex_data.stoichiometry
				complex_data.stoichiometry = { k:v for k,v in complex_data.stoichiometry.items() if v != 0 }
				complex_data.formation.update()

		# ## Part 6: Add Cell Wall Components
		# ### 1. Add lipid modification SubreactionData

		compartment_dict = {}
		for idx, compartment in df_protloc.set_index('Protein').Protein_compartment.to_dict().items():
			compartment_dict[idx] = compartment

		lipid_modifications = me.global_info.get('lipid_modifications')
		lipoprotein_precursors = me.global_info.get('lipoprotein_precursors', [])

		# Step1: assign enzymes to lipid modifications
		for data in me.process_data.query('^mod_1st'):
			data.enzyme = me.global_info.get('pg_pe_160', 'CPLX_dummy')
		for data in me.process_data.query('^mod_2nd'):
			data.enzyme = me.global_info.get('other_lipids', 'CPLX_dummy')

		# Step2: add reactions of lipoprotein formation
		if bool(config.get('add_lipoproteins', False)) and lipoprotein_precursors:
			coralme.builder.translocation.add_lipoprotein_formation(
				me, compartment_dict, lipoprotein_precursors, lipid_modifications, membrane_constraints = False, update = True)

		# ### 2. Correct complex formation IDs if they contain lipoproteins

		#for gene in tqdm.tqdm(coralme.builder.translocation.lipoprotein_precursors.values()):
		if bool(config.get('add_lipoproteins', False)) and lipoprotein_precursors:
			for gene in tqdm.tqdm(lipoprotein_precursors.values(), 'Adding lipid precursors and lipoproteins...', bar_format = bar_format):
				compartment = compartment_dict.get(gene)
				if compartment is None:
					pass
				else:
					for rxn in me.metabolites.get_by_id('protein_' + gene + '_' + compartment).reactions:
						if isinstance(rxn, coralme.core.reaction.ComplexFormation):
							data = me.process_data.get_by_id(rxn.complex_data_id)
							value = data.stoichiometry.pop('protein_' + gene + '_' + compartment)
							data.stoichiometry['protein_' + gene + '_lipoprotein' + '_' + compartment] = value
							rxn.update()

		# ### 3. Braun's lipoprotein demand
		# Metabolites and coefficients as defined in [Liu et al 2014](http://bmcsystbiol.biomedcentral.com/articles/10.1186/s12918-014-0110-6)
		brauns_lipid_mod = me.global_info['braun\'s_lipid_mod']

		if len(me.global_info['braun\'s_lipoproteins']) >= 1:
			for brauns_lipoprotein in me.global_info['braun\'s_lipoproteins']:
				# Perform checks before attempt to incorporate the Braun's lipoproteins
				if not me.metabolites.has_id(brauns_lipid_mod):
					logging.warning('The metabolite \'{:s}\' is not present in the ME-model and the Braun\'s lipoprotein demand cannot be set. Please check if it is the correct behavior. See http://bigg.ucsd.edu/universal/metabolites/murein5px4p for more information.'.format(brauns_lipid_mod))
					continue
				if not me.reactions.has_id('formation_{:s}'.format(brauns_lipoprotein)):
					logging.warning('The \'add_lipoproteins\' option is \'False\' or coralme failed to add the \'{:s}\' ComplexFormation reaction and the Braun\'s lipoprotein demand cannot be set. Please check if it is the correct behavior.'.format(brauns_lipoprotein))
					continue

				rxn = coralme.core.reaction.SummaryVariable('core_structural_demand_brauns_{:s}'.format(brauns_lipoprotein))
				murein5px4p = me.metabolites.get_by_id(brauns_lipid_mod)
				murein5px4p_mass = murein5px4p.formula_weight / 1000.
				# ecolime: 1.0 protein_b1677_lipoprotein_Outer_Membrane --> 1.0 EG10544-MONOMER (brauns_lipoprotein ID)
				lipoprotein = me.metabolites.get_by_id(brauns_lipoprotein)
				me.add_reactions([rxn])

				# biomass of lipoprotein accounted for in translation and lipid modification
				rxn.add_metabolites({
					murein5px4p : -abs(me.global_info['braun\'s_murein_flux']),
					lipoprotein : -abs(me.global_info['braun\'s_lpp_flux']),
					me.metabolites.peptidoglycan_biomass : abs(me.global_info['braun\'s_murein_flux']) * murein5px4p_mass
					},
					combine = False)
				rxn.lower_bound = me.mu # coralme.util.mu
				rxn.upper_bound = me.mu # coralme.util.mu
		else:
			logging.warning('No Braun\'s lipoprotein (lpp gene) homolog was set. Please check if it is the correct behavior.')

		# WARNING: Part 7 was originally "set keffs", however, formulas of complexes are corrected later and sasa can be underestimated
		# ## Part 7: Model updates and corrections
		# ### 1. Subsystems

		# Add reaction subsystems from M-model to ME-model
		for rxn in tqdm.tqdm(me.processed_m_model.reactions, 'Adding reaction subsystems from M-model into the ME-model...', bar_format = bar_format):
			if rxn.id in me.process_data:
				data = me.process_data.get_by_id(rxn.id)
			else:
				continue

			for parent_rxn in data.parent_reactions:
				parent_rxn.subsystem = rxn.subsystem

		# ### 2. Add enzymatic coupling for "carriers"
		# These are enzyme complexes that act as metabolites in a metabolic reaction.
		for data in tqdm.tqdm(list(me.stoichiometric_data), 'Processing StoichiometricData in ME-model...', bar_format = bar_format):
			if data.id == 'dummy_reaction':
				continue

			for met, value in data.stoichiometry.items():
				# This will add any complex to the list of enzymes
				if not isinstance(me.metabolites.get_by_id(met), coralme.core.component.Complex):
					continue

				subreaction_id = met + '_carrier_activity'
				if subreaction_id not in me.process_data:
					sub = coralme.core.processdata.SubreactionData(subreaction_id, me)
					sub.enzyme = [met]

				data.subreactions[subreaction_id] = abs(value)

		# ### 3. Update ME-model
		# trick to obtain shadow prices and reduced costs from the optimizer
		me.reactions.dummy_reaction_FWD_SPONT.objective_coefficient = 1.

		if update:
			me.update() # not parallelized

		# ### 4. Add remaining formulas and compartments to the ME-model
		for r in tqdm.tqdm(me.reactions.query('^formation_'), 'Updating all FormationReactions...', bar_format = bar_format):
			r.update()

		modification_formulas = df_mets[df_mets['type'].str.match('COFACTOR|MOD|MODIFICATION')]
		modification_formulas = dict(zip(modification_formulas['me_id'], modification_formulas['formula']))
		me.global_info['modification_formulas'] = modification_formulas

		# Correct formula of complexes based on their base complex
		# This will add the formula to complexes not formed from a complex formation reaction (e.g. CPLX + na2_c -> CPLX_mod_na2(1))
		#coralme.builder.formulas.add_remaining_complex_formulas(me, modification_formulas)
		for met in [ x for x in me.metabolites if '_mod_' in x.id and isinstance(x, coralme.core.component.Complex)]:
			met.formula = None
			met.elements = {}

			base_complex = met.id.split('_mod_')[0]
			base_complex_elements = collections.Counter(me.metabolites.get_by_id(base_complex).elements)

			for mod in met.id.split('_mod_')[1:]:
				#for num in range(int(mod.rstrip(')').split('(')[1])):
				mod_elements = None
				mod_name = mod.split('(')[0]

				if mod_name in modification_formulas:
					mod_elements = coralme.builder.helper_functions.parse_composition(modification_formulas[mod_name])
					# 2fe2s_c and 4fe4s_c appear as free metabolites in reactions and need to have formula for correct mass balance determination
					if me.metabolites.has_id(mod_name + '_c') and me.metabolites.get_by_id(mod_name + '_c').formula is None:
						me.metabolites.get_by_id(mod_name + '_c').formula = modification_formulas[mod_name]
						logging.warning('Formula for \'{:s}\' updated from me_mets.txt file.'.format(mod_name + '_c'))
					logging.warning('Elemental contribution for \'{:s}\' calculated from me_mets.txt file.'.format(mod_name))

				elif me.metabolites.has_id(mod_name + '_c') and me.metabolites.get_by_id(mod_name + '_c').formula is not None:
					mod_elements = me.metabolites.get_by_id(mod_name + '_c').elements
					logging.warning('Elemental contribution for \'{:s}\' calculated from metabolite formula.'.format(mod_name))

				# WARNING: electron carriers can, assuming they are neutral, transfer also protons
				# WARNING: Ferredoxins only transfer electrons; thioredoxins and others transfer protons and electrons.
				elif mod.startswith('Oxidized'):
					if base_complex in me.global_info['electron_transfers'].get('ferredoxins', []):
						mod_elements = {'H': 0}
						logging.warning('Elemental contribution for \'{:s}\' calculated manually.'.format(base_complex))
					elif base_complex in me.global_info['electron_transfers'].get('cytochromes', []):
						mod_elements = {'H': 0}
						logging.warning('Elemental contribution for \'{:s}\' calculated manually.'.format(base_complex))
					elif 'Oxidized(1)' == mod and base_complex not in me.global_info['electron_transfers'].get('flavodoxins', ['FLAVODOXIN']):
						mod_elements = {'H': -2}
						logging.warning('Elemental contribution for \'{:s}\' calculated manually.'.format(base_complex))
					elif 'Oxidized(2)' == mod and base_complex not in me.global_info['electron_transfers'].get('flavodoxins', ['FLAVODOXIN']):
						mod_elements = {'H': -4}
						logging.warning('Elemental contribution for \'{:s}\' calculated manually.'.format(base_complex))
					# TODO: is the fmn cofactor in flavodoxin neutral?
					# WARNING: flavodoxin homologs might have a different base_complex ID compared to the ecolime model
					elif 'Oxidized(1)' == mod and base_complex in me.global_info['electron_transfers'].get('flavodoxins', ['FLAVODOXIN']):
						mod_elements = {'H': 0}
						logging.warning('Elemental contribution for \'{:s}\' calculated manually.'.format(base_complex))
					else:
						logging.warning('Elemental contribution in \'{:s}\' could not be determined. Please check configuration file and add the base complex into the \'electron_transfers\' key.'.format(base_complex))

				# WARNING: Negative elemental contributions cannot be set in the metabolites.txt input file
				elif 'glycyl(1)' == mod:
					mod_elements = {'H': -1}
					logging.warning('Elemental contribution for \'{:s}\' calculated manually.'.format(mod_name))
				elif 'cosh(1)' == mod:
					mod_elements = {'H': +1, 'O': -1, 'S': +1}
					logging.warning('Elemental contribution for \'{:s}\' calculated manually.'.format(mod_name))
				else:
					logging.warning('Elemental contribution for \'{:s}\' could not be determined. Please check me_mets.txt file.'.format(mod_name))

				if mod_elements:
					mod_elements = collections.Counter(mod_elements)
					mod_elements = { k:v * int(re.findall(r'\((\d+)\)', mod)[0]) for k,v in mod_elements.items() }
					base_complex_elements.update(mod_elements)
				else:
					logging.warning('Attempt to calculate a corrected formula for \'{:s}\' failed. Please check if it is the correct behaviour, or if the modification \'{:s}_c\' exists as a metabolite in the ME-model or a formula is included in the me_mets.txt file.'.format(met.id, mod_name))
			complex_elements = { k:base_complex_elements[k] for k in sorted(base_complex_elements) if base_complex_elements[k] != 0 }
			met.formula = ''.join([ '{:s}{:d}'.format(k, v) for k,v in complex_elements.items() ])
			met.elements = coralme.builder.helper_functions.parse_composition(met.formula)
			logging.warning('Setting new formula for \'{:s}\' to \'{:s}\' successfully.'.format(met.id, met.formula))

		# Update a second time to incorporate all of the metabolite formulas correctly
		for data in tqdm.tqdm(me.subreaction_data.query(r'(?!^\w\w\w_addition_at_\w\w\w$)'), 'Recalculation of the elemental contribution in SubReactions...', bar_format = bar_format):
			data._element_contribution = data.calculate_element_contribution()

		# Update reactions affected by formula update
		for r in tqdm.tqdm(me.reactions.query('^formation_'), 'Updating all FormationReactions...', bar_format = bar_format):
			r.update()

		for r in tqdm.tqdm(me.reactions.query('_mod_lipoyl'), 'Updating FormationReactions involving a lipoyl prosthetic group...', bar_format = bar_format):
			r.update()

		for r in tqdm.tqdm(me.reactions.query('_mod_glycyl'), 'Updating FormationReactions involving a glycyl radical...', bar_format = bar_format):
			r.update()

		# Update biomass_constituent_demand reaction(s) with components that previously has no formula
		for idx, row in biomass_constituents.iterrows():
			constituent_mass = sum(me.metabolites.get_by_id(c).formula_weight / 1000. * abs(v) for c,v in row.items() if isinstance(v, (float, int)))
			name = 'biomass_constituent_demand' if idx == 'biomass_constituent_demand' else 'biomass_constituent_demand_' + idx
			rxn = me.reactions.get_by_id(name)
			rxn.add_metabolites({ k:-(abs(v)) for k,v in row.items() if isinstance(v, (float, int)) }, combine = False)
			rxn.add_metabolites({me.metabolites.get_by_id('constituent_biomass'): constituent_mass}, combine = False)

		# ## Part 8: Set keffs
		# Step 1. Determine SASA and median SASA
		if bool(config.get('estimate_keffs', True)):
			sasa_dct = {
				x.id:( x.formula_weight ** (3. / 4.) if x.formula_weight else 0, x.id if not x.formula_weight else False )
				for x in me.metabolites if type(x) == coralme.core.component.Complex
				}

			for met in [ v[1] for k,v in sasa_dct.items() ]:
				if met:
					logging.warning('The complex \'{:s}\' has no valid formula to determine its molecular weight.'.format(met))
					logging.warning('Please, set a value in the keff input file for reactions associated to the \'{:s}\' complex.'.format(met))

			median_sasa = numpy.median([ v[0] for k,v in sasa_dct.items() if v[0] != 0 ])

			me.global_info['median_sasa'] = median_sasa
			me.global_info['sasa_estimation'] = { k:v[0] for k,v in sasa_dct.items() if v[0] != 0 }
			me.global_info['sasa_estimation_missing'] = [ v[1] for k,v in sasa_dct.items() if v[0] == 0 ]

			# Step 2: Estimate keff for all the reactions in the model
			#if "complex" not in df_keffs.columns: #df_keffs.empty: # The if True avoids the estimation if the user uses an "incomplete" input
			reaction_ids = [
				rxn for rxn in me.reactions if isinstance(rxn, coralme.core.reaction.MetabolicReaction)
				if rxn.id not in [ 'dummy_reaction_FWD_SPONT', 'dummy_reaction_REV_SPONT' ]
				if rxn._complex_data is not None
				]

			#if 'complex' in df_keffs.columns: # user provided a file with keffs # This is always True because w/o a user file, df_keffs is empty
			with open('{:s}/building_data/reaction_median_keffs.txt'.format(me.global_info['out_directory']), 'r') as infile:
				reaction_median_keffs = pandas.read_csv(infile, sep = '\t').set_index('reaction')

			mapped_keffs = {}
			for rxn in tqdm.tqdm(reaction_ids, 'Estimating effective turnover rates for reactions using the SASA method...', bar_format = bar_format):
				logging.warning('Estimating effective turnover rates for reaction \'{:s}\''.format(rxn.id))

				base_id = rxn._stoichiometric_data.id
				cplx_id = me.metabolites.get_by_id(rxn._complex_data.id).id

				if base_id not in reaction_median_keffs.index:
					continue

				median_keff = reaction_median_keffs.T[base_id]['keff']
				sasa = sasa_dct[cplx_id][0]
				keff = sasa * median_keff / median_sasa
				mapped_keffs[rxn] = 3000. if keff > 3000. else 0.01 if keff < 0.01 else keff

			# WARNING: keff won't be deprecated by coupling_coefficient_enzyme or coupling_coefficient_subreaction properties
			# WARNING: keff maps back to the value used to determine coupling_coefficient_enzyme or coupling_coefficient_subreaction
			# dictionary of reaction IDs : coralme.core.reaction objects
			rxns_to_map = { x.id:x for x in me.subreaction_data + me.reactions if hasattr(x, 'keff') }
			# Step 3: Replace user values if they match
			for idx, row in tqdm.tqdm(list(df_keffs.iterrows()), 'Mapping effective turnover rates from user input...', bar_format = bar_format):
				if row['direction'] == '' and row['complex'] == '' and row['mods'] == '':
					# subreactions have ID = reaction_name
					idx = row['reaction']
				else:
					# metabolic reactions have ID = reaction_name + direction + complex
					idx = '{:s}_{:s}_{:s}'.format(row['reaction'], row['direction'], row['complex'])
					if row['mods'] != '':
						idx = '{:s}_mod_{:s}'.format(idx, '_mod_'.join(row['mods'].split(' AND ')))

				if idx in rxns_to_map.keys():
					mapped_keffs[rxns_to_map[idx]] = 3000. if float(row['keff']) > 3000. else 0.01 if float(row['keff']) < 0.01 else row['keff']
					logging.warning('Mapping of the effective turnover rate for \'{:}\' with a user provided value.'.format(idx))
				else:
					logging.warning('Mapping of the effective turnover rate for \'{:}\' reaction failed. Please check if the reaction or subreaction is in the ME-model.'.format(idx))

			# Step 4: Set keffs
			if mapped_keffs:
				for rxn, keff in tqdm.tqdm(sorted(mapped_keffs.items(), key = lambda x: x[0].id), 'Setting the effective turnover rates using user input...', bar_format = bar_format):
					if isinstance(rxn, coralme.core.reaction.MetabolicReaction):
						rxn.coupling_coefficient_enzyme = float(keff) * me.unit_registry.parse_units('1 per second')
					if isinstance(rxn, coralme.core.processdata.SubreactionData):
						rxn.coupling_coefficient_subreaction = float(keff) * me.unit_registry.parse_units('1 per second')
					# update() will convert the coupling coefficient into mu / value / 3600.
					if hasattr(rxn, 'update'): # subreactions has no update attribute
						rxn.update()
					logging.warning('Setting the effective turnover rate for \'{:s}\' in {:f} successfully.'.format(rxn.id, float(keff)))

		# ### 5. Add metabolite compartments
		coralme.builder.compartments.add_compartments_to_model(me)

		# ### 6. Prune reactions from ME-model
		# WARNING: Do it recursively to reduce further the size of the ME-model.
		if prune:
			rnum = len(me.reactions)
			delta = 1
			while delta > 0:
				me.prune(skip=skip)
				delta = rnum - len(me.reactions)
				rnum = len(me.reactions)

		# Part 9. Save and report
		with open('{:s}/MEModel-step2-{:s}.pkl'.format(out_directory, model), 'wb') as outfile:
			pickle.dump(me, outfile)

		coralme.core.extended_classes.ListHandler.print_and_log('ME-model was saved in the {:s} directory as MEModel-step2-{:s}.pkl'.format(out_directory, model))

		n_mets = len(me.metabolites)
		new_mets = n_mets * 100. / len(me.gem.metabolites) - 100
		n_rxns = len(me.reactions)
		new_rxns = n_rxns * 100. / len(me.gem.reactions) - 100
		n_genes = len(me.metabolites.query(re.compile('^RNA_(?!biomass|dummy|degradosome)')))
		new_genes = n_genes * 100. / len(me.gem.genes) - 100

		coralme.core.extended_classes.ListHandler.print_and_log('ME-model reconstruction is done.')
		coralme.core.extended_classes.ListHandler.print_and_log('Number of metabolites in the ME-model is {:d} (+{:.2f}%, from {:d})'.format(n_mets, new_mets, len(me.gem.metabolites)))
		coralme.core.extended_classes.ListHandler.print_and_log('Number of reactions in the ME-model is {:d} (+{:.2f}%, from {:d})'.format(n_rxns, new_rxns, len(me.gem.reactions)))
		coralme.core.extended_classes.ListHandler.print_and_log('Number of genes in the ME-model is {:d} (+{:.2f}%, from {:d})'.format(n_genes, new_genes, len(me.gem.genes)))
		if hasattr(self, 'ref'):
			coralme.core.extended_classes.ListHandler.print_and_log('Number of missing genes from reconstruction with homology, but no function is {:d}. Check the curation notes for more details.'.format(coralme.builder.helper_functions.check_me_coverage(self)))
		else:
			coralme.core.extended_classes.ListHandler.print_and_log('Number of missing genes from reconstruction cannot be determined.')

		logging.shutdown()

		coralme.builder.notes.save_curation_notes(
				self.curation_notes,
				self.configuration['out_directory'] + '/curation_notes.json'
			)
		coralme.builder.notes.publish_curation_notes(
				self.curation_notes,
				self.configuration['out_directory']+ '/curation_notes.txt'
			)

		with open('{:s}/MEReconstruction-{:s}.log'.format(log_directory, model), 'w') as outfile:
			for filename in [
				'{:s}/MEReconstruction-step1-{:s}.log'.format(log_directory, model),
				'{:s}/MEReconstruction-step2-{:s}.log'.format(log_directory, model)
				]:

				try:
					pathlib.Path(filename).unlink(missing_ok = True) # python>=3.8
				except:
					if pathlib.Path(filename).exists():
						pathlib.Path(filename).unlink() # python==3.7

			logger = self.logger['MEReconstruction-step1'].log_list
			logger += self.logger['MEReconstruction-step2'].log_list
			debug = self.logger['MEReconstruction-step1'].debug

			coralme.core.extended_classes.ListHandler.log_to_file(logger, debug, outfile)


class METroubleshooter(object):
	"""
	METroubleshooter class for troubleshooting growth in a ME-model

	This class contains methods to identify gaps and obtain a feasible ME-model.

	Parameters
	----------
	MEBuilder : coralme.builder.main.MEBuilder
	"""

	def __init__(self, builder):
		self.logger = builder.logger
		self.me_model = builder.me_model
		self.configuration = builder.configuration
		self.curation_notes = builder.curation_notes

	def troubleshoot(self, growth_key_and_value = None, skip = set(),
		guesses = [], met_types = [], platform = None, solver = 'qminos', savefile = None,
		gapfill_cofactors = False):
		"""Performs the Gap-finding step of the reconstruction.

		This function will iterate through different parts of the M-
		and E-matrices, looking for a minimal set of sinks that
		allows for growth.

		Parameters
		----------
		growth_key_and_value : dict
			A dictionary of Sympy.Symbol and value(s) to evaluate. It defines
			the parameters for the feasibility checks in each iteration.
		skip : set
			A set of ME-components to not evaluate
		met_types: list
			Any combination of 'ME-Deadends', 'Cofactors', 'All-Deadends',
			'Metabolite', 'GenerictRNA', 'Complex', 'TranscribedGene',
			'TranslatedGene', 'ProcessedProtein', and/or 'GenericComponent'.
		solver: str
			Solver to use. Values: 'qminos', 'gurobi' or 'cplex'
		"""
		types = {
			'M-matrix' : [ 'ME-Deadends', 'Cofactors', 'All-Deadends', 'Metabolite' ],
			'E-matrix' : [ 'GenerictRNA', 'Complex', 'TranscribedGene', 'TranslatedGene', 'ProcessedProtein', 'GenericComponent' ]
			}

		if hasattr(self, 'notes') and self.notes.get('from cobra', False):
			met_types = [ ('M-matrix', 'Metabolite') ]
		elif len(met_types) > 0:
			met_types = [ ('M-matrix', x) if x in types['M-matrix'] else ('E-matrix', x) if x in types['E-matrix'] else None for x in set(met_types) ]
			met_types = [ x for x in met_types if x is not None ]
		else:
			logging.warning('Metabolite types valid values are {:s}. The predefined order of metabolites will be tested.\n'.format(', '.join(types['M-matrix'] + types['E-matrix'])))
			met_types = []
			for x, y in types.items():
				for met in y:
					met_types.append((x, met))

		if not hasattr(self, 'me_model'):
			me = self
			self = coralme.builder.main.MEBuilder(**{'out_directory' : '.'})
			self.me_model = me
			self.configuration['out_directory'] = './'
			self.configuration['log_directory'] = './'

		self.me_model.troubleshooting = True
		if solver in ['gurobi', 'cplex']:
			self.me_model.get_solution = self.me_model.optimize_windows
			self.me_model.check_feasibility = self.me_model.feas_windows(solver = solver)
		elif solver == "qminos":
			self.me_model.get_solution = self.me_model.optimize
			self.me_model.check_feasibility = self.me_model.feasibility
			print('The MINOS and quad MINOS solvers are a courtesy of Prof Michael A. Saunders. Please cite Ma, D., Yang, L., Fleming, R. et al. Reliable and efficient solution of genome-scale models of Metabolism and macromolecular Expression. Sci Rep 7, 40863 (2017). https://doi.org/10.1038/srep40863\n')
		else:
			raise ValueError('Solver not supported.')

		config = self.configuration
		model = config.get('ME-Model-ID', 'coralME')
		out_directory = config.get('out_directory', '.')
		log_directory = config.get('log_directory', '.')

		# clean restart if troubleshooter is killed
		if not hasattr(self.me_model, "troubleshooted") or self.me_model.troubleshooted:
			pass # do not remove TS reactions
		else:
			rxns = self.me_model.reactions.query('^TS_')
			self.me_model.remove_reactions(rxns)

		# set logger
		log = logging.getLogger() # root logger
		for hdlr in log.handlers[:]: # remove all old handlers
			log.removeHandler(hdlr)

		# Old code works in a separate script; but it works if we remove the old handler
		logging.basicConfig(filename = '{:s}/METroubleshooter-{:s}.log'.format(log_directory, model), filemode = 'w', level = logging.WARNING, format = log_format)
		log.addHandler(self.logger['METroubleshooter'])
		log.addHandler(logging.StreamHandler(sys.stdout))
		logging.captureWarnings(True)

		if growth_key_and_value is None:
			growth_key_and_value = { self.me_model.mu : 0.001 }

		growth_key, growth_value = zip(*growth_key_and_value.items())

		logging.warning('~ '*1 + 'Troubleshooting started...')

		if gapfill_cofactors:
			# Ensure cofactors can be produced
			logging.warning('  '*1 + 'Ensuring the ME-model can produce all cofactors')
			cofactors = coralme.builder.troubleshooting.get_cofactors_in_me_model(self.me_model)
			ts_cofactors = coralme.builder.troubleshooting.add_exchange_reactions(self.me_model, cofactors, prefix = 'COFACTOR_TS_')
			for ts in ts_cofactors:
				ts.bounds = (1e-6,1000)

		# Step 1. Test if current ME-model is feasible
		logging.warning('  '*1 + 'Checking if the ME-model can simulate growth without gapfilling reactions...')
		if self.me_model.check_feasibility(keys = growth_key_and_value):
			logging.warning('  '*1 + 'Original ME-model is feasible with a tested growth rate of {:f} 1/h'.format(list(growth_value)[0]))
			works = True
		else:
			logging.warning('  '*1 + 'Original ME-model is not feasible with a tested growth rate of {:f} 1/h'.format(list(growth_value)[0]))
			works = False

		# Step 2. Test different sets of MEComponents
		if len(guesses) > 0:
			guesses = [ x for x in guesses if self.me_model.metabolites.has_id(x) ]
			if len(guesses) > 0:
				met_types.insert(0, (guesses, 'User guesses'))

		e_gaps = []
		history = dict()
		if works == False:
			#logging.warning('~ '*1 + 'Step 3. Attempt gapfilling different groups of E-matrix components.')
			for idx, met_type in enumerate(met_types):
				logging.warning('  '*1 + 'Step {}. Gapfill reactions to provide components of type \'{:s}\' using brute force.'.format(idx + 1, met_type[1]))
				if met_type[0] == 'E-matrix':
					logging.warning('  '*5 + 'Relaxing bounds for E-matrix gap-fill')
					self.me_model.relax_bounds()
					self.me_model.reactions.protein_biomass_to_biomass.lower_bound = growth_value[0]/100 # Needed to enforce protein production
				if met_type[1] == 'User guesses':
					history, output = coralme.builder.troubleshooting.brute_check(self.me_model, growth_key_and_value, met_type, skip = skip, history = history,solver=solver)
				else:
					history, output = coralme.builder.troubleshooting.brute_check(self.me_model, growth_key_and_value, met_type[1], skip = skip, history = history,solver=solver)
				bf_gaps, no_gaps, works = output
				# close sink reactions that are not gaps
				if no_gaps:
					self.me_model.remove_reactions(no_gaps)
				if works:
					e_gaps = bf_gaps
					break

		if works: # Meaning it can grow in any case
			# Save warnings
			if isinstance(e_gaps, list) and e_gaps:
				self.curation_notes['troubleshoot'].append({
					'msg':'Some metabolites are necessary for growth',
					'triggered_by':e_gaps,#[ x for y in e_gaps for x in y ],
					'importance':'critical',
					'to_do':'Fix the gaps by adding reactions or solving other warnings. If some items are from the E-matrix, fix these first!'})

			# delete added sink reactions with lb == 0 and ub == 0
			sinks = []
			for rxn in self.me_model.reactions.query('^TS_'):
				sinks.append(rxn.id)
				#f = self.me_model.solution.fluxes[rxn.id]
				if rxn.lower_bound == 0 and rxn.upper_bound == 0:# or f == 0:
					self.me_model.remove_reactions([rxn])
			if sinks:
				logging.warning('~ '*1 + 'Troubleshooter added the following sinks: {:s}.'.format(', '.join(sinks)))
			logging.warning('~ '*1 + 'Final step. Fully optimizing with precision 1e-6 and save solution into the ME-model...')

			# Delete demand reactions for cofactor gapfilling
			if gapfill_cofactors:
				rxns = self.me_model.reactions.query('^COFACTOR_TS_')
				self.me_model.remove_reactions(rxns)

			# final optimization
			if self.me_model.get_solution(max_mu = 3.0, precision = 1e-6, verbose = False):
				if hasattr(self.me_model, 'gem'):
					logging.warning('  '*1 + 'Gapfilled ME-model is feasible with growth rate {:f} (M-model: {:f}).'.format(self.me_model.solution.objective_value, self.me_model.gem.optimize().objective_value))
				else:
					logging.warning('  '*1 + 'Gapfilled ME-model is feasible with growth rate {:f}.'.format(self.me_model.solution.objective_value))
			else:
				logging.warning('  '*1 + 'Error: Gapfilled ME-model is not feasible ?')

			# save model as a pickle file
			if savefile is None:
				savefile = '{:s}/MEModel-step3-{:s}-TS.pkl'.format(out_directory, self.me_model.id)
				message = 'ME-model was saved in the {:s} directory as MEModel-step3-{:s}-TS.pkl'.format(out_directory, self.me_model.id)
			elif pathlib.Path(savefile).parent.exists():
				message = 'ME-model was saved to {:s}.'.format(savefile)
			else:
				message = False
				logging.warning('Model was not saved. Please do it manually.')

			self.me_model.troubleshooted = True
			if message:
				with open(savefile, 'wb') as outfile:
					pickle.dump(self.me_model, outfile)
				logging.warning(message)

		else:
			logging.warning('~ '*1 + 'METroubleshooter failed to determine a set of problematic metabolites.')
			self.me_model.troubleshooted = False

		logging.shutdown()

		coralme.builder.notes.save_curation_notes(
				self.curation_notes,
				self.configuration['out_directory'] + '/curation_notes.json'
			)
		coralme.builder.notes.publish_curation_notes(
				self.curation_notes,
				self.configuration['out_directory']+ '/curation_notes.txt'
			)

		# We will remove duplicates entries in the log output
		with open('{:s}/METroubleshooter-{:s}.log'.format(log_directory, model), 'w') as outfile:
			logger = self.logger['METroubleshooter']
			coralme.core.extended_classes.ListHandler.log_to_file(logger.log_list, logger.debug, outfile)

		self.me_model.troubleshooting = False