Source code for ord_rxn_converter.outcomes_module

# import requirements:
from ord_schema.proto import dataset_pb2, reaction_pb2
from google.protobuf.message import Message
from uuid import uuid4
from ord_rxn_converter.utility_functions_module import extract_all_enums
from ord_rxn_converter.identifiers_module import extract_compound_identifiers, generate_compound_table

# Enum lookup tables for the ORD reaction schema, built once at import time by
# calling extract_all_enums(reaction_pb2). The functions in this module index it
# first by enum name (e.g. 'Time.TimeUnit') and then by the numeric enum value
# read from a message.
# TODO - have importable object instead..?
enums_data = extract_all_enums(reaction_pb2)

def extract_reaction_outcomes(reactionID, outcomes):
    """
    Extracts outcome information from ORD reaction data.

    Walks the outcomes of one reaction and collects, per outcome, the reaction
    time, conversion value, product rows and analysis records. Compound tables
    produced for the products of *all* outcomes are accumulated into a single
    list.

    Args:
        reactionID (str): Unique identifier for the reaction.
        outcomes (list): Outcome objects of a reaction, each exposing
            ``reaction_time``, ``conversion``, ``products`` and ``analyses``.

    Returns:
        tuple: A tuple containing two elements:
            outcomes_list (list): A list of lists, where each inner list is
                [reactionID, outcomeKey, reaction_time_value, time_unit,
                conversion_value, products_list, analyses_list].
                ``products_list`` / ``analyses_list`` are None for an outcome
                without products / analyses.
            outcome_identifiers (list | None): Compound tables gathered from
                the products of every outcome. None only when at least one
                outcome had no products and nothing was collected at all.
    """
    outcomes_list = []
    outcome_identifiers = []
    # Remember that a product-less outcome was seen instead of overwriting the
    # accumulator with None: rebinding it inside the loop made a later
    # `.extend()` fail and discarded identifiers collected from earlier outcomes.
    missing_products = False

    for index, outcome in enumerate(outcomes, start=1):
        outcomeKey = f"outcomeKey_{index}_{reactionID}"

        # reaction_time = 1
        time_unit = enums_data['Time.TimeUnit'][outcome.reaction_time.units]

        # conversion = 2 (read directly when the row is built)

        # products = 3
        if outcome.products:
            products_list, compound_table = extract_product(outcome.products)
            outcome_identifiers.extend(compound_table)
        else:
            products_list = None
            missing_products = True

        # analyses = 4
        analyses_list = extract_analyses(outcome.analyses) if outcome.analyses else None

        outcomes_list.append([
            reactionID,
            outcomeKey,
            outcome.reaction_time.value,
            time_unit,
            outcome.conversion.value,
            products_list,
            analyses_list,
        ])

    # Keep the historical "None when there are no products" signal, but only
    # when there is genuinely nothing to report.
    if missing_products and not outcome_identifiers:
        return outcomes_list, None
    return outcomes_list, outcome_identifiers
def extract_product(products):
    """
    Extracts product data and related measurements from ORD product objects.

    For every product this collects the InChIKey returned by
    ``extract_compound_identifiers``, the measurement rows returned by
    ``extract_product_measurements``, the texture, the features mapping and the
    reaction role name, and builds one compound table per product that has
    identifiers.

    Args:
        products (list): List of product objects from a reaction outcome.

    Returns:
        tuple: A tuple containing two elements:
            products_list (list): A list of lists, where each inner list is
                [inchi_key, is_desired_product, products_measurements,
                isolated_color, product_texture, feature_dict, reaction_role].
            compound_identifiers (list): One entry per product with
                identifiers, as returned by ``generate_compound_table``.
    """
    products_list = []
    products_measurements = []
    compound_identifiers = []

    for product in products:
        # identifiers = 1
        if product.identifiers:
            # Only the InChIKey is used here; the second element returned by
            # extract_compound_identifiers is not needed for the product row.
            inchi_key, _ = extract_compound_identifiers(product.identifiers)
            compound_identifiers.append(generate_compound_table(product.identifiers))
        else:
            # TODO: need to generate the InChIKey from SMILES or InChI & use the
            # InChI to update the COMPOUND TABLE - should this be in the
            # identifiers function?
            inchi_key = None

        # is_desired_product = 2 (read directly when the row is built)

        # measurements = 3
        if product.measurements:
            measurement_list = extract_product_measurements(product.measurements)
        else:
            measurement_list = None
        products_measurements.append(measurement_list)

        # isolated_color = 4 (read directly when the row is built)

        # texture = 5
        # NOTE(review): if `texture` is a protobuf sub-message this truthiness
        # test may not reflect field presence - confirm whether HasField('texture')
        # is the intended check.
        if product.texture:
            texture = enums_data['Texture.TextureType'][product.texture.type]
            product_texture = {texture: product.texture.details}
        else:
            product_texture = None

        # features = 6
        # Iterate key/value pairs explicitly: looping over the mapping itself
        # yields only keys, which cannot be unpacked into (key, value).
        feature_dict = dict(product.features.items()) if product.features else None

        # reaction_role = 7
        reaction_role = enums_data['ReactionRole.ReactionRoleType'].get(product.reaction_role, 'UNKNOWN')

        # NOTE(review): every row stores a reference to the same
        # `products_measurements` list, so once the loop finishes each row sees
        # one entry per product (in product order), not just its own
        # measurements. Left unchanged because callers may index it by product
        # position - confirm whether `measurement_list` was intended here.
        products_list.append([
            inchi_key,
            product.is_desired_product,
            products_measurements,
            product.isolated_color,
            product_texture,
            feature_dict,
            reaction_role,
        ])

    return products_list, compound_identifiers
def _extract_measurement_value(measurement):
    """Return (value_type, value, unit) for the populated ``value`` oneof of a measurement."""
    value_type = measurement.WhichOneof('value')

    # percentage = 8
    if value_type == 'percentage':
        return value_type, measurement.percentage.value, 'Percent'
    # float_value = 9
    if value_type == 'float_value':
        return value_type, measurement.float_value.value, None
    # string_value = 10
    if value_type == 'string_value':
        return value_type, measurement.string_value, None
    # amount = 11
    if value_type == 'amount':
        # NOTE(review): `extract_amount` is not imported in the visible part of
        # this module - confirm where it is defined and import it.
        _amount_type, value, unit = extract_amount(measurement.amount)
        return value_type, value, unit

    # Nothing set in the oneof (or an unrecognised member): no value, no unit.
    return value_type or None, None, None


def extract_product_measurements(measurements, inchi_key=None, identifier_list=None):
    """
    Extracts measurement data from ORD product measurements.

    Builds one row per measurement containing its type, the populated value
    (percentage, float, string or amount), retention time, selectivity type and
    wavelength.

    Args:
        measurements (list): List of measurement objects associated with a product.
        inchi_key (str, optional): InChIKey of the product the measurements
            belong to; copied into every row. Defaults to None.
        identifier_list (list, optional): Identifiers of that product; copied
            into every row. Defaults to None.

    Returns:
        list: A list of lists, where each inner list contains:
            [index, inchi_key, identifier_list, analysis_key, measurement_type,
            details, uses_internal_standard, is_normalized, authentic_standard,
            measurement_value_type, measurement_value, measurement_value_unit,
            retention_time, time_unit, select_type, wavelength, wavelength_unit]
            Empty / unset entries are reported as None.
    """
    # NOTE(review): the `if measurement.<submessage>` tests below rely on object
    # truthiness; for protobuf sub-messages HasField(...) may be the intended
    # presence check - confirm.
    measurement_list = []

    for index, measurement in enumerate(measurements):
        measurement_type = enums_data['ProductMeasurement.ProductMeasurementType'][measurement.type]
        measurement_value_type, measurement_value, measurement_value_unit = (
            _extract_measurement_value(measurement)
        )

        # retention_time = 12
        if measurement.retention_time:
            retention_time = measurement.retention_time.value
            time_unit = enums_data['Time.TimeUnit'][measurement.retention_time.units]
        else:
            retention_time = None
            time_unit = None

        # selectivity: always bind select_type so a measurement without
        # selectivity neither fails nor reuses the previous iteration's value.
        if measurement.selectivity:
            select_type = enums_data['ProductMeasurement.Selectivity.SelectivityType'][measurement.selectivity.type]
        else:
            select_type = None

        # mass_spec_details = 13
        # TODO: mass-spec details (type, details, TIC range, EIC masses) are not
        # part of the emitted row; add columns here if they are needed downstream.

        # wavelength = 15
        if measurement.wavelength:
            wavelength = measurement.wavelength.value
            wavelength_unit = enums_data['Wavelength.WavelengthUnit'][measurement.wavelength.units]
        else:
            wavelength = None
            wavelength_unit = None

        measurement_list.append([
            index,
            inchi_key,
            identifier_list,
            measurement.analysis_key or None,
            measurement_type,
            measurement.details or None,
            measurement.uses_internal_standard or None,
            measurement.is_normalized or None,
            measurement.authentic_standard or None,
            measurement_value_type,
            measurement_value,
            measurement_value_unit,
            retention_time,
            time_unit,
            select_type,
            wavelength,
            wavelength_unit,
        ])

    return measurement_list
def extract_analyses(analyses):
    """
    Extracts analytical data from ORD reaction analyses.

    Produces one record per analysis with its type name, free-text details,
    CHMO id, isolated-species flag, instrument information and the data
    entries attached to that analysis.

    Args:
        analyses (dict): Mapping of analysis_key to analysis object.

    Returns:
        list: A list of dictionaries, one per analysis, with the keys
            'analysisKey', 'analysisType', 'Details', 'CHMO_ID',
            'IsolatedSpecies', 'data', 'instrumentManufacturer' and
            'lastCalibrated'. 'data' maps each data key of that analysis to
            ``[value, description]``, where ``value`` is the populated member
            of the data's ``kind`` oneof (None when nothing is set).
            'lastCalibrated' is ``analysis.instrument_last_calibrated`` passed
            through unchanged.
    """
    analyses_list = []

    for analysis_key, analysis in analyses.items():
        analysis_type = enums_data['Analysis.AnalysisType'].get(analysis.type, 'UNKNOWN')

        # Build a fresh dict per analysis; a single shared dict would make every
        # record point at the same object holding the data of all analyses.
        data_dict = {}
        for data_key, data in analysis.data.items():
            kind = data.WhichOneof('kind')
            # WhichOneof returns None when no member is set; getattr(data, None)
            # would raise TypeError, so report a missing value instead.
            value = getattr(data, kind) if kind else None
            data_dict[data_key] = [value, data.description]

        analyses_list.append({
            'analysisKey': analysis_key,
            'analysisType': analysis_type,
            'Details': analysis.details,
            'CHMO_ID': analysis.chmo_id,
            'IsolatedSpecies': analysis.is_of_isolated_species,
            'data': data_dict,
            'instrumentManufacturer': analysis.instrument_manufacturer,
            'lastCalibrated': analysis.instrument_last_calibrated,
        })

    return analyses_list