# Source code for ord_rxn_converter.utility_functions_module
from ord_schema.proto import dataset_pb2, reaction_pb2
from google.protobuf.message import Message
# =============================================================================
# FUNCTIONS TO EXTRACT ENUMS FROM ALL MESSAGE TYPES
# =============================================================================
# Collect the value names and numbers of every enum:
[docs]
def extract_enums_from_message(descriptor, parent_name=''):
    """
    Collect the enums declared in a message descriptor and in its nested messages.

    The descriptor's own ``enum_types`` are recorded first; the function then
    recurses into every entry of ``nested_types``, extending the dotted name
    prefix with the nested message's name at each level.

    Args:
        descriptor: Message descriptor exposing ``enum_types`` and
            ``nested_types``. A falsy value (e.g. ``None``) yields ``{}``.
        parent_name: Dotted prefix prepended to every enum name found here.
            When empty (the default), names are left unprefixed.

    Returns:
        A dictionary keyed by qualified enum name, whose values map each enum
        value number to its name:
            {
                'EnumName': {value_number: 'VALUE_NAME', ...},
                'ParentMessage.NestedEnum': {value_number: 'VALUE_NAME', ...},
                ...
            }
        If two values of one enum share a number, the later name wins.
    """
    # Nothing to inspect when no descriptor was supplied.
    if not descriptor:
        return {}

    def _qualify(name):
        # Prefix with the parent path only when one was given.
        return f'{parent_name}.{name}' if parent_name else name

    collected = {}

    # Enums declared directly on this message.
    for enum_descriptor in descriptor.enum_types:
        number_to_name = {}
        for value in enum_descriptor.values:
            number_to_name[value.number] = value.name
        collected[_qualify(enum_descriptor.name)] = number_to_name

    # Enums declared on nested messages, at any depth.
    for child in descriptor.nested_types:
        child_enums = extract_enums_from_message(child, _qualify(child.name))
        collected.update(child_enums)

    return collected
# Public entry point: builds the full enum mapping for a proto module.
[docs]
def extract_all_enums(proto_module):
    """
    Gather the enums of every message class exposed by a protobuf module.

    Each name returned by ``dir(proto_module)`` is looked up on the module.
    Attributes that are classes and carry a ``DESCRIPTOR`` attribute are
    treated as message types; their descriptor is passed to
    ``extract_enums_from_message`` with the descriptor's own name as the
    prefix. Attributes that are not classes are skipped.

    Args:
        proto_module: The protobuf module (e.g., dataset_pb2, reaction_pb2) to
            extract enums from.

    Returns:
        A dictionary keyed by qualified enum name, whose values map each enum
        value number to its name:
            {
                'MessageName.EnumName': {value_number: 'VALUE_NAME', ...},
                'MessageName.NestedMessage.NestedEnum': {value_number: 'VALUE_NAME', ...},
                ...
            }

    Example:
        >>> from ord_schema.proto import reaction_pb2
        >>> enums_data = extract_all_enums(reaction_pb2)
        >>> analysis_types = enums_data['Analysis.AnalysisType']
    """
    collected = {}
    for attr_name in dir(proto_module):
        candidate = getattr(proto_module, attr_name)

        # Only classes that carry a DESCRIPTOR count as message types.
        if not isinstance(candidate, type):
            continue
        if not hasattr(candidate, 'DESCRIPTOR'):
            continue

        message_descriptor = candidate.DESCRIPTOR
        # Merging an empty result is a no-op, so no emptiness check is needed.
        collected.update(
            extract_enums_from_message(message_descriptor, message_descriptor.name)
        )
    return collected