# Source code for chemfileconverter

__version__ = '0.1.0'

from typing import IO
from typing import Any
from typing import TextIO


def load(fp: TextIO) -> Any:
    """Read a Chemical Table document from ``fp`` and deserialize it.

    ``fp`` only needs a ``.read()`` method returning the whole document as
    text; that text is handed to :func:`loads` and its result is returned
    unchanged.
    """
    document = fp.read()
    return loads(document)
def loads(s):
    """Deserialize ``s`` (a ``str`` holding a Chemical Table document).

    A fresh :class:`ChemDecoder` is created for every call and the result of
    its ``decode`` method is returned as-is.
    """
    decoder = ChemDecoder()
    return decoder.decode(s)
class ChemDecoder:
    """Decode the text of an ``$RXN`` reaction file into plain dicts and lists."""

    def decode(self, s):
        """Return the Python representation of ``s`` (a ``str`` instance
        containing a Chemical Table document).

        The third line supplies ``program`` and ``name`` (its first and second
        whitespace-separated tokens) and the fifth line supplies the substrate
        and product counts. Every ``$MOL`` section that follows is decoded into
        one entry of ``mol_files``.

        Returns:
            A dict with the keys ``name``, ``program``, ``substrate_count``,
            ``product_count`` and ``mol_files``.

        Raises:
            ValueError: if the number of ``$MOL`` sections differs from
                ``substrate_count + product_count``, or a numeric field cannot
                be converted.
            IndexError: if a required line or token is missing.
        """
        # splitlines() instead of split('\n'): with CRLF input the latter leaves
        # a trailing '\r' on every line, so no line ever equals '$MOL'.
        content = s.splitlines()

        header = content[2].split()
        name, program = header[1], header[0]

        counts = content[4].split()
        substrate_count = int(counts[0])
        product_count = int(counts[1])

        mol_files = self._read_molfiles(content, substrate_count + product_count)
        return {
            'name': name,
            'program': program,
            'substrate_count': substrate_count,
            'product_count': product_count,
            'mol_files': mol_files,
        }

    def _read_molfiles(self, s: list, metabolite_count: int) -> list:
        """Decode every ``$MOL`` section found in the list of lines ``s``.

        Raises:
            ValueError: if the number of ``$MOL`` lines is not
                ``metabolite_count``.
        """
        mol_starts = [i for i, line in enumerate(s) if line == '$MOL']
        if len(mol_starts) != metabolite_count:
            raise ValueError('Metabolites do not match the substrate and product counts')

        # Each section runs up to the next '$MOL' line; the last one runs to
        # the end of the document.
        mol_ends = mol_starts[1:] + [len(s)]
        return [
            self._read_molfile(s[start:end], index)
            for index, (start, end) in enumerate(zip(mol_starts, mol_ends))
        ]

    def _read_molfile(self, mol: list, index: int) -> dict:
        """Decode one ``$MOL`` section (``mol[0]`` is the ``$MOL`` line itself)."""
        tokens = mol[4].split()
        # The counts line is tokenised on whitespace, so every field must be
        # populated. The last two numeric fields usually fuse into one token
        # ("  0999" -> "0999"), giving 11 tokens; when they stay separate there
        # are 12 and the trailing fields sit one position further right.
        shift = 1 if len(tokens) > 11 else 0
        counts_line = {
            'atom_count': int(tokens[0]),
            'bond_count': int(tokens[1]),
            'atom_list_number': int(tokens[2]),
            'chiral_flag': int(tokens[4]),
            'stext_entries': int(tokens[5]),
            'additional_properties': int(tokens[9 + shift]),
            'version': tokens[10 + shift],
        }

        atom_end = 5 + counts_line['atom_count']
        bond_end = atom_end + counts_line['bond_count']

        atom_block = [
            self._parse_atom_line(line, atom_id)
            for atom_id, line in enumerate(mol[5:atom_end], start=1)
        ]
        bond_block = [
            self._parse_bond_line(line, bond_id)
            for bond_id, line in enumerate(mol[atom_end:bond_end], start=1)
        ]
        # Blank lines and the terminator are dropped; dumps() writes the
        # terminator itself. The terminator is matched on its tokens so both
        # 'M END' and the two-space 'M  END' are recognised.
        properties = [
            line for line in mol[bond_end:]
            if line and line.split() != ['M', 'END']
        ]

        return {
            'name': mol[1],
            'program': mol[2],
            'comment': mol[3],
            'index': index,
            'counts_line': counts_line,
            'atom_block': atom_block,
            'bond_block': bond_block,
            'properties_block': properties,
        }

    @staticmethod
    def _parse_atom_line(atom: str, atom_id: int) -> dict:
        """Decode one fixed-width atom line; columns 54-60 are not read."""
        return {
            'id': atom_id,
            'x': float(atom[0:10]),
            'y': float(atom[10:20]),
            'z': float(atom[20:30]),
            'symbol': atom[30:33].strip(),
            'mass_difference': float(atom[33:36]),
            'charge': int(atom[36:39]),
            'stereo_parity': int(atom[39:42]),
            'hydrogen_count': int(atom[42:45]),
            'stereo_care': int(atom[45:48]),
            'valence': int(atom[48:51]),
            'h0_designator': int(atom[51:54]),
            'aam': atom[60:63].strip(),
            'inversion_retention_flag': int(atom[63:66]),
            'exact_change_flag': int(atom[66:69]),
        }

    @staticmethod
    def _parse_bond_line(bond: str, bond_id: int) -> dict:
        """Decode one fixed-width bond line (six 3-character fields)."""
        # NOTE(review): the CTfile V2000 layout appears to have an unused field
        # at columns 12-15, which would put topology / reacting centre one
        # field further right than read here - confirm against sample files
        # before changing; dumps() mirrors the columns used below.
        return {
            'id': bond_id,
            'first_atom': int(bond[0:3]),
            'second_atom': int(bond[3:6]),
            'bond_type': int(bond[6:9]),
            'bond_stereo': int(bond[9:12]),
            'bond_topology': int(bond[12:15]),
            'reaction_center': int(bond[15:18]),
        }


def dump(obj: Any, fp: IO[str]) -> None:
    """Serialize ``obj`` with :func:`dumps` and write the text to ``fp``."""
    fp.write(dumps(obj))


def _field(value: Any, width: int = 3) -> str:
    """Return ``str(value)`` right-justified in ``width`` columns."""
    return str(value).rjust(width)


def _format_counts_line(counts: dict) -> str:
    """Build a molfile counts line with all twelve fields.

    Fields that ``ChemDecoder`` does not keep are written as ``0`` so that the
    kept ones land where the decoder looks for them.
    """
    unused = _field(0)
    return ''.join([
        _field(counts['atom_count']),
        _field(counts['bond_count']),
        _field(counts['atom_list_number']),
        unused,
        _field(counts['chiral_flag']),
        _field(counts['stext_entries']),
        unused * 4,
        _field(counts['additional_properties']),
        _field(counts['version'], 6),
    ])


def _format_atom_line(atom: dict) -> str:
    """Build one 69-column atom line matching the columns ``ChemDecoder`` reads."""
    parts = [format(float(atom[axis]), '10.4f') for axis in ('x', 'y', 'z')]
    parts.append(' ' + str(atom['symbol']).ljust(2))
    # 'g' drops the trailing '.0' the decoder's float() introduces, so the
    # 3-column field holds '  0' rather than '0.0'.
    parts.append(_field(format(float(atom['mass_difference']), 'g')))
    parts.extend(
        _field(atom[key])
        for key in ('charge', 'stereo_parity', 'hydrogen_count',
                    'stereo_care', 'valence', 'h0_designator')
    )
    # Columns 54-60 are skipped by the decoder; they must still be present so
    # the remaining fields start at column 60.
    parts.append(_field(0) * 2)
    parts.extend(
        _field(atom[key])
        for key in ('aam', 'inversion_retention_flag', 'exact_change_flag')
    )
    return ''.join(parts)


def _format_bond_line(bond: dict) -> str:
    """Build one bond line from the six fields ``ChemDecoder`` reads."""
    return ''.join(
        _field(bond[key])
        for key in ('first_atom', 'second_atom', 'bond_type',
                    'bond_stereo', 'bond_topology', 'reaction_center')
    )


def dumps(obj: Any) -> str:
    """Serialize ``obj`` (a dict shaped like the result of ``loads``) to the
    text of an ``$RXN`` document.

    The output can be read back by ``ChemDecoder.decode``: counts, atom and
    bond lines use the column layout the decoder expects, every entry of
    ``properties_block`` is written on its own line, and each molfile is closed
    with an ``M  END`` line.

    Raises:
        KeyError: if ``obj`` or one of its molfiles lacks a required key.
    """
    lines = [
        '$RXN',
        '',
        f' {obj["program"]} {obj["name"]}',
        '',
        _field(obj['substrate_count']) + _field(obj['product_count']),
    ]
    for mol in obj['mol_files']:
        lines.append('$MOL')
        lines.extend(str(mol[key]) for key in ('name', 'program', 'comment'))
        lines.append(_format_counts_line(mol['counts_line']))
        lines.extend(_format_atom_line(atom) for atom in mol['atom_block'])
        lines.extend(_format_bond_line(bond) for bond in mol['bond_block'])
        lines.extend(str(entry) for entry in mol['properties_block'])
        lines.append('M  END')
    # Collected in a list and joined once instead of repeated string +=.
    return '\n'.join(lines) + '\n'