Source code for hoomdxml_reader.hoomdxml_reader

"""
Module for reading legacy hoomd XML formatted files.

"""

__all__ = ["System"]

import networkx as nx
import gsd.hoomd

import xml.etree.ElementTree as ET

from hoomdxml_reader.molecule import Molecule
from warnings import warn

[docs]class System(object): """ Class that stores system information from XML and GSD files. A class to load hoomd XML or GSD formatted configuration files and store the information encoded in these files. This class will also optionally goup the underlying particles into molecules, inferred based upon the connectivity of particles as defined in the bonds section of the configuration file. If an XML or GSD file is passed during instantiation, it will load and parse file provided. Parameters ---------- file : string, optional, default=None Name of the hoomd xml or gsd file to load frame : int, optional, default=0 identify_molecules : bool, optional, default=True If True, the code will group the particles based upon their underlying connectivity. Particles bonded together will be considered a molecule ignore_zero_bond_order : bool, optional, default=False If True, particles without any bonds (i.e., bond order = 0) will be ignored when identifying molecules (i.e., they will not appear in the molecule list). If False, a particle with bond order = 0 will be considered to be a molecule. molecule_dict : dict, dtype=str, optional, default=None A dict that defines the molecule 'pattern' and associated user defined name. This is used for renaming molecules automatically identified. Returns ------ """ def __init__(self, file=None, frame=0, identify_molecules=True, ignore_zero_bond_order=False, molecule_dict=None): """Initialize the System class. This initializes the System class. If an XML or GSD file is passed during instantiation, it will load and parse file provided. Parameters ---------- file : string, optional, default=None Name of the hoomd xml or gsd file to load frame : int, optional, default=0 identify_molecules : bool, optional, default=True If True, the code will group the particles based upon their underlying connectivity. Particles bonded together will be considered a molecule ignore_zero_bond_order : bool, optional, default=False If True, particles without any bonds (i.e., bond order = 0) will be ignored when identifying molecules (i.e., they will not appear in the molecule list). If False, a particle with bond order = 0 will be considered to be a molecule. molecule_dict : dict, dtype=str, optional, default=None A dict that defines the molecule 'pattern' and associated user defined name. This is used for renaming molecules automatically identified. Returns ------ """ self._filename = None self._xyz = [] self._n_particles = 0 self._types = [] self._bond_order = [] self._bonds = [] self._angles = [] self._dihedrals = [] self._impropers = [] self._charges = [] self._masses = [] self._frame = frame self._box = [] self._molecules = [] self._unique_molecules = {} self._identify_molecules = identify_molecules self._ignore_zero_bond_order = ignore_zero_bond_order if file is not None: self._filename = file ext = file.split('.')[-1] if "xml" in ext: self._load_xml() elif "gsd" in ext: self._load_gsd(frame=self._frame) if self._identify_molecules == True: self._infer_molecules() if molecule_dict is not None: self.set_molecule_name_by_dictionary(molecule_dict) def _clear(self): self._filename = None self._xyz = [] self._n_particles = 0 self._types = [] self._bond_order = [] self._bonds = [] self._angles = [] self._dihedrals = [] self._impropers = [] self._charges = [] self._masses = [] self._box = [] self._molecules = [] self._unique_molecules = {} # essentially the same workflow as the constructor
[docs] def load(self, file=None, frame=0, identify_molecules=True, ignore_zero_bond_order=False, molecule_dict=None): """Loads an xml or gsd file. Load the xml or GSD file into the system class. This function will clear any information already loaded into the System class. Parameters ---------- file : string, optional, default=None Name of the hoomd xml or gsd file to load frame : int, optional, default=0 identify_molecules : bool, optional, default=True If True, the code will group the particles based upon their underlying connectivity. Particles bonded together will be considered a molecule ignore_zero_bond_order : bool, optional, default=False If True, particles without any bonds (i.e., bond order = 0) will be ignored when identifying molecules (i.e., they will not appear in the molecule list). If False, a particle with bond order = 0 will be considered to be a molecule. molecule_dict : dict, dtype=str, optional, default=None A dict that defines the molecule 'pattern' and associated user defined name. This is used for renaming molecules automatically identified. Returns ------ """ if file is None: raise Exception("You must provide the name of the XML or GSD file to parse.") else: self._clear() self._identify_molecules = identify_molecules self._ignore_zero_bond_order = ignore_zero_bond_order self._frame = frame self._filename = file ext = file.split('.')[-1] if "xml" in ext: self._load_xml() elif "gsd" in ext: self._load_gsd(frame=self._frame) if self._identify_molecules == True: self._infer_molecules() if molecule_dict is not None: self.set_molecule_name_by_dictionary(molecule_dict)
""" # This would populate the fields from an mdtraj trajectory. # I've commented this out because mdtraj format requires us to assume information, # for example, about charge and mass, and does not including angle, dihedral, or improper information def convert_mdtraj(self, mdtraj=None, frame=0, identify_molecules=True, ignore_zero_bond_order=False): if mdtraj is None: raise Exception("mdtraj traj not defined") else: self._ignore_zero_bond_order = ignore_zero_bond_order self._identify_molecules = identify_molecules self._clear() for xyz in fconfig.particles.position: self._xyz.append(xyz) self._charges.append(0) #assumes charge is zero self._masses.append(1) #assumes mass is unity beacuse mdtraj doesn't store it. self._n_particles = len(self._xyz) for atom in mdtraj.top.atoms: self._types.append(atom.name) self._box = list(mdtraj.unitcell_lengths[frame]) self._bonds = [] for bond in mdtraj.top.bonds: temp_bond = [f'{bond.atom1.name}{bond.atom2.name}', bond.atom1.index, bond.atom2.index] self._bonds.append(temp_bond) self._calc_bond_order() if self._identify_molecules == True: self._infer_molecules() """ # generic function to parse the topology entries, # takes the element as an argument and number of entries per line def _parse_topology(self, element, length): temp_element = self._config.find(element) if temp_element is not None: temp_text = temp_element.text agg_array = [] entry_temp = temp_text.split() for i in range(0, len(entry_temp), length): temp_array = [] temp_array.append(entry_temp[i]) for j in range(1, length): temp_array.append(int(entry_temp[i+j])) agg_array.append(temp_array) return agg_array # a generic function to parse a list of floats defined in the text between opening/closing tags for a given element def _parse_floats(self, element): temp_element = self._config.find(element) temp_text = temp_element.text agg_array = [] entry_temp = temp_text.split() for i in range(0, len(entry_temp)): agg_array.append(float(entry_temp[i])) return agg_array def _calc_bond_order(self): # calculate bond_order for i in range(0, self.n_particles): self._bond_order.append(0) for bond in self._bonds: i = bond[1] j = bond[2] self._bond_order[i] += 1 self._bond_order[j] += 1 # function to load and parse the XML def _load_xml(self): self._tree = ET.parse(self._filename) self._root = self._tree.getroot() self._config = self._root.find('configuration') # parse box information box_element = self._config.find('box') self._box = [float(box_element.attrib['Lx']), float(box_element.attrib['Ly']), float(box_element.attrib['Lz'])] # parse position data pos_element = self._config.find('position') self._n_particles = int(pos_element.attrib['num']) positions_text = pos_element.text pos_temp = positions_text.split() xyz_temp = [] for i in range(0, len(pos_temp), 3): temp_array = [float(pos_temp[i]), float(pos_temp[i+1]), float(pos_temp[i+2])] self._xyz.append(temp_array) # parse types type_element = self._config.find('type') type_text = type_element.text self._types = type_text.split() # parse mass self._masses = self._parse_floats(element='mass') # parse charge self._charges = self._parse_floats(element='charge') # parse topological info self._bonds = self._parse_topology(element='bond', length=3) self._angles = self._parse_topology(element='angle', length=4) self._dihedrals = self._parse_topology(element='dihedral', length=5) self._impropers = self._parse_topology(element='improper', length=5) # calculate bond_order self._calc_bond_order() # function to load and parse the GSD def _load_gsd(self, frame): f = gsd.hoomd.open(name=self._filename, mode='rb') snapshot = f[frame] for i, xyz in enumerate(snapshot.particles.position): self._xyz.append(list(xyz)) self._masses.append(float(snapshot.particles.mass[i])) self._charges.append(float(snapshot.particles.charge[i])) self._box = [float(snapshot.configuration.box[0]), float(snapshot.configuration.box[1]), float(snapshot.configuration.box[2])] for typeid in snapshot.particles.typeid: self._types.append(snapshot.particles.types[typeid]) for bond, typeid in zip(snapshot.bonds.group,snapshot.bonds.typeid): temp_bond = [snapshot.bonds.types[typeid], int(bond[0]), int(bond[1])] self._bonds.append(temp_bond) for angle, typeid in zip(snapshot.angles.group,snapshot.angles.typeid): temp_angle = [snapshot.angles.types[typeid], int(angle[0]), int(angle[1]), int(angle[2])] self._angles.append(temp_angle) for dihedral, typeid in zip(snapshot.dihedrals.group,snapshot.dihedrals.typeid): temp_dihedral = [snapshot.dihedrals.types[typeid], int(dihedral[0]), int(dihedral[1]), int(dihedral[2]), int(dihedral[3])] self._dihedrals.append(temp_dihedral) for improper, typeid in zip(snapshot.impropers.group,snapshot.impropers.typeid): temp_improper = [snapshot.impropers.types[typeid], int(improper[0]), int(improper[1]), int(improper[2]), int(improper[3])] self._impropers.append(temp_improper) self._n_particles = len(self._xyz) # calculate bond_order self._calc_bond_order() # use networkx to create a graph, then look for which components are connected def _infer_molecules(self): self._graph = nx.Graph() for bond in self._bonds: self._graph.add_edge(bond[1],bond[2]) for c in nx.connected_components(self._graph): mol_temp = Molecule() temp = list(c) temp.sort() for item in temp: mol_temp.add_particle(item, self._types[item]) for edge in self._graph.subgraph(c).edges: mol_temp.add_bond(list(edge)) self._molecules.append(mol_temp) if self._ignore_zero_bond_order == False: for i, bo in enumerate(self._bond_order): if bo == 0: mol_temp = Molecule() mol_temp.add_particle(i, self._types[i]) self._molecules.append(mol_temp) temp_set = set() self._unique_molecules = {} for molecule in self._molecules: temp_set.add(molecule.pattern) temp_list = list(temp_set) for i, temp_str in enumerate(temp_list): self._unique_molecules[temp_str] = f'molecule{i}' for molecule in self._molecules: molecule.set_molecule_name(self._unique_molecules[molecule.pattern]) # reads in a dictionary that includes the molecule pattern as a key with the user defined name as the associated value, # and re-assigns names of each molecule found for that pattern.
[docs] def set_molecule_name_by_dictionary(self, molecule_dict): """Assign molecule names. This function will assign names to the molecules in the system based upon the provided dictionary. A pattern is constructed by concatenating partice names together into a single string. Unique patterns in the system can be found in the `unique_molecules` dictionary. Parameters ---------- molecule_dict : dict, dtype=str A dict that defines the molecule 'pattern' and associated user defined name. This is used for renaming molecules automatically identified. Molecule pattern as the key and associated molecule name as the value in the dict, i.e., {pattern: name} Returns ------- """ for mol_name in molecule_dict: self._unique_molecules[mol_name] = molecule_dict[mol_name] for molecule in self._molecules: molecule.set_molecule_name(self._unique_molecules[molecule.pattern])
@property def n_particles(self): """The total number of particles in the system Parameters ---------- Returns ------- n_particles : int Number of particles in the system """ return self._n_particles @property def n_bonds(self): """The total number of bonds in the system Parameters ---------- Returns ------- n_bonds : int Number of bonds in the system """ return len(self._bonds) @property def n_angles(self): """The total number of angles in the system Parameters ---------- Returns ------- n_angles : int Number of angles in the system """ return len(self._angles) @property def n_dihedrals(self): """The total number of dihedrals in the system Parameters ---------- Returns ------- n_dihedrals : int Number of dihedrals in the system """ return len(self._dihedrals) @property def n_impropers(self): """The total number of impropers in the system Parameters ---------- Returns ------- n_impropers : int Number of impropers in the system """ return len(self._impropers) @property def xyz(self): """A list of xyz coordinates for each particle. Parameters ---------- Returns ------- xyz : list shape=(3,n_particles), dtype=float List containing a list of x, y, z coordinates of each particle. """ return self._xyz @property def types(self): """A list of all particle types. Parameters ---------- Returns ------- types : list, shape=(1,n_particles), dtype=str List of type names of all particles in the system. """ return self._types @property def masses(self): """A list of all masses defined in the source file. Parameters ---------- Returns ------- masses : list, shape=(1,n_particles), dtype=float List of masses of all particles in the system. """ return self._masses @property def charges(self): """A list of all charges defined in the source file. Parameters ---------- Returns ------- charges : list, shape=(1,n_particles), dtype=float List of charges of all particles in the system. """ return self._charges @property def bonds(self): """A list of all bonds defined in the source file. Parameters ---------- Returns ------- bonds : list, shape=(3, n_bonds), dtype=(str, int, int) List of all bonds in the system. The first entry per bond is the name of the bond (str) as defined in the source file. """ return self._bonds @property def angles(self): """A list of all angles defined in the source file. Parameters ---------- Returns ------- angles : list, shape=(4, n_angles), dtype=(str, int, int, int) List of all angles in the system. The first entry per angles is the name of the angle (str) as defined in the source file. """ return self._angles @property def dihedrals(self): """A list of all dihedrals defined in the source file Parameters ---------- Returns ------- dihedrals : list, shape=(5, n_dihedrals), dtype=(str, int, int, int, int) List of all dihedrals in the system. The first entry per dihedral is the name of the dihedral (str) as defined in the source file. """ return self._dihedrals @property def impropers(self): """A list of all impropers defined in the source file Parameters ---------- Returns ------- impropers : list, shape=(5, n_impropers), dtype=(str, int, int, int, int) List of all impropers in the system. The first entry per improper is the name of the improper (str) as defined in the source file. """ return self._impropers @property def molecules(self): """This is a list of all molecules found in the system, where each entry corresponds to an instance of the Molecule class. Parameters ---------- Returns ------- molecules : list, dtype=Molecule List containing each molecule identified by the code. Each entry in the list is an instance of the Molecule class. """ return self._molecules @property def unique_molecules(self): """A dict that contains the molecule pattern as the key and associated molecule name as the value, for each unique molecule in the system. Parameters ---------- Returns ------- unique_molecules : dict, dtype=str A dict that provides the molecule 'pattern' and associated assigned name ("molecule{i}") for each unique molecule type identified in the system. This is useful for allowing definition of the molecule_dict for assigning molecules names. """ return self._unique_molecules @property def graph(self): """A networkx graph generated from the bond information included in the source file. Parameters ---------- Returns ------- graph : networkx graph NetworkX graph constructed from all bonds defined in the XML file """ return self._graph @property def bond_order(self): """A list containing an integer bond order (i.e., total number of bonds) of each particle in the system. Parameters ---------- Returns ------- bond_order : list, shape=(1,n_particles), dtype=int A list of the bond order of each particle in the system. """ return self._bond_order @property def box(self): """List of the box lengths defined in the source file. Parameters ---------- Returns ------- box : list, shape(3), dtype=float, List of the box length formatted as [Lx, Ly, Lz] """ return self._box