Coverage for /builds/ericyuan00000/ase/ase/io/lammpsrun.py: 88.69%
221 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-06-18 01:20 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-06-18 01:20 +0000
1# fmt: off
3import gzip
4import struct
5from collections import deque
6from os.path import splitext
8import numpy as np
10from ase.atoms import Atoms
11from ase.calculators.lammps import convert
12from ase.calculators.singlepoint import SinglePointCalculator
13from ase.data import atomic_masses, chemical_symbols
14from ase.parallel import paropen
15from ase.quaternions import Quaternions
18def read_lammps_dump(infileobj, **kwargs):
19 """Method which reads a LAMMPS dump file.
21 LAMMPS chooses output method depending on the given suffix:
22 - .bin : binary file
23 - .gz : output piped through gzip
24 - .mpiio: using mpiio (should be like cleartext,
25 with different ordering)
26 - else : normal clear-text format
28 :param infileobj: string to file, opened file or file-like stream
30 """
31 # !TODO: add support for lammps-regex naming schemes (output per
32 # processor and timestep wildcards)
34 opened = False
35 if isinstance(infileobj, str):
36 opened = True
37 suffix = splitext(infileobj)[-1]
38 if suffix == ".bin":
39 fileobj = paropen(infileobj, "rb")
40 elif suffix == ".gz":
41 # !TODO: save for parallel execution?
42 fileobj = gzip.open(infileobj, "rb")
43 else:
44 fileobj = paropen(infileobj)
45 else:
46 suffix = splitext(infileobj.name)[-1]
47 fileobj = infileobj
49 if suffix == ".bin":
50 out = read_lammps_dump_binary(fileobj, **kwargs)
51 if opened:
52 fileobj.close()
53 return out
55 out = read_lammps_dump_text(fileobj, **kwargs)
57 if opened:
58 fileobj.close()
60 return out
63def lammps_data_to_ase_atoms(
64 data,
65 colnames,
66 cell,
67 celldisp,
68 pbc=False,
69 atomsobj=Atoms,
70 order=True,
71 specorder=None,
72 prismobj=None,
73 units="metal",
74):
75 """Extract positions and other per-atom parameters and create Atoms
77 :param data: per atom data
78 :param colnames: index for data
79 :param cell: cell dimensions
80 :param celldisp: origin shift
81 :param pbc: periodic boundaries
82 :param atomsobj: function to create ase-Atoms object
83 :param order: sort atoms by id. Might be faster to turn off.
84 Disregarded in case `id` column is not given in file.
85 :param specorder: list of species to map lammps types to ase-species
86 (usually .dump files to not contain type to species mapping)
87 :param prismobj: Coordinate transformation between lammps and ase
88 :type prismobj: Prism
89 :param units: lammps units for unit transformation between lammps and ase
90 :returns: Atoms object
91 :rtype: Atoms
93 """
94 if len(data.shape) == 1:
95 data = data[np.newaxis, :]
97 # read IDs if given and order if needed
98 if "id" in colnames:
99 ids = data[:, colnames.index("id")].astype(int)
100 if order:
101 sort_order = np.argsort(ids)
102 data = data[sort_order, :]
104 # determine the elements
105 if "element" in colnames:
106 # priority to elements written in file
107 elements = data[:, colnames.index("element")]
108 elif "mass" in colnames:
109 # try to determine elements from masses
110 elements = [
111 _mass2element(m)
112 for m in data[:, colnames.index("mass")].astype(float)
113 ]
114 elif "type" in colnames:
115 # fall back to `types` otherwise
116 elements = data[:, colnames.index("type")].astype(int)
118 # reconstruct types from given specorder
119 if specorder:
120 elements = [specorder[t - 1] for t in elements]
121 else:
122 # todo: what if specorder give but no types?
123 # in principle the masses could work for atoms, but that needs
124 # lots of cases and new code I guess
125 raise ValueError("Cannot determine atom types form LAMMPS dump file")
127 def get_quantity(labels, quantity=None):
128 try:
129 cols = [colnames.index(label) for label in labels]
130 if quantity:
131 return convert(data[:, cols].astype(float), quantity,
132 units, "ASE")
134 return data[:, cols].astype(float)
135 except ValueError:
136 return None
138 # Positions
139 positions = None
140 scaled_positions = None
141 if "x" in colnames:
142 # doc: x, y, z = unscaled atom coordinates
143 positions = get_quantity(["x", "y", "z"], "distance")
144 elif "xs" in colnames:
145 # doc: xs,ys,zs = scaled atom coordinates
146 scaled_positions = get_quantity(["xs", "ys", "zs"])
147 elif "xu" in colnames:
148 # doc: xu,yu,zu = unwrapped atom coordinates
149 positions = get_quantity(["xu", "yu", "zu"], "distance")
150 elif "xsu" in colnames:
151 # xsu,ysu,zsu = scaled unwrapped atom coordinates
152 scaled_positions = get_quantity(["xsu", "ysu", "zsu"])
153 else:
154 raise ValueError("No atomic positions found in LAMMPS output")
156 velocities = get_quantity(["vx", "vy", "vz"], "velocity")
157 charges = get_quantity(["q"], "charge")
158 forces = get_quantity(["fx", "fy", "fz"], "force")
159 # !TODO: how need quaternions be converted?
160 quaternions = get_quantity(["c_q[1]", "c_q[2]", "c_q[3]", "c_q[4]"])
162 # convert cell
163 cell = convert(cell, "distance", units, "ASE")
164 celldisp = convert(celldisp, "distance", units, "ASE")
165 if prismobj:
166 celldisp = prismobj.vector_to_ase(celldisp)
167 cell = prismobj.update_cell(cell)
169 if quaternions is not None:
170 out_atoms = Quaternions(
171 symbols=elements,
172 positions=positions,
173 cell=cell,
174 celldisp=celldisp,
175 pbc=pbc,
176 quaternions=quaternions,
177 )
178 elif positions is not None:
179 # reverse coordinations transform to lammps system
180 # (for all vectors = pos, vel, force)
181 if prismobj:
182 positions = prismobj.vector_to_ase(positions, wrap=True)
184 out_atoms = atomsobj(
185 symbols=elements,
186 positions=positions,
187 pbc=pbc,
188 celldisp=celldisp,
189 cell=cell
190 )
191 elif scaled_positions is not None:
192 out_atoms = atomsobj(
193 symbols=elements,
194 scaled_positions=scaled_positions,
195 pbc=pbc,
196 celldisp=celldisp,
197 cell=cell,
198 )
200 if velocities is not None:
201 if prismobj:
202 velocities = prismobj.vector_to_ase(velocities)
203 out_atoms.set_velocities(velocities)
204 if charges is not None:
205 out_atoms.set_initial_charges([charge[0] for charge in charges])
206 if forces is not None:
207 if prismobj:
208 forces = prismobj.vector_to_ase(forces)
209 # !TODO: use another calculator if available (or move forces
210 # to atoms.property) (other problem: synchronizing
211 # parallel runs)
212 calculator = SinglePointCalculator(out_atoms, energy=0.0,
213 forces=forces)
214 out_atoms.calc = calculator
216 # process the extra columns of fixes, variables and computes
217 # that can be dumped, add as additional arrays to atoms object
218 for colname in colnames:
219 # determine if it is a compute, fix or
220 # custom property/atom (but not the quaternian)
221 if (colname.startswith('f_') or colname.startswith('v_') or
222 colname.startswith('d_') or colname.startswith('d2_') or
223 (colname.startswith('c_') and not colname.startswith('c_q['))):
224 out_atoms.new_array(colname, get_quantity([colname]),
225 dtype='float')
227 elif colname.startswith('i_') or colname.startswith('i2_'):
228 out_atoms.new_array(colname, get_quantity([colname]),
229 dtype='int')
231 return out_atoms
234def construct_cell(diagdisp, offdiag):
235 """Help function to create an ASE-cell with displacement vector from
236 the lammps coordination system parameters.
238 :param diagdisp: cell dimension convoluted with the displacement vector
239 :param offdiag: off-diagonal cell elements
240 :returns: cell and cell displacement vector
241 :rtype: tuple
242 """
243 xlo, xhi, ylo, yhi, zlo, zhi = diagdisp
244 xy, xz, yz = offdiag
246 # create ase-cell from lammps-box
247 xhilo = (xhi - xlo) - abs(xy) - abs(xz)
248 yhilo = (yhi - ylo) - abs(yz)
249 zhilo = zhi - zlo
250 celldispx = xlo - min(0, xy) - min(0, xz)
251 celldispy = ylo - min(0, yz)
252 celldispz = zlo
253 cell = np.array([[xhilo, 0, 0], [xy, yhilo, 0], [xz, yz, zhilo]])
254 celldisp = np.array([celldispx, celldispy, celldispz])
256 return cell, celldisp
259def get_max_index(index):
260 if np.isscalar(index):
261 return index
262 elif isinstance(index, slice):
263 return index.stop if (index.stop is not None) else float("inf")
266def read_lammps_dump_text(fileobj, index=-1, **kwargs):
267 """Process cleartext lammps dumpfiles
269 :param fileobj: filestream providing the trajectory data
270 :param index: integer or slice object (default: get the last timestep)
271 :returns: list of Atoms objects
272 :rtype: list
273 """
274 # Load all dumped timesteps into memory simultaneously
275 lines = deque(fileobj.readlines())
276 index_end = get_max_index(index)
278 n_atoms = 0
279 images = []
281 # avoid references before assignment in case of incorrect file structure
282 cell, celldisp, pbc, info = None, None, False, {}
284 while len(lines) > n_atoms:
285 line = lines.popleft()
287 if "ITEM: TIMESTEP" in line:
288 line = lines.popleft()
289 # !TODO: pyflakes complains about this line -> do something
290 ntimestep = int(line.split()[0]) # NOQA
291 info["timestep"] = ntimestep
293 if "ITEM: NUMBER OF ATOMS" in line:
294 line = lines.popleft()
295 n_atoms = int(line.split()[0])
297 if "ITEM: BOX BOUNDS" in line:
298 # save labels behind "ITEM: BOX BOUNDS" in triclinic case
299 # (>=lammps-7Jul09)
300 tilt_items = line.split()[3:]
301 celldatarows = [lines.popleft() for _ in range(3)]
302 celldata = np.loadtxt(celldatarows)
303 diagdisp = celldata[:, :2].reshape(6, 1).flatten()
305 # determine cell tilt (triclinic case!)
306 if len(celldata[0]) > 2:
307 # for >=lammps-7Jul09 use labels behind "ITEM: BOX BOUNDS"
308 # to assign tilt (vector) elements ...
309 offdiag = celldata[:, 2]
310 # ... otherwise assume default order in 3rd column
311 # (if the latter was present)
312 if len(tilt_items) >= 3:
313 sort_index = [tilt_items.index(i)
314 for i in ["xy", "xz", "yz"]]
315 offdiag = offdiag[sort_index]
316 else:
317 offdiag = (0.0,) * 3
319 cell, celldisp = construct_cell(diagdisp, offdiag)
321 # Handle pbc conditions
322 if len(tilt_items) == 3:
323 pbc_items = tilt_items
324 elif len(tilt_items) > 3:
325 pbc_items = tilt_items[3:6]
326 else:
327 pbc_items = ["f", "f", "f"]
328 pbc = ["p" in d.lower() for d in pbc_items]
330 if "ITEM: ATOMS" in line:
331 colnames = line.split()[2:]
332 datarows = [lines.popleft() for _ in range(n_atoms)]
333 data = np.loadtxt(datarows, dtype=str, ndmin=2)
334 out_atoms = lammps_data_to_ase_atoms(
335 data=data,
336 colnames=colnames,
337 cell=cell,
338 celldisp=celldisp,
339 atomsobj=Atoms,
340 pbc=pbc,
341 **kwargs,
342 )
343 out_atoms.info.update(info)
344 images.append(out_atoms)
346 if len(images) > index_end >= 0:
347 break
349 return images[index]
352def read_lammps_dump_binary(
353 fileobj, index=-1, colnames=None, intformat="SMALLBIG", **kwargs
354):
355 """Read binary dump-files (after binary2txt.cpp from lammps/tools)
357 :param fileobj: file-stream containing the binary lammps data
358 :param index: integer or slice object (default: get the last timestep)
359 :param colnames: data is columns and identified by a header
360 :param intformat: lammps support different integer size. Parameter set \
361 at compile-time and can unfortunately not derived from data file
362 :returns: list of Atoms-objects
363 :rtype: list
364 """
365 # depending on the chosen compilation flag lammps uses either normal
366 # integers or long long for its id or timestep numbering
367 # !TODO: tags are cast to double -> missing/double ids (add check?)
368 _tagformat, bigformat = dict(
369 SMALLSMALL=("i", "i"), SMALLBIG=("i", "q"), BIGBIG=("q", "q")
370 )[intformat]
372 index_end = get_max_index(index)
374 # Standard columns layout from lammpsrun
375 if not colnames:
376 colnames = ["id", "type", "x", "y", "z",
377 "vx", "vy", "vz", "fx", "fy", "fz"]
379 images = []
381 # wrap struct.unpack to raise EOFError
382 def read_variables(string):
383 obj_len = struct.calcsize(string)
384 data_obj = fileobj.read(obj_len)
385 if obj_len != len(data_obj):
386 raise EOFError
387 return struct.unpack(string, data_obj)
389 while True:
390 try:
391 # Assume that the binary dump file is in the old (pre-29Oct2020)
392 # format
393 magic_string = None
395 # read header
396 ntimestep, = read_variables("=" + bigformat)
398 # In the new LAMMPS binary dump format (version 29Oct2020 and
399 # onward), a negative timestep is used to indicate that the next
400 # few bytes will contain certain metadata
401 if ntimestep < 0:
402 # First bigint was actually encoding the negative of the format
403 # name string length (we call this 'magic_string' to
404 magic_string_len = -ntimestep
406 # The next `magic_string_len` bytes will hold a string
407 # indicating the format of the dump file
408 magic_string = b''.join(read_variables(
409 "=" + str(magic_string_len) + "c"))
411 # Read endianness (integer). For now, we'll disregard the value
412 # and simply use the host machine's endianness (via '='
413 # character used with struct.calcsize).
414 #
415 # TODO: Use the endianness of the dump file in subsequent
416 # read_variables rather than just assuming it will match
417 # that of the host
418 read_variables("=i")
420 # Read revision number (integer)
421 revision, = read_variables("=i")
423 # Finally, read the actual timestep (bigint)
424 ntimestep, = read_variables("=" + bigformat)
426 _n_atoms, triclinic = read_variables("=" + bigformat + "i")
427 boundary = read_variables("=6i")
428 diagdisp = read_variables("=6d")
429 if triclinic != 0:
430 offdiag = read_variables("=3d")
431 else:
432 offdiag = (0.0,) * 3
433 size_one, = read_variables("=i")
435 if len(colnames) != size_one:
436 raise ValueError("Provided columns do not match binary file")
438 if magic_string and revision > 1:
439 # New binary dump format includes units string,
440 # columns string, and time
441 units_str_len, = read_variables("=i")
443 if units_str_len > 0:
444 # Read lammps units style
445 _ = b''.join(
446 read_variables("=" + str(units_str_len) + "c"))
448 flag, = read_variables("=c")
449 if flag != b'\x00':
450 # Flag was non-empty string
451 read_variables("=d")
453 # Length of column string
454 columns_str_len, = read_variables("=i")
456 # Read column string (e.g., "id type x y z vx vy vz fx fy fz")
457 _ = b''.join(read_variables("=" + str(columns_str_len) + "c"))
459 nchunk, = read_variables("=i")
461 # lammps cells/boxes can have different boundary conditions on each
462 # sides (makes mainly sense for different non-periodic conditions
463 # (e.g. [f]ixed and [s]hrink for a irradiation simulation))
464 # periodic case: b 0 = 'p'
465 # non-peridic cases 1: 'f', 2 : 's', 3: 'm'
466 pbc = np.sum(np.array(boundary).reshape((3, 2)), axis=1) == 0
468 cell, celldisp = construct_cell(diagdisp, offdiag)
470 data = []
471 for _ in range(nchunk):
472 # number-of-data-entries
473 n_data, = read_variables("=i")
474 # retrieve per atom data
475 data += read_variables("=" + str(n_data) + "d")
476 data = np.array(data).reshape((-1, size_one))
478 # map data-chunk to ase atoms
479 out_atoms = lammps_data_to_ase_atoms(
480 data=data,
481 colnames=colnames,
482 cell=cell,
483 celldisp=celldisp,
484 pbc=pbc,
485 **kwargs
486 )
488 images.append(out_atoms)
490 # stop if requested index has been found
491 if len(images) > index_end >= 0:
492 break
494 except EOFError:
495 break
497 return images[index]
500def _mass2element(mass):
501 """
502 Guess the element corresponding to a given atomic mass.
504 :param mass: Atomic mass for searching.
505 :return: Element symbol as a string.
506 """
507 min_idx = np.argmin(np.abs(atomic_masses - mass))
508 element = chemical_symbols[min_idx]
509 return element