Coverage for /builds/ericyuan00000/ase/ase/io/lammpsrun.py: 88.69%

221 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-06-18 01:20 +0000

1# fmt: off 

2 

3import gzip 

4import struct 

5from collections import deque 

6from os.path import splitext 

7 

8import numpy as np 

9 

10from ase.atoms import Atoms 

11from ase.calculators.lammps import convert 

12from ase.calculators.singlepoint import SinglePointCalculator 

13from ase.data import atomic_masses, chemical_symbols 

14from ase.parallel import paropen 

15from ase.quaternions import Quaternions 

16 

17 

18def read_lammps_dump(infileobj, **kwargs): 

19 """Method which reads a LAMMPS dump file. 

20 

21 LAMMPS chooses output method depending on the given suffix: 

22 - .bin : binary file 

23 - .gz : output piped through gzip 

24 - .mpiio: using mpiio (should be like cleartext, 

25 with different ordering) 

26 - else : normal clear-text format 

27 

28 :param infileobj: string to file, opened file or file-like stream 

29 

30 """ 

31 # !TODO: add support for lammps-regex naming schemes (output per 

32 # processor and timestep wildcards) 

33 

34 opened = False 

35 if isinstance(infileobj, str): 

36 opened = True 

37 suffix = splitext(infileobj)[-1] 

38 if suffix == ".bin": 

39 fileobj = paropen(infileobj, "rb") 

40 elif suffix == ".gz": 

41 # !TODO: save for parallel execution? 

42 fileobj = gzip.open(infileobj, "rb") 

43 else: 

44 fileobj = paropen(infileobj) 

45 else: 

46 suffix = splitext(infileobj.name)[-1] 

47 fileobj = infileobj 

48 

49 if suffix == ".bin": 

50 out = read_lammps_dump_binary(fileobj, **kwargs) 

51 if opened: 

52 fileobj.close() 

53 return out 

54 

55 out = read_lammps_dump_text(fileobj, **kwargs) 

56 

57 if opened: 

58 fileobj.close() 

59 

60 return out 

61 

62 

63def lammps_data_to_ase_atoms( 

64 data, 

65 colnames, 

66 cell, 

67 celldisp, 

68 pbc=False, 

69 atomsobj=Atoms, 

70 order=True, 

71 specorder=None, 

72 prismobj=None, 

73 units="metal", 

74): 

75 """Extract positions and other per-atom parameters and create Atoms 

76 

77 :param data: per atom data 

78 :param colnames: index for data 

79 :param cell: cell dimensions 

80 :param celldisp: origin shift 

81 :param pbc: periodic boundaries 

82 :param atomsobj: function to create ase-Atoms object 

83 :param order: sort atoms by id. Might be faster to turn off. 

84 Disregarded in case `id` column is not given in file. 

85 :param specorder: list of species to map lammps types to ase-species 

86 (usually .dump files to not contain type to species mapping) 

87 :param prismobj: Coordinate transformation between lammps and ase 

88 :type prismobj: Prism 

89 :param units: lammps units for unit transformation between lammps and ase 

90 :returns: Atoms object 

91 :rtype: Atoms 

92 

93 """ 

94 if len(data.shape) == 1: 

95 data = data[np.newaxis, :] 

96 

97 # read IDs if given and order if needed 

98 if "id" in colnames: 

99 ids = data[:, colnames.index("id")].astype(int) 

100 if order: 

101 sort_order = np.argsort(ids) 

102 data = data[sort_order, :] 

103 

104 # determine the elements 

105 if "element" in colnames: 

106 # priority to elements written in file 

107 elements = data[:, colnames.index("element")] 

108 elif "mass" in colnames: 

109 # try to determine elements from masses 

110 elements = [ 

111 _mass2element(m) 

112 for m in data[:, colnames.index("mass")].astype(float) 

113 ] 

114 elif "type" in colnames: 

115 # fall back to `types` otherwise 

116 elements = data[:, colnames.index("type")].astype(int) 

117 

118 # reconstruct types from given specorder 

119 if specorder: 

120 elements = [specorder[t - 1] for t in elements] 

121 else: 

122 # todo: what if specorder give but no types? 

123 # in principle the masses could work for atoms, but that needs 

124 # lots of cases and new code I guess 

125 raise ValueError("Cannot determine atom types form LAMMPS dump file") 

126 

127 def get_quantity(labels, quantity=None): 

128 try: 

129 cols = [colnames.index(label) for label in labels] 

130 if quantity: 

131 return convert(data[:, cols].astype(float), quantity, 

132 units, "ASE") 

133 

134 return data[:, cols].astype(float) 

135 except ValueError: 

136 return None 

137 

138 # Positions 

139 positions = None 

140 scaled_positions = None 

141 if "x" in colnames: 

142 # doc: x, y, z = unscaled atom coordinates 

143 positions = get_quantity(["x", "y", "z"], "distance") 

144 elif "xs" in colnames: 

145 # doc: xs,ys,zs = scaled atom coordinates 

146 scaled_positions = get_quantity(["xs", "ys", "zs"]) 

147 elif "xu" in colnames: 

148 # doc: xu,yu,zu = unwrapped atom coordinates 

149 positions = get_quantity(["xu", "yu", "zu"], "distance") 

150 elif "xsu" in colnames: 

151 # xsu,ysu,zsu = scaled unwrapped atom coordinates 

152 scaled_positions = get_quantity(["xsu", "ysu", "zsu"]) 

153 else: 

154 raise ValueError("No atomic positions found in LAMMPS output") 

155 

156 velocities = get_quantity(["vx", "vy", "vz"], "velocity") 

157 charges = get_quantity(["q"], "charge") 

158 forces = get_quantity(["fx", "fy", "fz"], "force") 

159 # !TODO: how need quaternions be converted? 

160 quaternions = get_quantity(["c_q[1]", "c_q[2]", "c_q[3]", "c_q[4]"]) 

161 

162 # convert cell 

163 cell = convert(cell, "distance", units, "ASE") 

164 celldisp = convert(celldisp, "distance", units, "ASE") 

165 if prismobj: 

166 celldisp = prismobj.vector_to_ase(celldisp) 

167 cell = prismobj.update_cell(cell) 

168 

169 if quaternions is not None: 

170 out_atoms = Quaternions( 

171 symbols=elements, 

172 positions=positions, 

173 cell=cell, 

174 celldisp=celldisp, 

175 pbc=pbc, 

176 quaternions=quaternions, 

177 ) 

178 elif positions is not None: 

179 # reverse coordinations transform to lammps system 

180 # (for all vectors = pos, vel, force) 

181 if prismobj: 

182 positions = prismobj.vector_to_ase(positions, wrap=True) 

183 

184 out_atoms = atomsobj( 

185 symbols=elements, 

186 positions=positions, 

187 pbc=pbc, 

188 celldisp=celldisp, 

189 cell=cell 

190 ) 

191 elif scaled_positions is not None: 

192 out_atoms = atomsobj( 

193 symbols=elements, 

194 scaled_positions=scaled_positions, 

195 pbc=pbc, 

196 celldisp=celldisp, 

197 cell=cell, 

198 ) 

199 

200 if velocities is not None: 

201 if prismobj: 

202 velocities = prismobj.vector_to_ase(velocities) 

203 out_atoms.set_velocities(velocities) 

204 if charges is not None: 

205 out_atoms.set_initial_charges([charge[0] for charge in charges]) 

206 if forces is not None: 

207 if prismobj: 

208 forces = prismobj.vector_to_ase(forces) 

209 # !TODO: use another calculator if available (or move forces 

210 # to atoms.property) (other problem: synchronizing 

211 # parallel runs) 

212 calculator = SinglePointCalculator(out_atoms, energy=0.0, 

213 forces=forces) 

214 out_atoms.calc = calculator 

215 

216 # process the extra columns of fixes, variables and computes 

217 # that can be dumped, add as additional arrays to atoms object 

218 for colname in colnames: 

219 # determine if it is a compute, fix or 

220 # custom property/atom (but not the quaternian) 

221 if (colname.startswith('f_') or colname.startswith('v_') or 

222 colname.startswith('d_') or colname.startswith('d2_') or 

223 (colname.startswith('c_') and not colname.startswith('c_q['))): 

224 out_atoms.new_array(colname, get_quantity([colname]), 

225 dtype='float') 

226 

227 elif colname.startswith('i_') or colname.startswith('i2_'): 

228 out_atoms.new_array(colname, get_quantity([colname]), 

229 dtype='int') 

230 

231 return out_atoms 

232 

233 

234def construct_cell(diagdisp, offdiag): 

235 """Help function to create an ASE-cell with displacement vector from 

236 the lammps coordination system parameters. 

237 

238 :param diagdisp: cell dimension convoluted with the displacement vector 

239 :param offdiag: off-diagonal cell elements 

240 :returns: cell and cell displacement vector 

241 :rtype: tuple 

242 """ 

243 xlo, xhi, ylo, yhi, zlo, zhi = diagdisp 

244 xy, xz, yz = offdiag 

245 

246 # create ase-cell from lammps-box 

247 xhilo = (xhi - xlo) - abs(xy) - abs(xz) 

248 yhilo = (yhi - ylo) - abs(yz) 

249 zhilo = zhi - zlo 

250 celldispx = xlo - min(0, xy) - min(0, xz) 

251 celldispy = ylo - min(0, yz) 

252 celldispz = zlo 

253 cell = np.array([[xhilo, 0, 0], [xy, yhilo, 0], [xz, yz, zhilo]]) 

254 celldisp = np.array([celldispx, celldispy, celldispz]) 

255 

256 return cell, celldisp 

257 

258 

259def get_max_index(index): 

260 if np.isscalar(index): 

261 return index 

262 elif isinstance(index, slice): 

263 return index.stop if (index.stop is not None) else float("inf") 

264 

265 

266def read_lammps_dump_text(fileobj, index=-1, **kwargs): 

267 """Process cleartext lammps dumpfiles 

268 

269 :param fileobj: filestream providing the trajectory data 

270 :param index: integer or slice object (default: get the last timestep) 

271 :returns: list of Atoms objects 

272 :rtype: list 

273 """ 

274 # Load all dumped timesteps into memory simultaneously 

275 lines = deque(fileobj.readlines()) 

276 index_end = get_max_index(index) 

277 

278 n_atoms = 0 

279 images = [] 

280 

281 # avoid references before assignment in case of incorrect file structure 

282 cell, celldisp, pbc, info = None, None, False, {} 

283 

284 while len(lines) > n_atoms: 

285 line = lines.popleft() 

286 

287 if "ITEM: TIMESTEP" in line: 

288 line = lines.popleft() 

289 # !TODO: pyflakes complains about this line -> do something 

290 ntimestep = int(line.split()[0]) # NOQA 

291 info["timestep"] = ntimestep 

292 

293 if "ITEM: NUMBER OF ATOMS" in line: 

294 line = lines.popleft() 

295 n_atoms = int(line.split()[0]) 

296 

297 if "ITEM: BOX BOUNDS" in line: 

298 # save labels behind "ITEM: BOX BOUNDS" in triclinic case 

299 # (>=lammps-7Jul09) 

300 tilt_items = line.split()[3:] 

301 celldatarows = [lines.popleft() for _ in range(3)] 

302 celldata = np.loadtxt(celldatarows) 

303 diagdisp = celldata[:, :2].reshape(6, 1).flatten() 

304 

305 # determine cell tilt (triclinic case!) 

306 if len(celldata[0]) > 2: 

307 # for >=lammps-7Jul09 use labels behind "ITEM: BOX BOUNDS" 

308 # to assign tilt (vector) elements ... 

309 offdiag = celldata[:, 2] 

310 # ... otherwise assume default order in 3rd column 

311 # (if the latter was present) 

312 if len(tilt_items) >= 3: 

313 sort_index = [tilt_items.index(i) 

314 for i in ["xy", "xz", "yz"]] 

315 offdiag = offdiag[sort_index] 

316 else: 

317 offdiag = (0.0,) * 3 

318 

319 cell, celldisp = construct_cell(diagdisp, offdiag) 

320 

321 # Handle pbc conditions 

322 if len(tilt_items) == 3: 

323 pbc_items = tilt_items 

324 elif len(tilt_items) > 3: 

325 pbc_items = tilt_items[3:6] 

326 else: 

327 pbc_items = ["f", "f", "f"] 

328 pbc = ["p" in d.lower() for d in pbc_items] 

329 

330 if "ITEM: ATOMS" in line: 

331 colnames = line.split()[2:] 

332 datarows = [lines.popleft() for _ in range(n_atoms)] 

333 data = np.loadtxt(datarows, dtype=str, ndmin=2) 

334 out_atoms = lammps_data_to_ase_atoms( 

335 data=data, 

336 colnames=colnames, 

337 cell=cell, 

338 celldisp=celldisp, 

339 atomsobj=Atoms, 

340 pbc=pbc, 

341 **kwargs, 

342 ) 

343 out_atoms.info.update(info) 

344 images.append(out_atoms) 

345 

346 if len(images) > index_end >= 0: 

347 break 

348 

349 return images[index] 

350 

351 

352def read_lammps_dump_binary( 

353 fileobj, index=-1, colnames=None, intformat="SMALLBIG", **kwargs 

354): 

355 """Read binary dump-files (after binary2txt.cpp from lammps/tools) 

356 

357 :param fileobj: file-stream containing the binary lammps data 

358 :param index: integer or slice object (default: get the last timestep) 

359 :param colnames: data is columns and identified by a header 

360 :param intformat: lammps support different integer size. Parameter set \ 

361 at compile-time and can unfortunately not derived from data file 

362 :returns: list of Atoms-objects 

363 :rtype: list 

364 """ 

365 # depending on the chosen compilation flag lammps uses either normal 

366 # integers or long long for its id or timestep numbering 

367 # !TODO: tags are cast to double -> missing/double ids (add check?) 

368 _tagformat, bigformat = dict( 

369 SMALLSMALL=("i", "i"), SMALLBIG=("i", "q"), BIGBIG=("q", "q") 

370 )[intformat] 

371 

372 index_end = get_max_index(index) 

373 

374 # Standard columns layout from lammpsrun 

375 if not colnames: 

376 colnames = ["id", "type", "x", "y", "z", 

377 "vx", "vy", "vz", "fx", "fy", "fz"] 

378 

379 images = [] 

380 

381 # wrap struct.unpack to raise EOFError 

382 def read_variables(string): 

383 obj_len = struct.calcsize(string) 

384 data_obj = fileobj.read(obj_len) 

385 if obj_len != len(data_obj): 

386 raise EOFError 

387 return struct.unpack(string, data_obj) 

388 

389 while True: 

390 try: 

391 # Assume that the binary dump file is in the old (pre-29Oct2020) 

392 # format 

393 magic_string = None 

394 

395 # read header 

396 ntimestep, = read_variables("=" + bigformat) 

397 

398 # In the new LAMMPS binary dump format (version 29Oct2020 and 

399 # onward), a negative timestep is used to indicate that the next 

400 # few bytes will contain certain metadata 

401 if ntimestep < 0: 

402 # First bigint was actually encoding the negative of the format 

403 # name string length (we call this 'magic_string' to 

404 magic_string_len = -ntimestep 

405 

406 # The next `magic_string_len` bytes will hold a string 

407 # indicating the format of the dump file 

408 magic_string = b''.join(read_variables( 

409 "=" + str(magic_string_len) + "c")) 

410 

411 # Read endianness (integer). For now, we'll disregard the value 

412 # and simply use the host machine's endianness (via '=' 

413 # character used with struct.calcsize). 

414 # 

415 # TODO: Use the endianness of the dump file in subsequent 

416 # read_variables rather than just assuming it will match 

417 # that of the host 

418 read_variables("=i") 

419 

420 # Read revision number (integer) 

421 revision, = read_variables("=i") 

422 

423 # Finally, read the actual timestep (bigint) 

424 ntimestep, = read_variables("=" + bigformat) 

425 

426 _n_atoms, triclinic = read_variables("=" + bigformat + "i") 

427 boundary = read_variables("=6i") 

428 diagdisp = read_variables("=6d") 

429 if triclinic != 0: 

430 offdiag = read_variables("=3d") 

431 else: 

432 offdiag = (0.0,) * 3 

433 size_one, = read_variables("=i") 

434 

435 if len(colnames) != size_one: 

436 raise ValueError("Provided columns do not match binary file") 

437 

438 if magic_string and revision > 1: 

439 # New binary dump format includes units string, 

440 # columns string, and time 

441 units_str_len, = read_variables("=i") 

442 

443 if units_str_len > 0: 

444 # Read lammps units style 

445 _ = b''.join( 

446 read_variables("=" + str(units_str_len) + "c")) 

447 

448 flag, = read_variables("=c") 

449 if flag != b'\x00': 

450 # Flag was non-empty string 

451 read_variables("=d") 

452 

453 # Length of column string 

454 columns_str_len, = read_variables("=i") 

455 

456 # Read column string (e.g., "id type x y z vx vy vz fx fy fz") 

457 _ = b''.join(read_variables("=" + str(columns_str_len) + "c")) 

458 

459 nchunk, = read_variables("=i") 

460 

461 # lammps cells/boxes can have different boundary conditions on each 

462 # sides (makes mainly sense for different non-periodic conditions 

463 # (e.g. [f]ixed and [s]hrink for a irradiation simulation)) 

464 # periodic case: b 0 = 'p' 

465 # non-peridic cases 1: 'f', 2 : 's', 3: 'm' 

466 pbc = np.sum(np.array(boundary).reshape((3, 2)), axis=1) == 0 

467 

468 cell, celldisp = construct_cell(diagdisp, offdiag) 

469 

470 data = [] 

471 for _ in range(nchunk): 

472 # number-of-data-entries 

473 n_data, = read_variables("=i") 

474 # retrieve per atom data 

475 data += read_variables("=" + str(n_data) + "d") 

476 data = np.array(data).reshape((-1, size_one)) 

477 

478 # map data-chunk to ase atoms 

479 out_atoms = lammps_data_to_ase_atoms( 

480 data=data, 

481 colnames=colnames, 

482 cell=cell, 

483 celldisp=celldisp, 

484 pbc=pbc, 

485 **kwargs 

486 ) 

487 

488 images.append(out_atoms) 

489 

490 # stop if requested index has been found 

491 if len(images) > index_end >= 0: 

492 break 

493 

494 except EOFError: 

495 break 

496 

497 return images[index] 

498 

499 

500def _mass2element(mass): 

501 """ 

502 Guess the element corresponding to a given atomic mass. 

503 

504 :param mass: Atomic mass for searching. 

505 :return: Element symbol as a string. 

506 """ 

507 min_idx = np.argmin(np.abs(atomic_masses - mass)) 

508 element = chemical_symbols[min_idx] 

509 return element