Coverage for /builds/ericyuan00000/ase/ase/io/lammpsrun.py: 88.69%

1# fmt: off

3import gzip

4import struct

5from collections import deque

6from os.path import splitext

8import numpy as np

10from ase.atoms import Atoms

11from ase.calculators.lammps import convert

12from ase.calculators.singlepoint import SinglePointCalculator

13from ase.data import atomic_masses, chemical_symbols

14from ase.parallel import paropen

15from ase.quaternions import Quaternions

18def read_lammps_dump(infileobj, **kwargs):

19 """Method which reads a LAMMPS dump file.

21 LAMMPS chooses output method depending on the given suffix:

22 - .bin : binary file

23 - .gz : output piped through gzip

24 - .mpiio: using mpiio (should be like cleartext,

25 with different ordering)

26 - else : normal clear-text format

28 :param infileobj: string to file, opened file or file-like stream

30 """

31 # !TODO: add support for lammps-regex naming schemes (output per

32 # processor and timestep wildcards)

34 opened = False

35 if isinstance(infileobj, str):

36 opened = True

37 suffix = splitext(infileobj)[-1]

38 if suffix == ".bin":

39 fileobj = paropen(infileobj, "rb")

40 elif suffix == ".gz":

41 # !TODO: save for parallel execution?

42 fileobj = gzip.open(infileobj, "rb")

43 else:

44 fileobj = paropen(infileobj)

45 else:

46 suffix = splitext(infileobj.name)[-1]

47 fileobj = infileobj

49 if suffix == ".bin":

50 out = read_lammps_dump_binary(fileobj, **kwargs)

51 if opened:

52 fileobj.close()

53 return out

55 out = read_lammps_dump_text(fileobj, **kwargs)

57 if opened:

58 fileobj.close()

60 return out

63def lammps_data_to_ase_atoms(

64 data,

65 colnames,

66 cell,

67 celldisp,

68 pbc=False,

69 atomsobj=Atoms,

70 order=True,

71 specorder=None,

72 prismobj=None,

73 units="metal",

74):

75 """Extract positions and other per-atom parameters and create Atoms

77 :param data: per atom data

78 :param colnames: index for data

79 :param cell: cell dimensions

80 :param celldisp: origin shift

81 :param pbc: periodic boundaries

82 :param atomsobj: function to create ase-Atoms object

83 :param order: sort atoms by id. Might be faster to turn off.

84 Disregarded in case `id` column is not given in file.

85 :param specorder: list of species to map lammps types to ase-species

86 (usually .dump files to not contain type to species mapping)

87 :param prismobj: Coordinate transformation between lammps and ase

88 :type prismobj: Prism

89 :param units: lammps units for unit transformation between lammps and ase

90 :returns: Atoms object

91 :rtype: Atoms

93 """

94 if len(data.shape) == 1:

95 data = data[np.newaxis, :]

97 # read IDs if given and order if needed

98 if "id" in colnames:

99 ids = data[:, colnames.index("id")].astype(int)

100 if order:

101 sort_order = np.argsort(ids)

102 data = data[sort_order, :]

103

104 # determine the elements

105 if "element" in colnames:

106 # priority to elements written in file

107 elements = data[:, colnames.index("element")]

108 elif "mass" in colnames:

109 # try to determine elements from masses

110 elements = [

111 _mass2element(m)

112 for m in data[:, colnames.index("mass")].astype(float)

113 ]

114 elif "type" in colnames:

115 # fall back to `types` otherwise

116 elements = data[:, colnames.index("type")].astype(int)

117

118 # reconstruct types from given specorder

119 if specorder:

120 elements = [specorder[t - 1] for t in elements]

121 else:

122 # todo: what if specorder give but no types?

123 # in principle the masses could work for atoms, but that needs

124 # lots of cases and new code I guess

125 raise ValueError("Cannot determine atom types form LAMMPS dump file")

126

127 def get_quantity(labels, quantity=None):

128 try:

129 cols = [colnames.index(label) for label in labels]

130 if quantity:

131 return convert(data[:, cols].astype(float), quantity,

132 units, "ASE")

133

134 return data[:, cols].astype(float)

135 except ValueError:

136 return None

137

138 # Positions

139 positions = None

140 scaled_positions = None

141 if "x" in colnames:

142 # doc: x, y, z = unscaled atom coordinates

143 positions = get_quantity(["x", "y", "z"], "distance")

144 elif "xs" in colnames:

145 # doc: xs,ys,zs = scaled atom coordinates

146 scaled_positions = get_quantity(["xs", "ys", "zs"])

147 elif "xu" in colnames:

148 # doc: xu,yu,zu = unwrapped atom coordinates

149 positions = get_quantity(["xu", "yu", "zu"], "distance")

150 elif "xsu" in colnames:

151 # xsu,ysu,zsu = scaled unwrapped atom coordinates

152 scaled_positions = get_quantity(["xsu", "ysu", "zsu"])

153 else:

154 raise ValueError("No atomic positions found in LAMMPS output")

155

156 velocities = get_quantity(["vx", "vy", "vz"], "velocity")

157 charges = get_quantity(["q"], "charge")

158 forces = get_quantity(["fx", "fy", "fz"], "force")

159 # !TODO: how need quaternions be converted?

160 quaternions = get_quantity(["c_q[1]", "c_q[2]", "c_q[3]", "c_q[4]"])

161

162 # convert cell

163 cell = convert(cell, "distance", units, "ASE")

164 celldisp = convert(celldisp, "distance", units, "ASE")

165 if prismobj:

166 celldisp = prismobj.vector_to_ase(celldisp)

167 cell = prismobj.update_cell(cell)

168

169 if quaternions is not None:

170 out_atoms = Quaternions(

171 symbols=elements,

172 positions=positions,

173 cell=cell,

174 celldisp=celldisp,

175 pbc=pbc,

176 quaternions=quaternions,

177 )

178 elif positions is not None:

179 # reverse coordinations transform to lammps system

180 # (for all vectors = pos, vel, force)

181 if prismobj:

182 positions = prismobj.vector_to_ase(positions, wrap=True)

183

184 out_atoms = atomsobj(

185 symbols=elements,

186 positions=positions,

187 pbc=pbc,

188 celldisp=celldisp,

189 cell=cell

190 )

191 elif scaled_positions is not None:

192 out_atoms = atomsobj(

193 symbols=elements,

194 scaled_positions=scaled_positions,

195 pbc=pbc,

196 celldisp=celldisp,

197 cell=cell,

198 )

199

200 if velocities is not None:

201 if prismobj:

202 velocities = prismobj.vector_to_ase(velocities)

203 out_atoms.set_velocities(velocities)

204 if charges is not None:

205 out_atoms.set_initial_charges([charge[0] for charge in charges])

206 if forces is not None:

207 if prismobj:

208 forces = prismobj.vector_to_ase(forces)

209 # !TODO: use another calculator if available (or move forces

210 # to atoms.property) (other problem: synchronizing

211 # parallel runs)

212 calculator = SinglePointCalculator(out_atoms, energy=0.0,

213 forces=forces)

214 out_atoms.calc = calculator

215

216 # process the extra columns of fixes, variables and computes

217 # that can be dumped, add as additional arrays to atoms object

218 for colname in colnames:

219 # determine if it is a compute, fix or

220 # custom property/atom (but not the quaternian)

221 if (colname.startswith('f_') or colname.startswith('v_') or

222 colname.startswith('d_') or colname.startswith('d2_') or

223 (colname.startswith('c_') and not colname.startswith('c_q['))):

224 out_atoms.new_array(colname, get_quantity([colname]),

225 dtype='float')

226

227 elif colname.startswith('i_') or colname.startswith('i2_'):

228 out_atoms.new_array(colname, get_quantity([colname]),

229 dtype='int')

230

231 return out_atoms

232

233

234def construct_cell(diagdisp, offdiag):

235 """Help function to create an ASE-cell with displacement vector from

236 the lammps coordination system parameters.

237

238 :param diagdisp: cell dimension convoluted with the displacement vector

239 :param offdiag: off-diagonal cell elements

240 :returns: cell and cell displacement vector

241 :rtype: tuple

242 """

243 xlo, xhi, ylo, yhi, zlo, zhi = diagdisp

244 xy, xz, yz = offdiag

245

246 # create ase-cell from lammps-box

247 xhilo = (xhi - xlo) - abs(xy) - abs(xz)

248 yhilo = (yhi - ylo) - abs(yz)

249 zhilo = zhi - zlo

250 celldispx = xlo - min(0, xy) - min(0, xz)

251 celldispy = ylo - min(0, yz)

252 celldispz = zlo

253 cell = np.array([[xhilo, 0, 0], [xy, yhilo, 0], [xz, yz, zhilo]])

254 celldisp = np.array([celldispx, celldispy, celldispz])

255

256 return cell, celldisp

257

258

259def get_max_index(index):

260 if np.isscalar(index):

261 return index

262 elif isinstance(index, slice):

263 return index.stop if (index.stop is not None) else float("inf")

264

265

266def read_lammps_dump_text(fileobj, index=-1, **kwargs):

267 """Process cleartext lammps dumpfiles

268

269 :param fileobj: filestream providing the trajectory data

270 :param index: integer or slice object (default: get the last timestep)

271 :returns: list of Atoms objects

272 :rtype: list

273 """

274 # Load all dumped timesteps into memory simultaneously

275 lines = deque(fileobj.readlines())

276 index_end = get_max_index(index)

277

278 n_atoms = 0

279 images = []

280

281 # avoid references before assignment in case of incorrect file structure

282 cell, celldisp, pbc, info = None, None, False, {}

283

284 while len(lines) > n_atoms:

285 line = lines.popleft()

286

287 if "ITEM: TIMESTEP" in line:

288 line = lines.popleft()

289 # !TODO: pyflakes complains about this line -> do something

290 ntimestep = int(line.split()[0]) # NOQA

291 info["timestep"] = ntimestep

292

293 if "ITEM: NUMBER OF ATOMS" in line:

294 line = lines.popleft()

295 n_atoms = int(line.split()[0])

296

297 if "ITEM: BOX BOUNDS" in line:

298 # save labels behind "ITEM: BOX BOUNDS" in triclinic case

299 # (>=lammps-7Jul09)

300 tilt_items = line.split()[3:]

301 celldatarows = [lines.popleft() for _ in range(3)]

302 celldata = np.loadtxt(celldatarows)

303 diagdisp = celldata[:, :2].reshape(6, 1).flatten()

304

305 # determine cell tilt (triclinic case!)

306 if len(celldata[0]) > 2:

307 # for >=lammps-7Jul09 use labels behind "ITEM: BOX BOUNDS"

308 # to assign tilt (vector) elements ...

309 offdiag = celldata[:, 2]

310 # ... otherwise assume default order in 3rd column

311 # (if the latter was present)

312 if len(tilt_items) >= 3:

313 sort_index = [tilt_items.index(i)

314 for i in ["xy", "xz", "yz"]]

315 offdiag = offdiag[sort_index]

316 else:

317 offdiag = (0.0,) * 3

318

319 cell, celldisp = construct_cell(diagdisp, offdiag)

320

321 # Handle pbc conditions

322 if len(tilt_items) == 3:

323 pbc_items = tilt_items

324 elif len(tilt_items) > 3:

325 pbc_items = tilt_items[3:6]

326 else:

327 pbc_items = ["f", "f", "f"]

328 pbc = ["p" in d.lower() for d in pbc_items]

329

330 if "ITEM: ATOMS" in line:

331 colnames = line.split()[2:]

332 datarows = [lines.popleft() for _ in range(n_atoms)]

333 data = np.loadtxt(datarows, dtype=str, ndmin=2)

334 out_atoms = lammps_data_to_ase_atoms(

335 data=data,

336 colnames=colnames,

337 cell=cell,

338 celldisp=celldisp,

339 atomsobj=Atoms,

340 pbc=pbc,

341 **kwargs,

342 )

343 out_atoms.info.update(info)

344 images.append(out_atoms)

345

346 if len(images) > index_end >= 0:

347 break

348

349 return images[index]

350

351

352def read_lammps_dump_binary(

353 fileobj, index=-1, colnames=None, intformat="SMALLBIG", **kwargs

354):

355 """Read binary dump-files (after binary2txt.cpp from lammps/tools)

356

357 :param fileobj: file-stream containing the binary lammps data

358 :param index: integer or slice object (default: get the last timestep)

359 :param colnames: data is columns and identified by a header

360 :param intformat: lammps support different integer size. Parameter set \

361 at compile-time and can unfortunately not derived from data file

362 :returns: list of Atoms-objects

363 :rtype: list

364 """

365 # depending on the chosen compilation flag lammps uses either normal

366 # integers or long long for its id or timestep numbering

367 # !TODO: tags are cast to double -> missing/double ids (add check?)

368 _tagformat, bigformat = dict(

369 SMALLSMALL=("i", "i"), SMALLBIG=("i", "q"), BIGBIG=("q", "q")

370 )[intformat]

371

372 index_end = get_max_index(index)

373

374 # Standard columns layout from lammpsrun

375 if not colnames:

376 colnames = ["id", "type", "x", "y", "z",

377 "vx", "vy", "vz", "fx", "fy", "fz"]

378

379 images = []

380

381 # wrap struct.unpack to raise EOFError

382 def read_variables(string):

383 obj_len = struct.calcsize(string)

384 data_obj = fileobj.read(obj_len)

385 if obj_len != len(data_obj):

386 raise EOFError

387 return struct.unpack(string, data_obj)

388

389 while True:

390 try:

391 # Assume that the binary dump file is in the old (pre-29Oct2020)

392 # format

393 magic_string = None

394

395 # read header

396 ntimestep, = read_variables("=" + bigformat)

397

398 # In the new LAMMPS binary dump format (version 29Oct2020 and

399 # onward), a negative timestep is used to indicate that the next

400 # few bytes will contain certain metadata

401 if ntimestep < 0:

402 # First bigint was actually encoding the negative of the format

403 # name string length (we call this 'magic_string' to

404 magic_string_len = -ntimestep

405

406 # The next `magic_string_len` bytes will hold a string

407 # indicating the format of the dump file

408 magic_string = b''.join(read_variables(

409 "=" + str(magic_string_len) + "c"))

410

411 # Read endianness (integer). For now, we'll disregard the value

412 # and simply use the host machine's endianness (via '='

413 # character used with struct.calcsize).

414 #

415 # TODO: Use the endianness of the dump file in subsequent

416 # read_variables rather than just assuming it will match

417 # that of the host

418 read_variables("=i")

419

420 # Read revision number (integer)

421 revision, = read_variables("=i")

422

423 # Finally, read the actual timestep (bigint)

424 ntimestep, = read_variables("=" + bigformat)

425

426 _n_atoms, triclinic = read_variables("=" + bigformat + "i")

427 boundary = read_variables("=6i")

428 diagdisp = read_variables("=6d")

429 if triclinic != 0:

430 offdiag = read_variables("=3d")

431 else:

432 offdiag = (0.0,) * 3

433 size_one, = read_variables("=i")

434

435 if len(colnames) != size_one:

436 raise ValueError("Provided columns do not match binary file")

437

438 if magic_string and revision > 1:

439 # New binary dump format includes units string,

440 # columns string, and time

441 units_str_len, = read_variables("=i")

442

443 if units_str_len > 0:

444 # Read lammps units style

445 _ = b''.join(

446 read_variables("=" + str(units_str_len) + "c"))

447

448 flag, = read_variables("=c")

449 if flag != b'\x00':

450 # Flag was non-empty string

451 read_variables("=d")

452

453 # Length of column string

454 columns_str_len, = read_variables("=i")

455

456 # Read column string (e.g., "id type x y z vx vy vz fx fy fz")

457 _ = b''.join(read_variables("=" + str(columns_str_len) + "c"))

458

459 nchunk, = read_variables("=i")

460

461 # lammps cells/boxes can have different boundary conditions on each

462 # sides (makes mainly sense for different non-periodic conditions

463 # (e.g. [f]ixed and [s]hrink for a irradiation simulation))

464 # periodic case: b 0 = 'p'

465 # non-peridic cases 1: 'f', 2 : 's', 3: 'm'

466 pbc = np.sum(np.array(boundary).reshape((3, 2)), axis=1) == 0

467

468 cell, celldisp = construct_cell(diagdisp, offdiag)

469

470 data = []

471 for _ in range(nchunk):

472 # number-of-data-entries

473 n_data, = read_variables("=i")

474 # retrieve per atom data

475 data += read_variables("=" + str(n_data) + "d")

476 data = np.array(data).reshape((-1, size_one))

477

478 # map data-chunk to ase atoms

479 out_atoms = lammps_data_to_ase_atoms(

480 data=data,

481 colnames=colnames,

482 cell=cell,

483 celldisp=celldisp,

484 pbc=pbc,

485 **kwargs

486 )

487

488 images.append(out_atoms)

489

490 # stop if requested index has been found

491 if len(images) > index_end >= 0:

492 break

493

494 except EOFError:

495 break

496

497 return images[index]

498

499

500def _mass2element(mass):

501 """

502 Guess the element corresponding to a given atomic mass.

503

504 :param mass: Atomic mass for searching.

505 :return: Element symbol as a string.

506 """

507 min_idx = np.argmin(np.abs(atomic_masses - mass))

508 element = chemical_symbols[min_idx]

509 return element