Coverage for /builds/ericyuan00000/ase/ase/io/magres.py: 84.52%

1# fmt: off

3"""This module provides I/O functions for the MAGRES file format, introduced

4by CASTEP as an output format to store structural data and ab-initio

5calculated NMR parameters.

6Authors: Simone Sturniolo (ase implementation), Tim Green (original magres

7 parser code)

8"""

10import re

11from collections import OrderedDict

13import numpy as np

15import ase.units

16from ase.atoms import Atoms

17from ase.calculators.singlepoint import SinglePointDFTCalculator

18from ase.spacegroup import Spacegroup

19from ase.spacegroup.spacegroup import SpacegroupNotFoundError

21_mprops = {

22 'ms': ('sigma', 1),

23 'sus': ('S', 0),

24 'efg': ('V', 1),

25 'isc': ('K', 2)}

26# (matrix name, number of atoms in interaction) for various magres quantities

29def read_magres(fd, include_unrecognised=False):

30 """

31 Reader function for magres files.

32 """

34 blocks_re = re.compile(r'[\[<](?P<block_name>.*?)[>\]](.*?)[<\[]/' +

35 r'(?P=block_name)[\]>]', re.M | re.S)

37 """

38 Here are defined the various functions required to parse

39 different blocks.

40 """

42 def tensor33(x):

43 return np.squeeze(np.reshape(x, (3, 3))).tolist()

45 def tensor31(x):

46 return np.squeeze(np.reshape(x, (3, 1))).tolist()

48 def get_version(file_contents):

49 """

50 Look for and parse the magres file format version line

51 """

53 lines = file_contents.split('\n')

54 match = re.match(r'\#\$magres-abinitio-v([0-9]+).([0-9]+)', lines[0])

56 if match:

57 version = match.groups()

58 version = tuple(vnum for vnum in version)

59 else:

60 version = None

62 return version

64 def parse_blocks(file_contents):

65 """

66 Parse series of XML-like deliminated blocks into a list of

67 (block_name, contents) tuples

68 """

70 blocks = blocks_re.findall(file_contents)

72 return blocks

74 def parse_block(block):

75 """

76 Parse block contents into a series of (tag, data) records

77 """

79 def clean_line(line):

80 # Remove comments and whitespace at start and ends of line

81 line = re.sub('#(.*?)\n', '', line)

82 line = line.strip()

84 return line

86 name, data = block

88 lines = [clean_line(line) for line in data.split('\n')]

90 records = []

92 for line in lines:

93 xs = line.split()

95 if len(xs) > 0:

96 tag = xs[0]

97 data = xs[1:]

99 records.append((tag, data))

100

101 return (name, records)

102

103 def check_units(d):

104 """

105 Verify that given units for a particular tag are correct.

106 """

107

108 allowed_units = {'lattice': 'Angstrom',

109 'atom': 'Angstrom',

110 'ms': 'ppm',

111 'efg': 'au',

112 'efg_local': 'au',

113 'efg_nonlocal': 'au',

114 'isc': '10^19.T^2.J^-1',

115 'isc_fc': '10^19.T^2.J^-1',

116 'isc_orbital_p': '10^19.T^2.J^-1',

117 'isc_orbital_d': '10^19.T^2.J^-1',

118 'isc_spin': '10^19.T^2.J^-1',

119 'sus': '10^-6.cm^3.mol^-1',

120 'calc_cutoffenergy': 'Hartree', }

121

122 if d[0] in d and d[1] == allowed_units[d[0]]:

123 pass

124 else:

125 raise RuntimeError(f'Unrecognized units: {d[0]} {d[1]}')

126

127 return d

128

129 def parse_magres_block(block):

130 """

131 Parse magres block into data dictionary given list of record

132 tuples.

133 """

134

135 _name, records = block

136

137 # 3x3 tensor

138 def ntensor33(name):

139 return lambda d: {name: tensor33([float(x) for x in data])}

140

141 # Atom label, atom index and 3x3 tensor

142 def sitensor33(name):

143 return lambda d: {'atom': {'label': data[0],

144 'index': int(data[1])},

145 name: tensor33([float(x) for x in data[2:]])}

146

147 # 2x(Atom label, atom index) and 3x3 tensor

148 def sisitensor33(name):

149 return lambda d: {'atom1': {'label': data[0],

150 'index': int(data[1])},

151 'atom2': {'label': data[2],

152 'index': int(data[3])},

153 name: tensor33([float(x) for x in data[4:]])}

154

155 tags = {'ms': sitensor33('sigma'),

156 'sus': ntensor33('S'),

157 'efg': sitensor33('V'),

158 'efg_local': sitensor33('V'),

159 'efg_nonlocal': sitensor33('V'),

160 'isc': sisitensor33('K'),

161 'isc_fc': sisitensor33('K'),

162 'isc_spin': sisitensor33('K'),

163 'isc_orbital_p': sisitensor33('K'),

164 'isc_orbital_d': sisitensor33('K'),

165 'units': check_units}

166

167 data_dict = {}

168

169 for record in records:

170 tag, data = record

171

172 if tag not in data_dict:

173 data_dict[tag] = []

174

175 data_dict[tag].append(tags[tag](data))

176

177 return data_dict

178

179 def parse_atoms_block(block):

180 """

181 Parse atoms block into data dictionary given list of record tuples.

182 """

183

184 _name, records = block

185

186 # Lattice record: a1, a2 a3, b1, b2, b3, c1, c2 c3

187 def lattice(d):

188 return tensor33([float(x) for x in data])

189

190 # Atom record: label, index, x, y, z

191 def atom(d):

192 return {'species': data[0],

193 'label': data[1],

194 'index': int(data[2]),

195 'position': tensor31([float(x) for x in data[3:]])}

196

197 def symmetry(d):

198 return ' '.join(data)

199

200 tags = {'lattice': lattice,

201 'atom': atom,

202 'units': check_units,

203 'symmetry': symmetry}

204

205 data_dict = {}

206

207 for record in records:

208 tag, data = record

209 if tag not in data_dict:

210 data_dict[tag] = []

211 data_dict[tag].append(tags[tag](data))

212

213 return data_dict

214

215 def parse_generic_block(block):

216 """

217 Parse any other block into data dictionary given list of record

218 tuples.

219 """

220

221 _name, records = block

222

223 data_dict = {}

224

225 for record in records:

226 tag, data = record

227

228 if tag not in data_dict:

229 data_dict[tag] = []

230

231 data_dict[tag].append(data)

232

233 return data_dict

234

235 """

236 Actual parser code.

237 """

238

239 block_parsers = {'magres': parse_magres_block,

240 'atoms': parse_atoms_block,

241 'calculation': parse_generic_block, }

242

243 file_contents = fd.read()

244

245 # Solve incompatibility for atomic indices above 100

246 pattern_ms = r'(ms [a-zA-Z]{1,2})(\d+)'

247 file_contents = re.sub(pattern_ms, r'\g<1> \g<2>', file_contents)

248 pattern_efg = r'(efg [a-zA-Z]{1,2})(\d+)'

249 file_contents = re.sub(pattern_efg, r'\g<1> \g<2>', file_contents)

250

251 # This works as a validity check

252 version = get_version(file_contents)

253 if version is None:

254 # This isn't even a .magres file!

255 raise RuntimeError('File is not in standard Magres format')

256 blocks = parse_blocks(file_contents)

257

258 data_dict = {}

259

260 for block_data in blocks:

261 block = parse_block(block_data)

262

263 if block[0] in block_parsers:

264 block_dict = block_parsers[block[0]](block)

265 data_dict[block[0]] = block_dict

266 else:

267 # Throw in the text content of blocks we don't recognise

268 if include_unrecognised:

269 data_dict[block[0]] = block_data[1]

270

271 # Now the loaded data must be turned into an ASE Atoms object

272

273 # First check if the file is even viable

274 if 'atoms' not in data_dict:

275 raise RuntimeError('Magres file does not contain structure data')

276

277 # Allowed units handling. This is redundant for now but

278 # could turn out useful in the future

279

280 magres_units = {'Angstrom': ase.units.Ang}

281

282 # Lattice parameters?

283 if 'lattice' in data_dict['atoms']:

284 try:

285 u = dict(data_dict['atoms']['units'])['lattice']

286 except KeyError:

287 raise RuntimeError('No units detected in file for lattice')

288 u = magres_units[u]

289 cell = np.array(data_dict['atoms']['lattice'][0]) * u

290 pbc = True

291 else:

292 cell = None

293 pbc = False

294

295 # Now the atoms

296 symbols = []

297 positions = []

298 indices = []

299 labels = []

300

301 if 'atom' in data_dict['atoms']:

302 try:

303 u = dict(data_dict['atoms']['units'])['atom']

304 except KeyError:

305 raise RuntimeError('No units detected in file for atom positions')

306 u = magres_units[u]

307 # Now we have to account for the possibility of there being CASTEP

308 # 'custom' species amongst the symbols

309 custom_species = None

310 for a in data_dict['atoms']['atom']:

311 spec_custom = a['species'].split(':', 1)

312 if len(spec_custom) > 1 and custom_species is None:

313 # Add it to the custom info!

314 custom_species = list(symbols)

315 symbols.append(spec_custom[0])

316 positions.append(a['position'])

317 indices.append(a['index'])

318 labels.append(a['label'])

319 if custom_species is not None:

320 custom_species.append(a['species'])

321

322 atoms = Atoms(cell=cell,

323 pbc=pbc,

324 symbols=symbols,

325 positions=positions)

326

327 # Add custom species if present

328 if custom_species is not None:

329 atoms.new_array('castep_custom_species', np.array(custom_species))

330

331 # Add the spacegroup, if present and recognizable

332 if 'symmetry' in data_dict['atoms']:

333 try:

334 spg = Spacegroup(data_dict['atoms']['symmetry'][0])

335 except SpacegroupNotFoundError:

336 # Not found

337 spg = Spacegroup(1) # Most generic one

338 atoms.info['spacegroup'] = spg

339

340 # Set up the rest of the properties as arrays

341 atoms.new_array('indices', np.array(indices))

342 atoms.new_array('labels', np.array(labels))

343

344 # Now for the magres specific stuff

345 li_list = list(zip(labels, indices))

346

347 def create_magres_array(name, order, block):

348

349 if order == 1:

350 u_arr = [None] * len(li_list)

351 elif order == 2:

352 u_arr = [[None] * (i + 1) for i in range(len(li_list))]

353 else:

354 raise ValueError(

355 'Invalid order value passed to create_magres_array')

356

357 for s in block:

358 # Find the atom index/indices

359 if order == 1:

360 # First find out which atom this is

361 at = (s['atom']['label'], s['atom']['index'])

362 try:

363 ai = li_list.index(at)

364 except ValueError:

365 raise RuntimeError('Invalid data in magres block')

366 # Then add the relevant quantity

367 u_arr[ai] = s[mn]

368 else:

369 at1 = (s['atom1']['label'], s['atom1']['index'])

370 at2 = (s['atom2']['label'], s['atom2']['index'])

371 ai1 = li_list.index(at1)

372 ai2 = li_list.index(at2)

373 # Sort them

374 ai1, ai2 = sorted((ai1, ai2), reverse=True)

375 u_arr[ai1][ai2] = s[mn]

376

377 if order == 1:

378 return np.array(u_arr)

379 else:

380 return np.array(u_arr, dtype=object)

381

382 if 'magres' in data_dict:

383 if 'units' in data_dict['magres']:

384 atoms.info['magres_units'] = dict(data_dict['magres']['units'])

385 for u in atoms.info['magres_units']:

386 # This bit to keep track of tags

387 u0 = u.split('_')[0]

388

389 if u0 not in _mprops:

390 raise RuntimeError('Invalid data in magres block')

391

392 mn, order = _mprops[u0]

393

394 if order > 0:

395 u_arr = create_magres_array(mn, order,

396 data_dict['magres'][u])

397 atoms.new_array(u, u_arr)

398 else:

399 # atoms.info['magres_data'] = atoms.info.get('magres_data',

400 # {})

401 # # We only take element 0 because for this sort of data

402 # # there should be only that

403 # atoms.info['magres_data'][u] = \

404 # data_dict['magres'][u][0][mn]

405 if atoms.calc is None:

406 calc = SinglePointDFTCalculator(atoms)

407 atoms.calc = calc

408 atoms.calc.results[u] = data_dict['magres'][u][0][mn]

409

410 if 'calculation' in data_dict:

411 atoms.info['magresblock_calculation'] = data_dict['calculation']

412

413 if include_unrecognised:

414 for b in data_dict:

415 if b not in block_parsers:

416 atoms.info['magresblock_' + b] = data_dict[b]

417

418 return atoms

419

420

421def tensor_string(tensor):

422 return ' '.join(' '.join(str(x) for x in xs) for xs in tensor)

423

424

425def write_magres(fd, image):

426 """

427 A writing function for magres files. Two steps: first data are arranged

428 into structures, then dumped to the actual file

429 """

430

431 image_data = {}

432 image_data['atoms'] = {'units': []}

433 # Contains units, lattice and each individual atom

434 if np.all(image.get_pbc()):

435 # Has lattice!

436 image_data['atoms']['units'].append(['lattice', 'Angstrom'])

437 image_data['atoms']['lattice'] = [image.get_cell()]

438

439 # Now for the atoms

440 if image.has('labels'):

441 labels = image.get_array('labels')

442 else:

443 labels = image.get_chemical_symbols()

444

445 if image.has('indices'):

446 indices = image.get_array('indices')

447 else:

448 indices = [labels[:i + 1].count(labels[i]) for i in range(len(labels))]

449

450 # Iterate over atoms

451 symbols = (image.get_array('castep_custom_species')

452 if image.has('castep_custom_species')

453 else image.get_chemical_symbols())

454

455 atom_info = list(zip(symbols,

456 image.get_positions()))

457 if len(atom_info) > 0:

458 image_data['atoms']['units'].append(['atom', 'Angstrom'])

459 image_data['atoms']['atom'] = []

460

461 for i, a in enumerate(atom_info):

462 image_data['atoms']['atom'].append({

463 'index': indices[i],

464 'position': a[1],

465 'species': a[0],

466 'label': labels[i]})

467

468 # Spacegroup, if present

469 if 'spacegroup' in image.info:

470 image_data['atoms']['symmetry'] = [image.info['spacegroup']

471 .symbol.replace(' ', '')]

472

473 # Now go on to do the same for magres information

474 if 'magres_units' in image.info:

475

476 image_data['magres'] = {'units': []}

477

478 for u in image.info['magres_units']:

479 # Get the type

480 p = u.split('_')[0]

481 if p in _mprops:

482 image_data['magres']['units'].append(

483 [u, image.info['magres_units'][u]])

484 image_data['magres'][u] = []

485 mn, order = _mprops[p]

486

487 if order == 0:

488 # The case of susceptibility

489 tens = {

490 mn: image.calc.results[u]

491 }

492 image_data['magres'][u] = tens

493 else:

494 arr = image.get_array(u)

495 li_tab = zip(labels, indices)

496 for i, (lab, ind) in enumerate(li_tab):

497 if order == 2:

498 for j, (lab2, ind2) in enumerate(li_tab[:i + 1]):

499 if arr[i][j] is not None:

500 tens = {mn: arr[i][j],

501 'atom1': {'label': lab,

502 'index': ind},

503 'atom2': {'label': lab2,

504 'index': ind2}}

505 image_data['magres'][u].append(tens)

506 elif order == 1:

507 if arr[i] is not None:

508 tens = {mn: arr[i],

509 'atom': {'label': lab,

510 'index': ind}}

511 image_data['magres'][u].append(tens)

512

513 # Calculation block, if present

514 if 'magresblock_calculation' in image.info:

515 image_data['calculation'] = image.info['magresblock_calculation']

516

517 def write_units(data, out):

518 if 'units' in data:

519 for tag, units in data['units']:

520 out.append(f' units {tag} {units}')

521

522 def write_magres_block(data):

523 """

524 Write out a <magres> block from its dictionary representation

525 """

526

527 out = []

528

529 def nout(tag, tensor_name):

530 if tag in data:

531 out.append(' '.join([' ', tag,

532 tensor_string(data[tag][tensor_name])]))

533

534 def siout(tag, tensor_name):

535 if tag in data:

536 for atom_si in data[tag]:

537 out.append((' %s %s %d '

538 '%s') % (tag,

539 atom_si['atom']['label'],

540 atom_si['atom']['index'],

541 tensor_string(atom_si[tensor_name])))

542

543 write_units(data, out)

544

545 nout('sus', 'S')

546 siout('ms', 'sigma')

547

548 siout('efg_local', 'V')

549 siout('efg_nonlocal', 'V')

550 siout('efg', 'V')

551

552 def sisiout(tag, tensor_name):

553 if tag in data:

554 for isc in data[tag]:

555 out.append((' %s %s %d %s %d '

556 '%s') % (tag,

557 isc['atom1']['label'],

558 isc['atom1']['index'],

559 isc['atom2']['label'],

560 isc['atom2']['index'],

561 tensor_string(isc[tensor_name])))

562

563 sisiout('isc_fc', 'K')

564 sisiout('isc_orbital_p', 'K')

565 sisiout('isc_orbital_d', 'K')

566 sisiout('isc_spin', 'K')

567 sisiout('isc', 'K')

568

569 return '\n'.join(out)

570

571 def write_atoms_block(data):

572 out = []

573

574 write_units(data, out)

575

576 if 'lattice' in data:

577 for lat in data['lattice']:

578 out.append(f" lattice {tensor_string(lat)}")

579

580 if 'symmetry' in data:

581 for sym in data['symmetry']:

582 out.append(f' symmetry {sym}')

583

584 if 'atom' in data:

585 for a in data['atom']:

586 out.append((' atom %s %s %s '

587 '%s') % (a['species'],

588 a['label'],

589 a['index'],

590 ' '.join(str(x) for x in a['position'])))

591

592 return '\n'.join(out)

593

594 def write_generic_block(data):

595 out = []

596

597 for tag, data in data.items():

598 for value in data:

599 out.append('{} {}'.format(tag, ' '.join(str(x) for x in value)))

600

601 return '\n'.join(out)

602

603 # Using this to preserve order

604 block_writers = OrderedDict([('calculation', write_generic_block),

605 ('atoms', write_atoms_block),

606 ('magres', write_magres_block)])

607

608 # First, write the header

609 fd.write('#$magres-abinitio-v1.0\n')

610 fd.write('# Generated by the Atomic Simulation Environment library\n')

611

612 for b in block_writers:

613 if b in image_data:

614 fd.write(f'[{b}]\n')

615 fd.write(block_writers[b](image_data[b]))

616 fd.write(f'\n[/{b}]\n')

617

618 # Now on to check for any non-standard blocks...

619 for i in image.info:

620 if '_' in i:

621 ismag, b = i.split('_', 1)

622 if ismag == 'magresblock' and b not in block_writers:

623 fd.write(f'[{b}]\n')

624 fd.write(image.info[i])

625 fd.write(f'[/{b}]\n')