Coverage for /builds/ericyuan00000/ase/ase/io/formats.py: 89.03%

1# fmt: off

3"""File formats.

5This module implements the read(), iread() and write() functions in ase.io.

6For each file format there is an IOFormat object.

8There is a dict, ioformats, which stores the objects.

10Example

11=======

13The xyz format is implemented in the ase/io/xyz.py file which has a

14read_xyz() generator and a write_xyz() function. This and other

15information can be obtained from ioformats['xyz'].

16"""

18import functools

19import inspect

20import io

21import numbers

22import os

23import re

24import sys

25import warnings

26from importlib import import_module

27from importlib.metadata import entry_points

28from pathlib import Path, PurePath

29from typing import (

30 IO,

31 Any,

32 Dict,

33 Iterator,

34 List,

35 Optional,

36 Sequence,

37 Tuple,

38 Union,

39)

41from ase.atoms import Atoms

42from ase.parallel import parallel_function, parallel_generator

43from ase.utils import string2index

44from ase.utils.plugins import ExternalIOFormat

# Upper bound on how much of a file is read (and matched against the
# registered magic patterns) when guessing the format from its contents.
PEEK_BYTES = 50000

class UnknownFileTypeError(Exception):
    """Raised when a file format cannot be determined or is not available."""

class IOFormat:
    """Metadata and lazily resolved read/write functions of one file format.

    The reader and writer are looked up as ``read_<name>`` and
    ``write_<name>`` (dashes in the format name replaced by underscores)
    in the module called ``module_name``.  The module is imported on
    demand, not when the format is defined.

    The two-character ``code`` describes the format:

    * first character: ``'1'`` (single Atoms object) or ``'+'`` (multiple)
    * second character: ``'F'`` (accepts a file object), ``'S'`` (needs a
      file name) or ``'B'`` (like ``'F'`` but opened in binary mode)
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        self.magic: List[str] = []
        self.magic_regex: Optional[bytes] = None

    def open(self, fname, mode: str = 'r') -> IO:
        """Open ``fname`` for this format in mode 'r', 'w' or 'a'.

        Binary formats get ``'b'`` appended to the mode.

        Raises ValueError for any other mode and NotImplementedError if
        the format lacks the reader/writer/append support the mode needs.
        """
        # We might want append mode, too
        # We can allow more flags as needed (buffering etc.)
        if mode not in list('rwa'):
            raise ValueError("Only modes allowed are 'r', 'w', and 'a'")
        if mode == 'r' and not self.can_read:
            raise NotImplementedError('No reader implemented for {} format'
                                      .format(self.name))
        if mode == 'w' and not self.can_write:
            raise NotImplementedError('No writer implemented for {} format'
                                      .format(self.name))
        if mode == 'a' and not self.can_append:
            raise NotImplementedError('Appending not supported by {} format'
                                      .format(self.name))

        if self.isbinary:
            mode += 'b'

        path = Path(fname)
        return path.open(mode, encoding=self.encoding)

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap ``data`` in BytesIO/StringIO matching the format's mode.

        ``data`` is encoded or decoded as needed, using the format's
        encoding or UTF-8 when none is set.
        """
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory file class: BytesIO if binary, else StringIO."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> Sequence[Atoms]:
        """Parse in-memory ``data`` and return the images as a list.

        Extra keyword arguments are forwarded to the reader.
        """
        with self._buf_as_filelike(data) as fd:
            # self.read() always hands back a generator (see
            # _read_wrapper), also for single-image formats, so it must be
            # consumed -- exactly once, while the buffer is still open.
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> Atoms:
        """Parse in-memory ``data`` and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the format's module defines a reader function."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the format's module defines a writer function."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether a writer exists whose code object has a name 'append'."""
        writefunc = self._writefunc()
        return self.can_write and 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = [f'{name}={value!r}'
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with dashes replaced by underscores."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return ``read_<formatname>`` from the module, or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return ``write_<formatname>`` from the module, or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Callable that reads images, or None (with a FutureWarning)."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the reader so that the result is always a generator."""
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            # Plain functions are adapted to the generator protocol.
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit the FutureWarning about ``read``/``write`` being None."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None. '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Callable that writes images, or None (with a FutureWarning)."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the writer; raise ValueError if there is none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """String containing 'r' and/or 'w' for the supported actions."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name: {self.name}',
                 f'Description: {self.description}',
                 f'Modes: {self.modes}',
                 f'Encoding: {self.encoding}',
                 f'Module: {self.module_name}',
                 f'Code: {self.code}',
                 f'Extensions: {self.extensions}',
                 f'Globs: {self.globs}',
                 f'Magic: {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether the reader/writer takes a file object (code F or B)."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the implementing module.

        Raises UnknownFileTypeError (chained to the ImportError) if the
        module cannot be imported.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}. Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether ``basename`` matches any of the format's globs."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether ``data`` matches the format's magic regex or patterns.

        If ``magic_regex`` is set it is matched at the start of ``data``
        with re.M | re.S; otherwise each magic pattern is matched as a
        case-sensitive glob with a trailing ``*``.
        """
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(
            fnmatchcase(data, magic + b'*')  # type: ignore[operator, type-var]
            for magic in self.magic
        )

259

260

# Registry of IOFormat objects keyed by format name; define_io_format()
# adds an entry (replacing any existing one with the same name).
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a registered file extension to its IOFormat; each extension may be
# claimed by one format only (define_io_format() raises otherwise).
extension2format = {}


all_formats = ioformats  # Aliased for compatibility only. Please do not use.
format2modulename = {}  # Left for compatibility only.

267

268

def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat, register it and return it.

    ``module`` defaults to the format name with dashes turned into
    underscores and is prefixed with ``'ase.io.'`` unless ``external`` is
    true.  ``ext``, ``glob`` and ``magic`` may each be None, a single
    str/bytes or an iterable of them.

    Raises ValueError if one of the extensions is already registered.
    """

    def as_list(patterns):
        # None -> [], single pattern -> [pattern], iterable -> list copy.
        if patterns is None:
            return []
        if isinstance(patterns, (str, bytes)):
            return [patterns]
        return list(patterns)

    if module is None:
        module = name.replace('-', '_')
        format2modulename[name] = module

    module_path = module if external else 'ase.io.' + module

    fmt = IOFormat(name, desc, code, module_name=module_path,
                   encoding=encoding)
    fmt.extensions = as_list(ext)
    fmt.globs = as_list(glob)
    fmt.magic = as_list(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    for extension in fmt.extensions:
        if extension in extension2format:
            raise ValueError(f'extension "{extension}" already registered')
        extension2format[extension] = fmt

    ioformats[name] = fmt
    return fmt

304

305

def get_ioformat(name: str) -> IOFormat:
    """Look up the IOFormat called ``name``.

    Raises UnknownFileTypeError if the name is not registered or if the
    format's module cannot be imported.
    """
    fmt = ioformats.get(name)
    if fmt is None:
        raise UnknownFileTypeError(name)
    # Accessing the property triggers the import, so an unavailable
    # module is reported here rather than at first use.
    fmt.module
    return fmt

314

315

def register_external_io_formats(group):
    """Register every IO format advertised in entry-point group ``group``.

    A format that fails to register only triggers a warning, so that one
    broken plugin cannot prevent the others from being registered.
    """
    # Query the installed entry points once; every call to entry_points()
    # re-reads the metadata of the installed distributions.
    all_entry_points = entry_points()
    if hasattr(all_entry_points, 'select'):
        fmt_entry_points = all_entry_points.select(group=group)
    else:
        # Older importlib.metadata returns a dict-like keyed by group.
        fmt_entry_points = all_entry_points.get(group, ())

    for entry_point in fmt_entry_points:
        try:
            define_external_io_format(entry_point)
        except Exception as exc:
            # Deliberately broad: plugin code is outside our control.
            warnings.warn(
                'Failed to register external '
                f'IO format {entry_point.name}: {exc}'
            )

330

331

def define_external_io_format(entry_point):
    """Load ``entry_point`` and register the format it describes.

    The entry point's name becomes the format name.

    Raises ValueError if that name is already registered and TypeError if
    the loaded object is not an ExternalIOFormat.
    """
    loaded = entry_point.load()

    already_defined = entry_point.name in ioformats
    if already_defined:
        raise ValueError(f'Format {entry_point.name} already defined')

    if isinstance(loaded, ExternalIOFormat):
        F(entry_point.name, **loaded._asdict(), external=True)
        return

    raise TypeError('Wrong type for registering external IO formats '
                    f'in format {entry_point.name}, expected '
                    'ExternalIOFormat')

342

343

# We define all the IO formats below. Each IO format has a code,
# such as '1F', which defines some of the format's properties:
#
# 1=single atoms object
# +=multiple atoms objects
# F=accepts a file-descriptor
# S=needs a file-name str
# B=like F, but opens in binary mode
#
# Keyword arguments (see define_io_format): module = name of the
# implementing module if it differs from the format name, ext = file
# extension(s), glob = file-name pattern(s), magic = byte pattern(s)
# matched against the beginning of the file contents.

F = define_io_format  # Short alias used for the registrations below.
F('abinit-gsr', 'ABINIT GSR file', '1S',
  module='abinit', glob='*o_GSR.nc')
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
# XXX: Define plugin in ase db backends package:
# F('aselmdb', 'ASE LMDB format', '+F')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cjson', 'Chemical json file', '1F', ext='cjson')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol', ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['pwo', 'out'], magic=b'*Program PWSCF')
# NOTE(review): 'exciting' is registered twice.  define_io_format() stores
# formats by name, so the second call replaces the first entry and only
# the 'INFO.out' glob remains in ioformats['exciting'] -- confirm whether
# 'input.xml' was meant to be recognised as well.
F('exciting', 'exciting input', '1F', module='exciting', glob='input.xml')
F('exciting', 'exciting output', '1F', module='exciting', glob='INFO.out')
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('onetep-out', 'ONETEP output file', '+F',
  module='onetep',
  magic=b'*Linear-Scaling Ab Initio Total Energy Program*')
F('onetep-in', 'ONETEP input file', '1F',
  module='onetep',
  magic=[b'*lock species ',
         b'*LOCK SPECIES ',
         b'*--- INPUT FILE ---*'])
F('orca-output', 'ORCA output', '+F',
  module='orca', magic=b'* O R C A *')
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
# The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point
register_external_io_formats('ase.ioformats')

527

528

def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression suffix off a filename.

    The suffixes ``.gz``, ``.bz2`` and ``.xz`` are recognised.  Any other
    filename is returned unchanged, together with ``None``.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without the compression suffix and the suffix
        without its leading dot, or the untouched filename and ``None``
        when no known compression suffix is present.
    """
    # os.path.splitext keeps the leading '.' on the suffix.
    stem, dotted_suffix = os.path.splitext(filename)
    suffix = dotted_suffix.strip('.')

    # Extend this tuple when support for another compression is added.
    if suffix not in ('gz', 'bz2', 'xz'):
        return filename, None
    return stem, suffix

563

564

def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file, transparently handling compression chosen by its suffix.

    Files ending in ``gz`` (gzip), ``bz2`` (bzip2) or ``xz`` (lzma) are
    opened with the corresponding module; all others with builtin `open`.

    The bare modes 'r', 'w' and 'a' are turned into the explicit text
    modes 'rt', 'wt' and 'at'; any other mode string (e.g. 'rb', 'wb') is
    passed on unchanged.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    # Compressed formats sometimes default to binary, so force text mode.
    explicit_text_mode = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = explicit_text_mode.get(mode, mode)

    _root, compression = get_compression(filename)

    if compression is None:
        return open(filename, mode)

    # The compression modules are imported only when actually needed.
    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore[return-value]
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)

    # Unknown compression string: fall back to a plain open.
    return open(filename, mode)

613

614

def is_compressed(fd: io.BufferedIOBase) -> bool:
    """Tell whether ``fd`` is a gzip, bz2 or lzma file object."""
    # Only modules that have already been imported are consulted: this
    # avoids triggering imports, and Python may have been built without
    # e.g. lzma.  A file object of a module that was never imported
    # cannot exist anyway.
    file_classes = (
        ('gzip', 'GzipFile'),
        ('bz2', 'BZ2File'),
        ('lzma', 'LZMAFile'),
    )
    for module_name, class_name in file_classes:
        if module_name not in sys.modules:
            continue
        module = import_module(module_name)
        if isinstance(fd, getattr(module, class_name)):
            return True
    return False

632

633

def wrap_read_function(read, filename, index=None, **kwargs):
    """Adapt a plain read-function to the generator protocol.

    Without ``index`` the single result of ``read(filename, **kwargs)`` is
    yielded; with an index, the items of
    ``read(filename, index, **kwargs)`` are yielded one by one.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)

640

641

# Type of the ``filename`` argument of read(), iread(), write() and
# filetype(): a path given as str or PurePath, or a file object.
NameOrFile = Union[str, PurePath, IO]

643

644

def write(
    filename: NameOrFile,
    images: Union[Atoms, Sequence[Atoms]],
    format: str = None,
    parallel: bool = True,
    append: bool = False,
    **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.  If it
        cannot be determined for a file object, or for '-', json is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one file.
        WARNING: If the file format does not support multiple entries without
        additional keywords/headers, files created using 'append=True'
        might not be readable by any program! They will nevertheless be
        written without error message.

    The use of additional keywords is format specific. write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """

    if isinstance(filename, PurePath):
        filename = str(filename)

    if not isinstance(filename, str):
        # An open file object was given: write to it directly.
        fd = filename  # type: ignore[assignment]
        if format is None:
            try:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
        filename = None  # type: ignore[assignment]
    elif filename == '-':
        fd = sys.stdout
        filename = None  # type: ignore[assignment]
    else:
        fd = None
        if format is None:
            format = filetype(filename, read=False)
            assert isinstance(format, str)

    if not format:
        format = 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(filename, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)

709

710

@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write ``images`` with the IOFormat ``io``.

    Exactly one of ``filename`` and ``fd`` is expected to be set by the
    caller.  Formats that accept a file object are written through ``fd``
    (opened here from ``filename`` and closed again if necessary); other
    formats get the file name.

    Raises ValueError if the format cannot write, cannot hold the number
    of images given, cannot append, or cannot be written to a file object.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError(f'{format}-format can only store 1 Atoms object.')
        images = images[0]

    if not io.can_write:
        raise ValueError(f"Can't write to {format}-format")

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             'file-descriptor using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The writer wants a file name.
        if fd is not None:
            raise ValueError(f"Can't write {format}-format to file-descriptor")
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError(f"Cannot append to {format}-format, "
                             "write-function does not support the append "
                             "keyword.")
        return io.write(filename, images, **kwargs)

    if fd is not None:
        # The caller owns the file object, so it is left open.
        return io.write(fd, images, **kwargs)

    mode = ('a' if append else 'w') + ('b' if io.isbinary else '')
    fd = open_with_compression(filename, mode)
    # XXX remember to re-enable compressed open
    # fd = io.open(filename, mode)
    try:
        return io.write(fd, images, **kwargs)
    finally:
        fd.close()

759

760

def read(
        filename: NameOrFile,
        index: Any = None,
        format: Optional[str] = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.  The name
        '-' means standard input.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default), ``filename`` is split at the last at sign
        ``@`` of its base name; the part after it is used as the index.

    Many formats allow an open file-like object to be passed instead
    of ``filename``.  In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin
    if isinstance(index, str):
        # A string that is not a valid index is passed on unchanged.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1
    if not format:
        format = filetype(filename, read=isinstance(filename, str))

    ioformat = get_ioformat(format)

    # A slice (or unparsed string) selects a list of images; an integer
    # selects the first image of the slice starting at that position.
    want_list = isinstance(index, (slice, str))
    selection = index if want_list else slice(index, None)
    images = _iread(filename, selection, format, ioformat,
                    parallel=parallel, **kwargs)
    if want_list:
        return list(images)
    return next(images)

817

818

def iread(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterator[Atoms]:
    """Lazily read Atoms objects from file.

    Takes the same arguments as `read`, but is a generator yielding the
    selected Atoms objects one by one.  Unlike `read`, all images are
    selected when no index is given."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None, None, None)
    elif not isinstance(index, (slice, str)):
        # Integer i -> slice(i, i + 1); for i == -1 the stop must be None
        # since slice(-1, 0) would be empty.
        index = slice(index, (index + 1) or None)

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)

851

852

@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield the images read from ``filename`` by the IOFormat ``io``.

    ``filename`` is a path or a file object.  A path is opened here (and
    closed again) if the format accepts file objects.  With
    ``full_output`` the complete dict produced by the reader is yielded,
    otherwise only its ``'atoms'`` entry.
    """
    if not io.can_read:
        raise ValueError(f"Can't read from {format}-format")

    if io.single:
        # Single-image readers take no index argument.
        start = index.start
        assert start in (None, 0, -1)
        args = ()
    else:
        args = (index,)

    fd = filename
    must_close_fd = False
    if not isinstance(filename, str):
        assert io.acceptsfd
    elif io.acceptsfd:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
        must_close_fd = True

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *args, **kwargs):
            record = item if isinstance(item, dict) else {'atoms': item}
            yield record if full_output else record['atoms']
    finally:
        if must_close_fd:
            fd.close()

891

892

def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split a trailing ``@index`` off ``filename``.

    Returns ``(filename, index)``.  The input is returned unchanged if
    ``filename`` is not a str, if splitting is disabled or if the base
    name contains no ``@``.  Otherwise the part after the last ``@`` is
    removed from the filename and parsed as the index, except when
    ``index`` is already a slice, which then takes precedence.  A part
    that cannot be parsed is returned as a str after a warning.
    """
    if not isinstance(filename, str):
        return filename, index

    if do_not_split_by_at_sign or '@' not in os.path.basename(filename):
        return filename, index

    path, _, index_text = filename.rpartition('@')

    if isinstance(index, slice):
        return path, index

    try:
        parsed_index = string2index(index_text)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
        parsed_index = index_text
    return path, parsed_index

914

915

def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose magic matches ``data``.

    Only the first PEEK_BYTES bytes of ``data`` are considered.

    Raises UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    found = next(
        (candidate for candidate in ioformats.values()
         if candidate.match_magic(head)),
        None)
    if found is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return found

922

923

def filetype(
    filename: NameOrFile,
    read: bool = True,
    guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then up to PEEK_BYTES bytes from the
    start of the file will be read and analysed.  Turn off this second
    part by using read=False.

    filename: str or file
        Path or file object.  A file object is identified through its
        ``name`` attribute if that is a str.  After its contents have
        been inspected, a file object is rewound to position 0 and left
        open; only a file opened by this function is closed.
    read: bool
        Whether the contents of a named file may be inspected.
    guess: bool
        Whether an unregistered file extension may be returned as the
        format name when nothing else matches.

    Raises UnknownFileTypeError if the type cannot be determined.

    Can be used from the command-line also::

        $ ase info filename ...
    """

    orig_filename = filename
    if hasattr(filename, 'name'):
        filename = filename.name

    ext = None
    opened_here = False
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith(('mysql', 'mariadb')):
            return 'mysql'

        if filename.endswith('aselmdb'):
            return 'db'

        # strip any compression extensions that can be read
        root, _compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
            opened_here = True
        else:
            # A file object with a name: read from the object itself.
            fd = orig_filename  # type: ignore[assignment]
    else:
        # Use the object that was passed in; ``filename`` may by now be a
        # non-str ``name`` attribute (e.g. an integer file descriptor).
        fd = orig_filename
        if fd is sys.stdin:
            return 'json'

    # NOTE(review): the magic patterns are bytes, so ``fd`` presumably has
    # to be a binary file object -- confirm for caller-supplied objects.
    try:
        data = fd.read(PEEK_BYTES)
    finally:
        if opened_here:
            fd.close()
    if not opened_here:
        # Never close a file object owned by the caller; rewind it so it
        # can still be read from the beginning.
        fd.seek(0)

    if len(data) == 0:
        # f-string: ``filename`` is not necessarily a str here.
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format

1020

1021

def index2range(index, length):
    """Translate an integer or slice into a range over ``length`` items.

    A slice gives the range of positions it selects; an integer gives a
    range holding just that (non-negative) position."""
    selected = range(length)[index]
    if not isinstance(selected, numbers.Integral):
        return selected
    return range(selected, selected + 1)

1029 return obj