Coverage for /builds/ericyuan00000/ase/ase/io/formats.py: 89.03%

547 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-06-18 01:20 +0000

1# fmt: off 

2 

"""File formats.

This module implements the read(), iread() and write() functions in ase.io.
For each file format there is an IOFormat object.

There is a dict, ioformats, which stores the objects.

Example
=======

The xyz format is implemented in the ase/io/xyz.py file which has a
read_xyz() generator and a write_xyz() function.  This and other
information can be obtained from ioformats['xyz'].
"""

17 

18import functools 

19import inspect 

20import io 

21import numbers 

22import os 

23import re 

24import sys 

25import warnings 

26from importlib import import_module 

27from importlib.metadata import entry_points 

28from pathlib import Path, PurePath 

29from typing import ( 

30 IO, 

31 Any, 

32 Dict, 

33 Iterator, 

34 List, 

35 Optional, 

36 Sequence, 

37 Tuple, 

38 Union, 

39) 

40 

41from ase.atoms import Atoms 

42from ase.parallel import parallel_function, parallel_generator 

43from ase.utils import string2index 

44from ase.utils.plugins import ExternalIOFormat 

45 

# Maximum number of leading bytes inspected when guessing a file format
# from its contents (see filetype() and match_magic()).
PEEK_BYTES = 50000

47 

48 

class UnknownFileTypeError(Exception):
    """Raised when a file format is not registered or cannot be guessed."""

51 

52 

class IOFormat:
    """Description of one file format plus lazy access to its reader/writer.

    The two-character ``code`` encodes the properties of the format:

    * first character: ``'1'`` (a single Atoms object) or ``'+'``
      (multiple Atoms objects);
    * second character: ``'F'`` (accepts a file descriptor), ``'S'``
      (needs a file name) or ``'B'`` (like ``'F'`` but binary mode).

    The reader and writer are looked up on demand as ``read_<name>`` and
    ``write_<name>`` (dashes in the name replaced by underscores) in the
    module called ``module_name``.
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        self.magic: List[str] = []
        self.magic_regex: Optional[bytes] = None

    def open(self, fname, mode: str = 'r') -> IO:
        """Open ``fname`` for this format in mode 'r', 'w' or 'a'.

        Binary formats get a ``'b'`` appended to the mode.

        Raises ValueError for any other mode and NotImplementedError if
        the format lacks the reader/writer/append support that the mode
        requires.
        """
        # We might want append mode, too
        # We can allow more flags as needed (buffering etc.)
        if mode not in list('rwa'):
            raise ValueError("Only modes allowed are 'r', 'w', and 'a'")
        if mode == 'r' and not self.can_read:
            raise NotImplementedError('No reader implemented for {} format'
                                      .format(self.name))
        if mode == 'w' and not self.can_write:
            raise NotImplementedError('No writer implemented for {} format'
                                      .format(self.name))
        if mode == 'a' and not self.can_append:
            raise NotImplementedError('Appending not supported by {} format'
                                      .format(self.name))

        if self.isbinary:
            mode += 'b'

        path = Path(fname)
        return path.open(mode, encoding=self.encoding)

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap ``data`` in a BytesIO/StringIO matching this format.

        The data is encoded or decoded as needed, using the format's
        encoding or utf-8 when none is set.
        """
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory file class: BytesIO for binary formats, else StringIO."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> Sequence[Atoms]:
        """Parse an in-memory buffer and return all images as a list.

        The reader obtained from ``self.read`` always produces a generator
        (plain read functions are wrapped by wrap_read_function), so the
        result is collected with ``list()`` for single-image and
        multi-image formats alike.  The reader is invoked exactly once.
        """
        with self._buf_as_filelike(data) as fd:
            # Consume the generator while the buffer is still open.
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> Atoms:
        """Parse an in-memory buffer and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the format's module provides a read function."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the format's module provides a write function."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function has a variable named ``append``."""
        writefunc = self._writefunc()
        # NOTE(review): co_varnames lists local variables as well as
        # parameters, and for a decorated function it describes the
        # wrapper -- confirm this heuristic matches the intent.
        return self.can_write and 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = [f'{name}={value!r}'
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with dashes replaced by underscores."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return ``read_<formatname>`` from the module, or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return ``write_<formatname>`` from the module, or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Reader callable; None (with a FutureWarning) if unsupported."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the read function, always returning a generator.

        Read functions that are not generator functions are routed
        through wrap_read_function.
        """
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit the FutureWarning about ``read``/``write`` returning None."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None.  '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Writer callable; None (with a FutureWarning) if unsupported."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the write function; ValueError if the format has none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """Supported modes as a string: '', 'r', 'w' or 'rw'."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name:        {self.name}',
                 f'Description: {self.description}',
                 f'Modes:       {self.modes}',
                 f'Encoding:    {self.encoding}',
                 f'Module:      {self.module_name}',
                 f'Code:        {self.code}',
                 f'Extensions:  {self.extensions}',
                 f'Globs:       {self.globs}',
                 f'Magic:       {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether reader/writer take a file object (code is not 'S')."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the implementing module.

        Raises UnknownFileTypeError (chained to the ImportError) if the
        module cannot be imported.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether ``basename`` matches any of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether ``data`` matches the magic regex or a magic pattern.

        If ``magic_regex`` is set it is the only test; otherwise each
        entry of ``magic`` is matched as a glob against the start of
        ``data``.
        """
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(
            fnmatchcase(data, magic + b'*')  # type: ignore[operator, type-var]
            for magic in self.magic
        )

259 

260 

# Registry of all defined formats, keyed by format name.
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a registered file extension to the IOFormat that claimed it.
extension2format = {}


all_formats = ioformats  # Aliased for compatibility only.  Please do not use.
format2modulename = {}  # Left for compatibility only.

267 

268 

def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat and register it in the module-level registries.

    Parameters
    ==========
    name: str
        Format name; the key used in ``ioformats``.
    desc: str
        Human-readable description.
    code: str
        Two-character code, validated by IOFormat.
    module: str, optional
        Module implementing the format.  Defaults to ``name`` with dashes
        replaced by underscores.  Unless ``external`` is true, the module
        is looked up below ``ase.io``.
    ext, glob, magic: str/bytes or iterable of them, optional
        File extensions, file-name globs and magic byte patterns.
    encoding: str, optional
        Text encoding passed on to IOFormat.
    magic_regex: bytes, optional
        Regular expression tried against the file contents.
    external: bool
        If true, ``module`` is used as given (no ``ase.io.`` prefix).

    Returns the new IOFormat.

    Raises ValueError if one of the extensions is already registered.
    All validation happens before any registry is modified, so a failed
    call leaves ``ioformats``, ``extension2format`` and
    ``format2modulename`` untouched.
    """
    def normalize_patterns(strings):
        # Accept None, a single str/bytes, or any iterable of patterns.
        if strings is None:
            strings = []
        elif isinstance(strings, (str, bytes)):
            strings = [strings]
        else:
            strings = list(strings)
        return strings

    # Only a module name derived from the format name is recorded in the
    # compatibility mapping format2modulename.
    default_module = None
    if module is None:
        module = name.replace('-', '_')
        default_module = module

    if not external:
        module = 'ase.io.' + module

    fmt = IOFormat(name, desc, code, module_name=module,
                   encoding=encoding)
    fmt.extensions = normalize_patterns(ext)
    fmt.globs = normalize_patterns(glob)
    fmt.magic = normalize_patterns(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    # Validate every extension up front so that nothing is half-registered
    # if one of them conflicts.
    seen = set()
    for extension in fmt.extensions:
        if extension in extension2format or extension in seen:
            raise ValueError(f'extension "{extension}" already registered')
        seen.add(extension)

    # Everything checked out: commit to the registries.
    if default_module is not None:
        format2modulename[name] = default_module
    for extension in fmt.extensions:
        extension2format[extension] = fmt

    ioformats[name] = fmt
    return fmt

304 

305 

def get_ioformat(name: str) -> IOFormat:
    """Look up a registered format by name.

    Raises UnknownFileTypeError if the name is not registered, or if the
    module implementing the format cannot be imported.
    """
    if name not in ioformats:
        raise UnknownFileTypeError(name)
    # Touching .module forces the import, which may itself raise.
    ioformats[name].module
    return ioformats[name]

314 

315 

def register_external_io_formats(group):
    """Register every IO format advertised under entry-point ``group``.

    A format that fails to register only produces a warning, so one
    broken plugin does not prevent the others from loading.
    """
    discovered = entry_points()
    # Newer importlib.metadata offers select(); older versions return a
    # dict-like object keyed by group name.
    if hasattr(discovered, 'select'):
        candidates = discovered.select(group=group)
    else:
        candidates = discovered.get(group, ())

    for entry_point in candidates:
        try:
            define_external_io_format(entry_point)
        except Exception as exc:
            warnings.warn(
                'Failed to register external '
                f'IO format {entry_point.name}: {exc}'
            )

330 

331 

def define_external_io_format(entry_point):
    """Load one entry point and register it as an external IO format.

    Raises ValueError if a format with the entry point's name already
    exists, and TypeError if the loaded object is not an ExternalIOFormat.
    """
    fmt = entry_point.load()
    format_name = entry_point.name

    if format_name in ioformats:
        raise ValueError(f'Format {format_name} already defined')

    if not isinstance(fmt, ExternalIOFormat):
        raise TypeError('Wrong type for registering external IO formats '
                        f'in format {format_name}, expected '
                        'ExternalIOFormat')

    # The fields of the ExternalIOFormat become the keyword arguments.
    F(format_name, **fmt._asdict(), external=True)

342 

343 

# We define all the IO formats below.  Each IO format has a code,
# such as '1F', which defines some of the format's properties:
#
# 1=single atoms object
# +=multiple atoms objects
# F=accepts a file-descriptor
# S=needs a file-name str
# B=like F, but opens in binary mode
#
# Keyword arguments used below: module (implementing module below ase.io),
# ext (file extensions), glob (file-name patterns), magic (byte patterns
# matched against the start of the file), magic_regex and encoding.

F = define_io_format
F('abinit-gsr', 'ABINIT GSR file', '1S',
  module='abinit', glob='*o_GSR.nc')
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
# XXX: Define plugin in ase db backends package:
# F('aselmdb', 'ASE LMDB format', '+F')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cjson', 'Chemical json file', '1F', ext='cjson')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol', ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['pwo', 'out'], magic=b'*Program PWSCF')
# NOTE(review): 'exciting' is defined twice.  define_io_format() stores
# formats by name, so the second call replaces the first in ioformats and
# the 'input.xml' glob is no longer consulted -- confirm this is intended.
F('exciting', 'exciting input', '1F', module='exciting', glob='input.xml')
F('exciting', 'exciting output', '1F', module='exciting', glob='INFO.out')
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('onetep-out', 'ONETEP output file', '+F',
  module='onetep',
  magic=b'*Linear-Scaling Ab Initio Total Energy Program*')
F('onetep-in', 'ONETEP input file', '1F',
  module='onetep',
  magic=[b'*lock species ',
         b'*LOCK SPECIES ',
         b'*--- INPUT FILE ---*'])
F('orca-output', 'ORCA output', '+F',
  module='orca', magic=b'* O R C A *')
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point
register_external_io_formats('ase.ioformats')

527 

528 

def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression suffix off a filename.

    Only ``.gz``, ``.bz2`` and ``.xz`` are recognised.  Any other
    filename is returned unchanged, paired with ``None``.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without the compression suffix and the suffix
        itself (without the dot), or the untouched filename and None.
    """
    # Update if anything is added
    known_suffixes = ['gz', 'bz2', 'xz']

    # splitext keeps the leading '.' on the suffix; drop it for comparison.
    stem, dotted_suffix = os.path.splitext(filename)
    suffix = dotted_suffix.strip('.')

    if suffix not in known_suffixes:
        return filename, None
    return stem, suffix

563 

564 

def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file, transparently handling compression chosen by its suffix.

    The suffix is inspected with get_compression(); ``gz`` (gzip),
    ``bz2`` (bzip2) and ``xz`` (lzma) files are opened with the matching
    module, everything else with the builtin ``open``.

    The bare modes 'r', 'w' and 'a' are turned into 'rt', 'wt' and 'at'
    so that compressed files are opened in text mode as well.  Other
    modes, such as 'rb' or 'wb', are passed through unchanged.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    # Compressed formats sometimes default to binary, so force text mode.
    text_modes = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = text_modes.get(mode, mode)

    _root, compression = get_compression(filename)

    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore[return-value]
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)

    # Either None or unknown string
    return open(filename, mode)

613 

614 

def is_compressed(fd: io.BufferedIOBase) -> bool:
    """Tell whether ``fd`` is a gzip, bz2 or lzma file object."""
    # Only consult modules that have already been imported: this avoids
    # triggering imports here, and Python can be compiled without
    # e.g. lzma.
    if 'gzip' in sys.modules:
        import gzip
        if isinstance(fd, gzip.GzipFile):
            return True
    if 'bz2' in sys.modules:
        import bz2
        if isinstance(fd, bz2.BZ2File):
            return True
    if 'lzma' in sys.modules:
        import lzma
        if isinstance(fd, lzma.LZMAFile):
            return True
    return False

632 

633 

def wrap_read_function(read, filename, index=None, **kwargs):
    """Turn a plain read function into a generator.

    Without an index, the single return value of ``read`` is yielded.
    With an index, ``read`` is expected to return an iterable whose
    items are yielded one by one.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)

640 

641 

# Type of the "filename" argument of read(), iread(), write() and
# filetype(): a path as str or PurePath, or a file object.
NameOrFile = Union[str, PurePath, IO]

643 

644 

def write(
        filename: NameOrFile,
        images: Union[Atoms, Sequence[Atoms]],
        format: str = None,
        parallel: bool = True,
        append: bool = False,
        **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format is guessed from the filename; when that is not
        possible for '-' or a file object, 'json' is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one
        file.
        WARNING: If the file format does not support multiple entries
        without additional keywords/headers, files created using
        'append=True' might not be readable by any program!  They will
        nevertheless be written without error message.

    The use of additional keywords is format specific.  write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """
    if isinstance(filename, PurePath):
        filename = str(filename)

    if not isinstance(filename, str):
        # An open file object: try to guess from it, but tolerate failure.
        fd = filename
        if format is None:
            try:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
        filename = None  # type: ignore[assignment]
    elif filename == '-':
        fd = sys.stdout
        filename = None  # type: ignore[assignment]
    else:
        fd = None
        if format is None:
            format = filetype(filename, read=False)
            assert isinstance(format, str)

    if not format:
        format = 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(filename, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)

709 

710 

@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write ``images`` with the writer of ``io``.

    Exactly one of ``filename`` and ``fd`` is expected to be set by
    write().  A file opened here is always closed again; a file object
    passed in by the caller is left open.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError(f'{format}-format can only store 1 Atoms object.')
        images = images[0]

    if not io.can_write:
        raise ValueError(f"Can't write to {format}-format")

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             'file-descriptor using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The writer wants a file name, not a file object.
        if fd is not None:
            raise ValueError(f"Can't write {format}-format to file-descriptor")
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError(f'Cannot append to {format}-format, '
                             'write-function does not support the append '
                             'keyword.')
        return io.write(filename, images, **kwargs)

    opened_here = fd is None
    try:
        if opened_here:
            mode = ('a' if append else 'w') + ('b' if io.isbinary else '')
            fd = open_with_compression(filename, mode)
            # XXX remember to re-enable compressed open
            # fd = io.open(filename, mode)
        return io.write(fd, images, **kwargs)
    finally:
        if opened_here and fd is not None:
            fd.close()

759 

760 

def read(
        filename: NameOrFile,
        index: Any = None,
        format: Optional[str] = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.  The name
        '-' means standard input.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default), a ``filename`` whose base name contains an
        at sign ``@`` is split there into a file name and an index.

    Many formats allow an open file-like object to be passed instead
    of ``filename``.  In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin

    if isinstance(index, str):
        # A string that is not an index expression is passed on unchanged.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    if isinstance(index, (slice, str)):
        images = _iread(filename, index, format, ioformat,
                        parallel=parallel, **kwargs)
        return list(images)

    # A single integer index: take the first image from that position on.
    images = _iread(filename, slice(index, None), format, ioformat,
                    parallel=parallel, **kwargs)
    return next(images)

817 

818 

def iread(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterator[Atoms]:
    """Iterator for reading Atoms objects from file.

    Takes the same arguments as `read`, but yields the Atoms objects one
    at a time instead of returning them all at once.  Without an index,
    all images are yielded."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None, None, None)
    if not isinstance(index, (slice, str)):
        # Single integer: a slice of length one; "or None" makes -1 work.
        index = slice(index, (index + 1) or None)

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)

851 

852 

@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield the images that the reader of ``io`` produces.

    Readers may yield either Atoms objects or dicts with an 'atoms' key.
    With ``full_output`` the dict is yielded, otherwise only the atoms.
    A file opened here is closed when the generator finishes or is
    discarded.
    """
    if not io.can_read:
        raise ValueError(f"Can't read from {format}-format")

    if io.single:
        # Single-image readers take no index; only trivial ones make sense.
        start = index.start
        assert start is None or start == 0 or start == -1
        args = ()
    else:
        args = (index,)

    is_path = isinstance(filename, str)
    if not is_path:
        assert io.acceptsfd

    owns_fd = is_path and io.acceptsfd
    if owns_fd:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
    else:
        fd = filename

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *args, **kwargs):
            record = item if isinstance(item, dict) else {'atoms': item}
            yield record if full_output else record['atoms']
    finally:
        if owns_fd:
            fd.close()

891 

892 

def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split a ``name@index`` filename into name and index.

    Anything that is not a str, or whose base name contains no ``@``, is
    returned unchanged together with ``index``; the same happens when
    ``do_not_split_by_at_sign`` is true.  Otherwise the name is split at
    the last ``@``.  A slice given as ``index`` takes precedence over the
    index in the name.  If the part after the ``@`` cannot be parsed, a
    warning is issued and that part is returned as a plain string.
    """
    splittable = (isinstance(filename, str)
                  and '@' in os.path.basename(filename))
    if do_not_split_by_at_sign or not splittable:
        return filename, index

    path_part, index_part = filename.rsplit('@', 1)

    if isinstance(index, slice):
        return path_part, index

    try:
        index_part = string2index(index_part)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
    return path_part, index_part

914 

915 

def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose magic matches ``data``.

    Only the first PEEK_BYTES bytes are considered.  Raises
    UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    matching = (fmt for fmt in ioformats.values() if fmt.match_magic(head))
    found = next(matching, None)
    if found is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return found

922 

923 

def filetype(
        filename: NameOrFile,
        read: bool = True,
        guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then up to ``PEEK_BYTES`` bytes from
    the start of the file will be read and analysed.  Turn off this second
    part by using read=False.

    filename: str or file
        Path or open file object.  For a file object with a ``name``
        attribute, the name is used for the filename-based checks.
    read: bool
        Whether the contents may be inspected.
    guess: bool
        If True, fall back to returning the bare file extension when
        neither the contents nor the registered extensions identify the
        format.

    A file object passed in by the caller is never closed; it is rewound
    to position 0 after peeking.  A file opened here is always closed.

    Raises UnknownFileTypeError if the file is empty or the type cannot
    be determined.

    Can be used from the command-line also::

        $ ase info filename ...
    """

    orig_filename = filename
    if hasattr(filename, 'name'):
        filename = filename.name

    ext = None
    opened_here = False
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        if filename.endswith('aselmdb'):
            return 'db'

        # strip any compression extensions that can be read
        root, _compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
            opened_here = True
        else:
            # The caller handed us an open file object with a name.
            fd = orig_filename  # type: ignore[assignment]
    else:
        fd = filename
        if fd is sys.stdin:
            return 'json'

    if opened_here:
        # We own this descriptor: close it even if reading fails.
        try:
            data = fd.read(PEEK_BYTES)
        finally:
            fd.close()
    else:
        # The caller owns the file object: rewind it instead of closing.
        data = fd.read(PEEK_BYTES)
        fd.seek(0)

    if len(data) == 0:
        # filename may be a file object here, so format it rather than
        # concatenate.
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format

1020 

1021 

def index2range(index, length):
    """Convert slice or integer to range.

    The index is applied to ``range(length)``.  A slice yields the
    corresponding sub-range; an integer yields a range that contains
    only that (normalised) integer."""
    selected = range(length)[index]
    if not isinstance(selected, numbers.Integral):
        return selected
    return range(selected, selected + 1)