Coverage for /builds/ericyuan00000/ase/ase/io/formats.py: 89.03%
547 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-06-18 01:20 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-06-18 01:20 +0000
1# fmt: off
3"""File formats.
5This module implements the read(), iread() and write() functions in ase.io.
6For each file format there is an IOFormat object.
8There is a dict, ioformats, which stores the objects.
10Example
11=======
13The xyz format is implemented in the ase/io/xyz.py file which has a
14read_xyz() generator and a write_xyz() function. This and other
15information can be obtained from ioformats['xyz'].
16"""
18import functools
19import inspect
20import io
21import numbers
22import os
23import re
24import sys
25import warnings
26from importlib import import_module
27from importlib.metadata import entry_points
28from pathlib import Path, PurePath
29from typing import (
30 IO,
31 Any,
32 Dict,
33 Iterator,
34 List,
35 Optional,
36 Sequence,
37 Tuple,
38 Union,
39)
41from ase.atoms import Atoms
42from ase.parallel import parallel_function, parallel_generator
43from ase.utils import string2index
44from ase.utils.plugins import ExternalIOFormat
# Maximum number of bytes taken from the start of a file when guessing its
# format from the contents (used by match_magic() and filetype()).
PEEK_BYTES = 50000
class UnknownFileTypeError(Exception):
    """Raised when a file format is unknown or cannot be determined."""
class IOFormat:
    """Metadata for one file format plus access to its reader and writer.

    The reader and writer are looked up lazily as ``read_<name>`` and
    ``write_<name>`` (with dashes in the format name replaced by
    underscores) in the module named by ``module_name``.
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        """Store the description of a format.

        name: name of the format, e.g. ``'xyz'``.
        desc: human readable description.
        code: two characters; the first is ``'1'`` or ``'+'``, the second
            is one of ``'B'``, ``'F'`` or ``'S'``.
        module_name: name of the module providing the read/write functions.
        encoding: text encoding used when opening files or converting
            buffers; ``None`` means no explicit encoding.
        """
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        self.magic: List[str] = []
        self.magic_regex: Optional[bytes] = None

    def open(self, fname, mode: str = 'r') -> IO:
        """Open *fname* in mode ``'r'``, ``'w'`` or ``'a'`` for this format.

        Raises ValueError for any other mode and NotImplementedError if
        the format cannot be read/written/appended as requested.
        """
        # We might want append mode, too
        # We can allow more flags as needed (buffering etc.)
        if mode not in list('rwa'):
            raise ValueError("Only modes allowed are 'r', 'w', and 'a'")
        if mode == 'r' and not self.can_read:
            raise NotImplementedError('No reader implemented for {} format'
                                      .format(self.name))
        if mode == 'w' and not self.can_write:
            raise NotImplementedError('No writer implemented for {} format'
                                      .format(self.name))
        if mode == 'a' and not self.can_append:
            raise NotImplementedError('Appending not supported by {} format'
                                      .format(self.name))

        encoding = self.encoding
        if self.isbinary:
            mode += 'b'
            # A binary stream takes no encoding; passing one would make
            # open() raise ValueError.
            encoding = None

        path = Path(fname)
        return path.open(mode, encoding=encoding)

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap *data* in a BytesIO or StringIO matching this format."""
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory stream class: BytesIO if binary, else StringIO."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> Sequence[Atoms]:
        """Parse *data* held in memory and return the images as a list.

        The reader always hands back a generator (see _read_wrapper()),
        also for single-image formats, so the result is materialised
        while the buffer is still open.
        """
        with self._buf_as_filelike(data) as fd:
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> Atoms:
        """Parse *data* and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the module provides a read function for this format."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the module provides a write function for this format."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function has a variable named ``append``."""
        writefunc = self._writefunc()
        return self.can_write and 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = [f'{name}={value!r}'
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with dashes replaced by underscores."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return the module's ``read_<name>`` function, or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return the module's ``write_<name>`` function, or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Callable that reads this format, or None (with a FutureWarning)."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the read function, wrapping plain functions as generators."""
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit a FutureWarning about *action* returning None."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None.  '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Callable that writes this format, or None (with a FutureWarning)."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the write function; raise ValueError if there is none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """String containing 'r' and/or 'w' for the supported operations."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human readable summary of the format."""
        lines = [f'Name:        {self.name}',
                 f'Description: {self.description}',
                 f'Modes:       {self.modes}',
                 f'Encoding:    {self.encoding}',
                 f'Module:      {self.module_name}',
                 f'Code:        {self.code}',
                 f'Extensions:  {self.extensions}',
                 f'Globs:       {self.globs}',
                 f'Magic:       {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """False only for code 'S' (format needs a file name)."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """True for code 'B' (file is opened in binary mode)."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the module implementing this format.

        Raises UnknownFileTypeError (chained to the ImportError) if the
        module cannot be imported.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether *basename* matches any of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether *data* matches magic_regex or one of the magic patterns."""
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(
            fnmatchcase(data, magic + b'*')  # type: ignore[operator, type-var]
            for magic in self.magic
        )
# Registry of IOFormat objects keyed by format name.
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a registered filename extension to its IOFormat.
extension2format = {}


all_formats = ioformats  # Aliased for compatibility only.  Please do not use.
format2modulename = {}  # Left for compatibility only.
def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat and register it in the module-level tables.

    name, desc, code, encoding: passed on to IOFormat.
    module: module name; defaults to *name* with dashes replaced by
        underscores.  Unless *external* is true it is prefixed with
        ``'ase.io.'``.
    ext, glob, magic: a single pattern or an iterable of patterns (or
        None) for extensions, filename globs and magic byte patterns.
    magic_regex: optional regular expression matched against file contents.
    external: whether *module* is already a full module name.

    Returns the new IOFormat.  Raises ValueError if an extension is already
    registered; in that case no registry is modified.
    """
    if module is None:
        module = name.replace('-', '_')
    short_module_name = module

    if not external:
        module = 'ase.io.' + module

    def normalize_patterns(strings):
        """Return *strings* as a list (None -> [], scalar -> [scalar])."""
        if strings is None:
            strings = []
        elif isinstance(strings, (str, bytes)):
            strings = [strings]
        else:
            strings = list(strings)
        return strings

    fmt = IOFormat(name, desc, code, module_name=module,
                   encoding=encoding)
    fmt.extensions = normalize_patterns(ext)
    fmt.globs = normalize_patterns(glob)
    fmt.magic = normalize_patterns(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    # Validate every extension before touching any registry, so that a
    # clash cannot leave a half-registered format behind.
    seen = set()
    for extension in fmt.extensions:
        if extension in extension2format or extension in seen:
            raise ValueError(f'extension "{extension}" already registered')
        seen.add(extension)

    format2modulename[name] = short_module_name
    for extension in fmt.extensions:
        extension2format[extension] = fmt

    ioformats[name] = fmt
    return fmt
def get_ioformat(name: str) -> IOFormat:
    """Look up the IOFormat called *name*.

    Raises UnknownFileTypeError if the name is not registered or if the
    module implementing the format cannot be imported.
    """
    fmt = ioformats.get(name)
    if fmt is None:
        raise UnknownFileTypeError(name)
    # Accessing the property imports the module; an import failure is
    # reported here rather than later.
    fmt.module
    return fmt
def register_external_io_formats(group):
    """Register every IO format published under entry point *group*.

    A format that fails to register only triggers a warning; the
    remaining entry points are still processed.
    """
    available = entry_points()
    if hasattr(available, 'select'):
        candidates = available.select(group=group)
    else:
        # Older interface: mapping from group name to entry points.
        candidates = available.get(group, ())

    for entry_point in candidates:
        try:
            define_external_io_format(entry_point)
        except Exception as exc:
            warnings.warn(
                'Failed to register external '
                f'IO format {entry_point.name}: {exc}'
            )
def define_external_io_format(entry_point):
    """Load *entry_point* and register the format it describes.

    Raises ValueError if a format of that name already exists and
    TypeError if the loaded object is not an ExternalIOFormat.
    """
    fmt = entry_point.load()
    name = entry_point.name

    if name in ioformats:
        raise ValueError(f'Format {name} already defined')

    if not isinstance(fmt, ExternalIOFormat):
        message = ('Wrong type for registering external IO formats '
                   f'in format {name}, expected '
                   'ExternalIOFormat')
        raise TypeError(message)

    F(name, **fmt._asdict(), external=True)
# We define all the IO formats below.  Each IO format has a code,
# such as '1F', which defines some of the format's properties:
#
# 1=single atoms object
# +=multiple atoms objects
# F=accepts a file-descriptor
# S=needs a file-name str
# B=like F, but opens in binary mode

# Short alias so that the registration table below stays compact.
F = define_io_format
F('abinit-gsr', 'ABINIT GSR file', '1S',
  module='abinit', glob='*o_GSR.nc')
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
# XXX: Define plugin in ase db backends package:
# F('aselmdb', 'ASE LMDB format', '+F')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cjson', 'Chemical json file', '1F', ext='cjson')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol', ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['pwo', 'out'], magic=b'*Program PWSCF')
# NOTE(review): the next two calls register the same name 'exciting'.
# define_io_format() stores formats by name, so the second call replaces
# the first in ioformats and the 'input.xml' glob is never consulted when
# matching file names -- confirm whether one entry with both globs (or two
# distinct names) was intended.
F('exciting', 'exciting input', '1F', module='exciting', glob='input.xml')
F('exciting', 'exciting output', '1F', module='exciting', glob='INFO.out')
# Built-in format registrations 'extxyz' through 'onetep-in'
# (F is the alias for define_io_format).
# Note: 'extxyz' owns the 'xyz' extension, so plain .xyz files resolve to it.
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('onetep-out', 'ONETEP output file', '+F',
  module='onetep',
  magic=b'*Linear-Scaling Ab Initio Total Energy Program*')
F('onetep-in', 'ONETEP input file', '1F',
  module='onetep',
  magic=[b'*lock species ',
         b'*LOCK SPECIES ',
         b'*--- INPUT FILE ---*'])
# Built-in format registrations 'orca-output' through 'xyz'
# (F is the alias for define_io_format), followed by the registration of
# formats provided by external packages.
F('orca-output', 'ORCA output', '+F',
  module='orca', magic=b'* O R C A *')
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point
# (runs at import time; failures only produce warnings).
register_external_io_formats('ase.ioformats')
def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression suffix off a filename.

    Only ``.gz``, ``.bz2`` and ``.xz`` are recognised; any other name is
    returned unchanged together with ``None``.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without the compression suffix and the name of the
        compression format, or the untouched filename and ``None``.
    """
    # Update if anything is added
    known_suffixes = ('gz', 'bz2', 'xz')

    # os.path.splitext keeps the leading '.' on the extension.
    stem, dotted_suffix = os.path.splitext(filename)
    suffix = dotted_suffix.strip('.')

    if suffix not in known_suffixes:
        return filename, None
    return stem, suffix
def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file, transparently handling compression guessed from its name.

    Behaves like the builtin `open`, except that names ending in
    ``gz`` (gzip), ``bz2`` (bzip2) or ``xz`` (lzma) are opened through
    the matching compression module.

    Supported modes are:
       * 'r', 'rt', 'w', 'wt' for text mode read and write.
       * 'rb', 'wb' for binary read and write.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """

    # Compressed formats sometimes default to binary, so force text mode.
    text_modes = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = text_modes.get(mode, mode)

    _root, compression = get_compression(filename)

    # The compression modules are imported only when actually needed.
    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore[return-value]
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)

    # Either None or unknown string
    return open(filename, mode)
def is_compressed(fd: io.BufferedIOBase) -> bool:
    """Tell whether *fd* is a gzip, bz2 or lzma file object."""
    compressed_classes = []

    # Only modules that are already loaded are consulted: this avoids
    # triggering imports, and Python may have been built without e.g. lzma.
    if 'gzip' in sys.modules:
        import gzip
        compressed_classes.append(gzip.GzipFile)
    if 'bz2' in sys.modules:
        import bz2
        compressed_classes.append(bz2.BZ2File)
    if 'lzma' in sys.modules:
        import lzma
        compressed_classes.append(lzma.LZMAFile)

    return isinstance(fd, tuple(compressed_classes))
def wrap_read_function(read, filename, index=None, **kwargs):
    """Turn a plain read function into a generator of its results.

    Without *index* the single return value of ``read`` is yielded;
    with *index* every item of the returned iterable is yielded.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)
# Type accepted wherever a file may be given: a name (str or path object)
# or a file object.
NameOrFile = Union[str, PurePath, IO]
def write(
        filename: NameOrFile,
        images: Union[Atoms, Sequence[Atoms]],
        format: str = None,
        parallel: bool = True,
        append: bool = False,
        **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.
        When no format can be determined for a file object or for
        standard output, 'json' is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one file.
        WARNING: If the file format does not support multiple entries without
        additional keywords/headers, files created using 'append=True'
        might not be readable by any program! They will nevertheless be
        written without error message.

    The use of additional keywords is format specific. write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """

    if isinstance(filename, PurePath):
        filename = str(filename)

    if not isinstance(filename, str):
        # A file object was given: the format is guessed from its name if
        # possible, and failing to guess is not an error here.
        fd = filename  # type: ignore[assignment]
        if format is None:
            try:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
        filename = None  # type: ignore[assignment]
    elif filename == '-':
        fd = sys.stdout
        filename = None  # type: ignore[assignment]
    else:
        fd = None
        if format is None:
            format = filetype(filename, read=False)
            assert isinstance(format, str)

    if not format:
        format = 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(filename, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)
@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write *images* using the IOFormat *io* to *filename* or *fd*.

    Exactly one of *filename* and *fd* is expected to be given by the
    caller.  Raises ValueError when the format cannot store the images,
    cannot be written, cannot be written to a file object, or cannot
    append although *append* is requested.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError('{}-format can only store 1 Atoms object.'
                             .format(format))
        images = images[0]

    if not io.can_write:
        raise ValueError(f"Can't write to {format}-format")

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             "file-descriptor using json-format.")
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The writer needs a file name.
        if fd is not None:
            raise ValueError("Can't write {}-format to file-descriptor"
                             .format(format))
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError("Cannot append to {}-format, write-function "
                             "does not support the append keyword."
                             .format(format))
        return io.write(filename, images, **kwargs)

    if fd is not None:
        # The caller owns the file object; it is not closed here.
        return io.write(fd, images, **kwargs)

    mode = ('a' if append else 'w') + ('b' if io.isbinary else '')
    fd = open_with_compression(filename, mode)
    # XXX remember to re-enable compressed open
    # fd = io.open(filename, mode)
    try:
        return io.write(fd, images, **kwargs)
    finally:
        fd.close()
def read(
        filename: NameOrFile,
        index: Any = None,
        format: Optional[str] = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default) ``filename`` is split at the at sign ``@``.

    Many formats allow an open file-like object to be passed instead
    of ``filename``. In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin

    if isinstance(index, str):
        # A string that cannot be converted is handed on unchanged.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    if not isinstance(index, (slice, str)):
        # Single image requested: take the first one from that position.
        images = _iread(filename, slice(index, None), format, ioformat,
                        parallel=parallel, **kwargs)
        return next(images)

    images = _iread(filename, index, format, ioformat, parallel=parallel,
                    **kwargs)
    return list(images)
def iread(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterator[Atoms]:
    """Iterator for reading Atoms objects from file.

    Works as the `read` function, but yields one Atoms object at a time
    instead of all at once.  Without *index* all images are yielded."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        # Same as read(): a string that cannot be converted to an int or
        # slice is passed on unchanged instead of raising ValueError.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None, None, None)

    if not isinstance(index, (slice, str)):
        # Integer index: select exactly that image.  For index == -1 the
        # stop value would be 0, so None is used instead.
        index = slice(index, (index + 1) or None)

    format = format or filetype(filename, read=isinstance(filename, str))
    io = get_ioformat(format)

    yield from _iread(filename, index, format, io, parallel=parallel,
                      **kwargs)
@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield images read from *filename* with the IOFormat *io*.

    *filename* is a file name or a file object.  Items produced by the
    reader that are not dicts are treated as the atoms themselves.  With
    *full_output* the whole dict is yielded, otherwise its 'atoms' entry.
    """

    if not io.can_read:
        raise ValueError(f"Can't read from {format}-format")

    if io.single:
        # Single-image readers take no index argument.
        start = index.start
        assert start is None or start == 0 or start == -1
        reader_args = ()
    else:
        reader_args = (index,)

    fd = filename
    must_close_fd = False
    if not isinstance(filename, str):
        assert io.acceptsfd
    elif io.acceptsfd:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
        must_close_fd = True

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *reader_args, **kwargs):
            if not isinstance(item, dict):
                item = {'atoms': item}
            yield item if full_output else item['atoms']
    finally:
        if must_close_fd:
            fd.close()
def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split a trailing ``@index`` off *filename*.

    Returns ``(filename, index)``.  Nothing is split if *filename* is not a
    string, if splitting is disabled, or if the base name contains no '@'.
    An explicit slice *index* takes precedence over the index in the name.
    If the index in the name cannot be converted, a warning is issued and
    it is returned as the raw string.
    """
    if not isinstance(filename, str):
        return filename, index

    if do_not_split_by_at_sign or '@' not in os.path.basename(filename):
        return filename, index

    stripped_name, index_text = filename.rsplit('@', 1)

    if isinstance(index, slice):
        return stripped_name, index

    parsed_index = index_text
    try:
        parsed_index = string2index(index_text)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
    return stripped_name, parsed_index
def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose magic matches *data*.

    Only the first PEEK_BYTES bytes are inspected.  Raises
    UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    matching = (fmt for fmt in ioformats.values() if fmt.match_magic(head))
    found = next(matching, None)
    if found is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return found
def filetype(
        filename: NameOrFile,
        read: bool = True,
        guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then the first ``PEEK_BYTES`` bytes of
    the file will be read and analysed.  Turn off this second part by using
    read=False.

    filename: file name or file object.  Standard input is always
        reported as 'json'.  A file object passed in is never closed; it
        is rewound to the start after its contents have been inspected.
    read: whether the file contents may be inspected (named files only).
    guess: whether an unregistered extension may be returned as the format.

    Raises UnknownFileTypeError if the type cannot be determined.

    Can be used from the command-line also::

        $ ase info filename ...
    """

    # Checked before looking at ``.name``: standard input normally has a
    # string name and would otherwise be treated like a named file.
    if filename is sys.stdin:
        return 'json'

    orig_filename = filename
    if hasattr(filename, 'name'):
        filename = filename.name

    ext = None
    opened_here = False
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        if filename.endswith('aselmdb'):
            return 'db'

        # strip any compression extensions that can be read
        root, _compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
            opened_here = True
        else:
            fd = orig_filename  # type: ignore[assignment]
    else:
        # File object without a usable (string) name: read from the object
        # itself, not from whatever its ``name`` attribute holds.
        fd = orig_filename

    if opened_here:
        with fd:
            data = fd.read(PEEK_BYTES)
    else:
        # The caller owns this file object: leave it open and rewind it.
        data = fd.read(PEEK_BYTES)
        fd.seek(0)

    if len(data) == 0:
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format
def index2range(index, length):
    """Translate an integer or slice index into a range.

    The index is applied to ``range(length)``.  An integer index gives a
    range holding only that (normalised) integer."""
    selected = range(length)[index]
    if not isinstance(selected, numbers.Integral):
        return selected
    return range(selected, selected + 1)