Coverage for /builds/ericyuan00000/ase/ase/utils/linesearcharmijo.py: 61.39%

158 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-06-18 01:20 +0000

1# fmt: off 

2 

3# flake8: noqa 

4import logging 

5import math 

6 

7import numpy as np 

8 

9# CO <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 

10try: 

11 import scipy 

12 import scipy.linalg 

13 have_scipy = True 

14except ImportError: 

15 have_scipy = False 

16# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 

17 

18from ase.utils import longsum 

19 

20logger = logging.getLogger(__name__) 

21 

22# CO <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 

23 

24 

25class LinearPath: 

26 """Describes a linear search path of the form t -> t g 

27 """ 

28 

29 def __init__(self, dirn): 

30 """Initialise LinearPath object 

31 

32 Args: 

33 dirn : search direction 

34 """ 

35 self.dirn = dirn 

36 

37 def step(self, alpha): 

38 return alpha * self.dirn 

39 

40 

41def nullspace(A, myeps=1e-10): 

42 """The RumPath class needs the ability to compute the null-space of 

43 a small matrix. This is provided here. But we now also need scipy! 

44 

45 This routine was copy-pasted from 

46 http://stackoverflow.com/questions/5889142/python-numpy-scipy-finding-the-null-space-of-a-matrix 

47 How the h*** does numpy/scipy not have a null-space implemented? 

48 """ 

49 u, s, vh = scipy.linalg.svd(A) 

50 padding = max(0, np.shape(A)[1] - np.shape(s)[0]) 

51 null_mask = np.concatenate(((s <= myeps), 

52 np.ones((padding,), dtype=bool)), 

53 axis=0) 

54 null_space = scipy.compress(null_mask, vh, axis=0) 

55 return scipy.transpose(null_space) 

56 

57 

58class RumPath: 

59 """Describes a curved search path, taking into account information 

60 about (near-) rigid unit motions (RUMs). 

61 

62 One can tag sub-molecules of the system, which are collections of 

63 particles that form a (near-)rigid unit. Let x1, ... xn be the positions 

64 of one such molecule, then we construct a path of the form 

65 xi(t) = xi(0) + (exp(K t) - I) yi + t wi + t c 

66 where yi = xi - <x>, c = <g> is a rigid translation, K is anti-symmetric 

67 so that exp(tK) yi denotes a rotation about the centre of mass, and wi 

68 is the remainind stretch of the molecule. 

69 

70 The following variables are stored: 

71 * rotation_factors : array of acceleration factors 

72 * rigid_units : array of molecule indices 

73 * stretch : w 

74 * K : list of K matrices 

75 * y : list of y-vectors 

76 """ 

77 

78 def __init__(self, x_start, dirn, rigid_units, rotation_factors): 

79 """Initialise a `RumPath` 

80 

81 Args: 

82 x_start : vector containing the positions in d x nAt shape 

83 dirn : search direction, same shape as x_start vector 

84 rigid_units : array of arrays of molecule indices 

85 rotation_factors : factor by which the rotation of each molecular 

86 is accelerated; array of scalars, same length as 

87 rigid_units 

88 """ 

89 

90 if not have_scipy: 

91 raise RuntimeError( 

92 "RumPath depends on scipy, which could not be imported") 

93 

94 # keep some stuff stored 

95 self.rotation_factors = rotation_factors 

96 self.rigid_units = rigid_units 

97 # create storage for more stuff 

98 self.K = [] 

99 self.y = [] 

100 # We need to reshape x_start and dirn since we want to apply 

101 # rotations to individual position vectors! 

102 # we will eventually store the stretch in w, X is just a reference 

103 # to x_start with different shape 

104 w = dirn.copy().reshape([3, len(dirn) / 3]) 

105 X = x_start.reshape([3, len(dirn) / 3]) 

106 

107 for I in rigid_units: # I is a list of indices for one molecule 

108 # get the positions of the i-th molecule, subtract mean 

109 x = X[:, I] 

110 y = x - x.mean(0).T # PBC? 

111 # same for forces >>> translation component 

112 g = w[:, I] 

113 f = g - g.mean(0).T 

114 # compute the system to solve for K (see accompanying note!) 

115 # A = \sum_j Yj Yj' 

116 # b = \sum_j Yj' fj 

117 A = np.zeros((3, 3)) 

118 b = np.zeros(3) 

119 for j in range(len(I)): 

120 Yj = np.array([[y[1, j], 0.0, -y[2, j]], 

121 [-y[0, j], y[2, j], 0.0], 

122 [0.0, -y[1, j], y[0, j]]]) 

123 A += np.dot(Yj.T, Yj) 

124 b += np.dot(Yj.T, f[:, j]) 

125 # If the directions y[:,j] span all of R^3 (canonically this is true 

126 # when there are at least three atoms in the molecule) but if 

127 # not, then A is singular so we cannot solve A k = b. In this case 

128 # we solve Ak = b in the space orthogonal to the null-space of A. 

129 # TODO: 

130 # this can get unstable if A is "near-singular"! We may 

131 # need to revisit this idea at some point to get something 

132 # more robust 

133 N = nullspace(A) 

134 b -= np.dot(np.dot(N, N.T), b) 

135 A += np.dot(N, N.T) 

136 k = scipy.linalg.solve(A, b, sym_pos=True) 

137 K = np.array([[0.0, k[0], -k[2]], 

138 [-k[0], 0.0, k[1]], 

139 [k[2], -k[1], 0.0]]) 

140 # now remove the rotational component from the search direction 

141 # ( we actually keep the translational component as part of w, 

142 # but this could be changed as well! ) 

143 w[:, I] -= np.dot(K, y) 

144 # store K and y 

145 self.K.append(K) 

146 self.y.append(y) 

147 

148 # store the stretch (no need to copy here, since w is already a copy) 

149 self.stretch = w 

150 

151 def step(self, alpha): 

152 """perform a step in the line-search, given a step-length alpha 

153 

154 Args: 

155 alpha : step-length 

156 

157 Returns: 

158 s : update for positions 

159 """ 

160 # translation and stretch 

161 s = alpha * self.stretch 

162 # loop through rigid_units 

163 for (I, K, y, rf) in zip(self.rigid_units, self.K, self.y, 

164 self.rotation_factors): 

165 # with matrix exponentials: 

166 # s[:, I] += expm(K * alpha * rf) * p.y - p.y 

167 # third-order taylor approximation: 

168 # I + t K + 1/2 t^2 K^2 + 1/6 t^3 K^3 - I 

169 # = t K (I + 1/2 t K (I + 1/3 t K)) 

170 aK = alpha * rf * K 

171 s[:, I] += np.dot(aK, y + 0.5 * np.dot(aK, 

172 y + 1 / 3. * np.dot(aK, y))) 

173 

174 return s.ravel() 

175# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 

176 

177 

178class LineSearchArmijo: 

179 

180 def __init__(self, func, c1=0.1, tol=1e-14): 

181 """Initialise the linesearch with set parameters and functions. 

182 

183 Args: 

184 func: the function we are trying to minimise (energy), which should 

185 take an array of positions for its argument 

186 c1: parameter for the sufficient decrease condition in (0.0 0.5) 

187 tol: tolerance for evaluating equality 

188 

189 """ 

190 

191 self.tol = tol 

192 self.func = func 

193 

194 if not (0 < c1 < 0.5): 

195 logger.error("c1 outside of allowed interval (0, 0.5). Replacing with " 

196 "default value.") 

197 print("Warning: C1 outside of allowed interval. Replacing with " 

198 "default value.") 

199 c1 = 0.1 

200 

201 self.c1 = c1 

202 

203 # CO : added rigid_units and rotation_factors 

204 

205 def run(self, x_start, dirn, a_max=None, a_min=None, a1=None, 

206 func_start=None, func_old=None, func_prime_start=None, 

207 rigid_units=None, rotation_factors=None, maxstep=None): 

208 """Perform a backtracking / quadratic-interpolation linesearch 

209 to find an appropriate step length with Armijo condition. 

210 NOTE THIS LINESEARCH DOES NOT IMPOSE WOLFE CONDITIONS! 

211 

212 The idea is to do backtracking via quadratic interpolation, stabilised 

213 by putting a lower bound on the decrease at each linesearch step. 

214 To ensure BFGS-behaviour, whenever "reasonable" we take 1.0 as the 

215 starting step. 

216 

217 Since Armijo does not guarantee convergence of BFGS, the outer 

218 BFGS algorithm must restart when the current search direction 

219 ceases to be a descent direction. 

220 

221 Args: 

222 x_start: vector containing the position to begin the linesearch 

223 from (ie the current location of the optimisation) 

224 dirn: vector pointing in the direction to search in (pk in [NW]). 

225 Note that this does not have to be a unit vector, but the 

226 function will return a value scaled with respect to dirn. 

227 a_max: an upper bound on the maximum step length allowed. Default is 2.0. 

228 a_min: a lower bound on the minimum step length allowed. Default is 1e-10. 

229 A RuntimeError is raised if this bound is violated 

230 during the line search. 

231 a1: the initial guess for an acceptable step length. If no value is 

232 given, this will be set automatically, using quadratic 

233 interpolation using func_old, or "rounded" to 1.0 if the 

234 initial guess lies near 1.0. (specifically for LBFGS) 

235 func_start: the value of func at the start of the linesearch, ie 

236 phi(0). Passing this information avoids potentially expensive 

237 re-calculations 

238 func_prime_start: the value of func_prime at the start of the 

239 linesearch (this will be dotted with dirn to find phi_prime(0)) 

240 func_old: the value of func_start at the previous step taken in 

241 the optimisation (this will be used to calculate the initial 

242 guess for the step length if it is not provided) 

243 rigid_units, rotationfactors : see documentation of RumPath, if it is 

244 unclear what these parameters are, then leave them at None 

245 maxstep: maximum allowed displacement in Angstrom. Default is 0.2. 

246 

247 Returns: 

248 A tuple: (step, func_val, no_update) 

249 

250 step: the final chosen step length, representing the number of 

251 multiples of the direction vector to move 

252 func_val: the value of func after taking this step, ie phi(step) 

253 no_update: true if the linesearch has not performed any updates of 

254 phi or alpha, due to errors or immediate convergence 

255 

256 Raises: 

257 ValueError for problems with arguments 

258 RuntimeError for problems encountered during iteration 

259 """ 

260 

261 a1 = self.handle_args(x_start, dirn, a_max, a_min, a1, func_start, 

262 func_old, func_prime_start, maxstep) 

263 

264 # DEBUG 

265 logger.debug("a1(auto) = %e", a1) 

266 

267 if abs(a1 - 1.0) <= 0.5: 

268 a1 = 1.0 

269 

270 logger.debug("-----------NEW LINESEARCH STARTED---------") 

271 

272 a_final = None 

273 phi_a_final = None 

274 num_iter = 0 

275 

276 # CO <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 

277 # create a search-path 

278 if rigid_units is None: 

279 # standard linear search-path 

280 logger.debug("-----using LinearPath-----") 

281 path = LinearPath(dirn) 

282 else: 

283 logger.debug("-----using RumPath------") 

284 # if rigid_units != None, but rotation_factors == None, then 

285 # raise an error. 

286 if rotation_factors == None: 

287 raise RuntimeError( 

288 'RumPath cannot be created since rotation_factors == None') 

289 path = RumPath(x_start, dirn, rigid_units, rotation_factors) 

290 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 

291 

292 while (True): 

293 

294 logger.debug("-----------NEW ITERATION OF LINESEARCH----------") 

295 logger.debug("Number of linesearch iterations: %d", num_iter) 

296 logger.debug("a1 = %e", a1) 

297 

298 # CO replaced: func_a1 = self.func(x_start + a1 * self.dirn) 

299 func_a1 = self.func(x_start + path.step(a1)) 

300 phi_a1 = func_a1 

301 # compute sufficient decrease (Armijo) condition 

302 suff_dec = (phi_a1 <= self.func_start + 

303 self.c1 * a1 * self.phi_prime_start) 

304 

305 # DEBUG 

306 # print("c1*a1*phi_prime_start = ", self.c1*a1*self.phi_prime_start, 

307 # " | phi_a1 - phi_0 = ", phi_a1 - self.func_start) 

308 logger.info("a1 = %.3f, suff_dec = %r", a1, suff_dec) 

309 if a1 < self.a_min: 

310 raise RuntimeError('a1 < a_min, giving up') 

311 if self.phi_prime_start > 0.0: 

312 raise RuntimeError("self.phi_prime_start > 0.0") 

313 

314 # check sufficient decrease (Armijo condition) 

315 if suff_dec: 

316 a_final = a1 

317 phi_a_final = phi_a1 

318 logger.debug("Linesearch returned a = %e, phi_a = %e", 

319 a_final, phi_a_final) 

320 logger.debug("-----------LINESEARCH COMPLETE-----------") 

321 return a_final, phi_a_final, num_iter == 0 

322 

323 # we don't have sufficient decrease, so we need to compute a 

324 # new trial step-length 

325 at = - ((self.phi_prime_start * a1) / 

326 (2 * ((phi_a1 - self.func_start) / a1 - self.phi_prime_start))) 

327 logger.debug("quadratic_min: initial at = %e", at) 

328 

329 # because a1 does not satisfy Armijo it follows that at must 

330 # lie between 0 and a1. In fact, more strongly, 

331 # at \leq (2 (1-c1))^{-1} a1, which is a back-tracking condition 

332 # therefore, we should now only check that at has not become too small, 

333 # in which case it is likely that nonlinearity has played a big role 

334 # here, so we take an ultra-conservative backtracking step 

335 a1 = max(at, a1 / 10.0) 

336 if a1 > at: 

337 logger.debug( 

338 "at (%e) < a1/10: revert to backtracking a1/10", at) 

339 

340 # (end of while(True) line-search loop) 

341 # (end of run()) 

342 

343 def handle_args(self, x_start, dirn, a_max, a_min, a1, func_start, func_old, 

344 func_prime_start, maxstep): 

345 """Verify passed parameters and set appropriate attributes accordingly. 

346 

347 A suitable value for the initial step-length guess will be either 

348 verified or calculated, stored in the attribute self.a_start, and 

349 returned. 

350 

351 Args: 

352 The args should be identical to those of self.run(). 

353 

354 Returns: 

355 The suitable initial step-length guess a_start 

356 

357 Raises: 

358 ValueError for problems with arguments 

359 

360 """ 

361 

362 self.a_max = a_max 

363 self.a_min = a_min 

364 self.x_start = x_start 

365 self.dirn = dirn 

366 self.func_old = func_old 

367 self.func_start = func_start 

368 self.func_prime_start = func_prime_start 

369 

370 if a_max is None: 

371 a_max = 2.0 

372 

373 if a_max < self.tol: 

374 logger.warning("a_max too small relative to tol. Reverting to " 

375 "default value a_max = 2.0 (twice the <ideal> step).") 

376 a_max = 2.0 # THIS ASSUMES NEWTON/BFGS TYPE BEHAVIOUR! 

377 

378 if self.a_min is None: 

379 self.a_min = 1e-10 

380 

381 if func_start is None: 

382 logger.debug("Setting func_start") 

383 self.func_start = self.func(x_start) 

384 

385 self.phi_prime_start = longsum(self.func_prime_start * self.dirn) 

386 if self.phi_prime_start >= 0: 

387 logger.error( 

388 "Passed direction which is not downhill. Aborting...: %e", 

389 self.phi_prime_start 

390 ) 

391 raise ValueError("Direction is not downhill.") 

392 elif math.isinf(self.phi_prime_start): 

393 logger.error("Passed func_prime_start and dirn which are too big. " 

394 "Aborting...") 

395 raise ValueError("func_prime_start and dirn are too big.") 

396 

397 if a1 is None: 

398 if func_old is not None: 

399 # Interpolating a quadratic to func and func_old - see NW 

400 # equation 3.60 

401 a1 = 2 * (self.func_start - self.func_old) / \ 

402 self.phi_prime_start 

403 logger.debug("Interpolated quadratic, obtained a1 = %e", a1) 

404 if a1 is None or a1 > a_max: 

405 logger.debug("a1 greater than a_max. Reverting to default value " 

406 "a1 = 1.0") 

407 a1 = 1.0 

408 if a1 is None or a1 < self.tol: 

409 logger.debug("a1 is None or a1 < self.tol. Reverting to default value " 

410 "a1 = 1.0") 

411 a1 = 1.0 

412 if a1 is None or a1 < self.a_min: 

413 logger.debug("a1 is None or a1 < a_min. Reverting to default value " 

414 "a1 = 1.0") 

415 a1 = 1.0 

416 

417 if maxstep is None: 

418 maxstep = 0.2 

419 logger.debug("maxstep = %e", maxstep) 

420 

421 r = np.reshape(dirn, (-1, 3)) 

422 steplengths = ((a1 * r)**2).sum(1)**0.5 

423 maxsteplength = np.max(steplengths) 

424 if maxsteplength >= maxstep: 

425 a1 *= maxstep / maxsteplength 

426 logger.debug("Rescaled a1 to fulfill maxstep criterion") 

427 

428 self.a_start = a1 

429 

430 logger.debug("phi_start = %e, phi_prime_start = %e", self.func_start, 

431 self.phi_prime_start) 

432 logger.debug("func_start = %s, self.func_old = %s", self.func_start, 

433 self.func_old) 

434 logger.debug("a1 = %e, a_max = %e, a_min = %e", a1, a_max, self.a_min) 

435 

436 return a1