Skip to content

Commit

Permalink
udf timing
Browse files Browse the repository at this point in the history
  • Loading branch information
ajz34 committed May 22, 2021
1 parent 7943e50 commit 347c93e
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 5 deletions.
6 changes: 4 additions & 2 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
## 1. 服务器部署

- [ ] 1-1 需要再验证一下大体系、小内存情况下的数值导数
- [ ] 1-2 需要检查内存是否控制过于严格或产生溢出 (特别是开壳层)
- [x] 1-2 需要检查内存是否控制过于严格或产生溢出 (特别是开壳层) **结论:似乎没有严重溢出,但有些过剩。以后还需要微调。**
- [ ] 1-3 **未完全解决** 确定 cProfile 的工作流程,并确定各函数调用时间与打印方式
- [x] 1-4 确定较为自动的脚本,使得对于同一输入卡,服务器与本地可以轻松地分别执行大分子与小分子计算
- [ ] 1-5 确定是否能写队列脚本,是否可以用 Gaussian 输入卡作为 CLI
Expand Down Expand Up @@ -88,4 +88,6 @@
- [x] 8-1 `get_gradient_jk`:并行效率 37/40,无需更改
- [ ] 8-2 `get_gradient_gga`:并行效率存在问题,但似乎是内存 bandwidth 控制,难以修改代码
- [ ] 8-3 `get_cderi_mo` 与 `get_cpks_eri`:这些涉及到是否允许 async 读写盘;但目前似乎无法判断程序效率。
甚至感到使用 async 之后程序效率更低;可能需要询问专家了。
甚至感到使用 async 之后程序效率更低;可能需要询问专家了。
- [ ] 8-4 `Ax0_Core_HF` 与 `Ax0_cpks_HF` 的效率在小体系体现不出问题,但大体系需要关心。
还是解决不了异步的问题。
1 change: 1 addition & 0 deletions pyscf/dh/dhutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ def gen_shl_batch(mol, blksize, start_id=0, stop_id=None):

def calc_batch_size(unit_flop, mem_avail, pre_flop=0):
    """Return how many batch units fit into the available memory.

    Parameters
    ----------
    unit_flop : number
        Size of one batch unit, counted in 8-byte (double) words
        — presumably per-batch tensor element count; confirm with callers.
    mem_avail : number
        Available memory in MB.
    pre_flop : number, optional
        Memory already committed before batching, in 8-byte words.

    Returns
    -------
    int
        Batch size; always at least 1 so the caller makes progress
        even when memory is exhausted.
    """
    # Keep a 20% safety margin on the reported available memory,
    # then subtract what is already allocated (words -> MB via *8/1024**2).
    max_memory = 0.8 * mem_avail - pre_flop * 8 / 1024 ** 2
    batch_size = int(max(max_memory // (unit_flop * 8 / 1024 ** 2), 1))
    return batch_size
Expand Down
8 changes: 7 additions & 1 deletion pyscf/dh/grad/udfdh.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from dh import UDFDH
from dh.dhutil import calc_batch_size, gen_batch, gen_shl_batch, tot_size
from dh.dhutil import calc_batch_size, gen_batch, gen_shl_batch, tot_size, timing
from dh.grad.rdfdh import contract_multiple_rho, get_H_1_ao, get_S_1_ao, generator_L_1
import dh.grad.rdfdh
from pyscf import gto, lib, df
Expand All @@ -14,6 +14,7 @@
αα, αβ, ββ = 0, 1, 2


@timing
def get_gradient_jk(dfobj: df.DF, C, D, D_r, Y_mo, cx, cx_n, max_memory=2000):
mol, aux = dfobj.mol, dfobj.auxmol
natm, nao, nmo, nocc = mol.natm, mol.nao, C.shape[-1], mol.nelec
Expand Down Expand Up @@ -105,12 +106,14 @@ def __init__(self, mol: gto.Mole, skip_construct=False, *args, **kwargs):
self.grad_tot = NotImplemented
self.de = NotImplemented

@timing
def prepare_H_1(self):
    """Build the derivative core-Hamiltonian integrals and cache them.

    Stores "H_1_ao" (AO basis) and "H_1_mo" (MO basis, transformed with
    the MO coefficients ``self.C``) in the tensor store.
    """
    ao_block = get_H_1_ao(self.mol)
    self.tensors.create("H_1_ao", ao_block)
    # AO -> MO transformation: C^T . H_1 . C for each spin/atom component.
    mo_block = einsum("sup, Auv, svq -> sApq", self.C, ao_block, self.C)
    self.tensors.create("H_1_mo", mo_block)

@timing
def prepare_S_1(self):
S_1_ao = get_S_1_ao(self.mol)
S_1_mo = einsum("sup, Auv, svq -> sApq", self.C, S_1_ao, self.C)
Expand All @@ -124,6 +127,7 @@ def prepare_gradient_jk(self):
cx_n = self.cx_n if self.xc_n else self.cx
self.grad_jk = get_gradient_jk(self.df_jk, self.C, self.D, D_r, Y_mo, self.cx, cx_n, self.get_memory())

@timing
def prepare_gradient_gga(self):
tensors = self.tensors
if "rho" not in tensors:
Expand Down Expand Up @@ -153,6 +157,7 @@ def prepare_gradient_gga(self):
self.grad_gga = grad_contrib
return self

@timing
def prepare_gradient_pt2(self):
tensors = self.tensors
C, D, e = self.C, self.D, self.e
Expand Down Expand Up @@ -210,6 +215,7 @@ def prepare_gradient_pt2(self):
grad_corr[A] += einsum("Pia, tPia -> t", G_ia_ri[σ], Y_1_ia_ri[σ])
self.grad_pt2 = grad_corr

@timing
def prepare_gradient_enfunc(self):
tensors = self.tensors
natm = self.mol.natm
Expand Down
2 changes: 1 addition & 1 deletion pyscf/dh/rdfdh.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,6 @@ def load(r0, r1, pre):
with lib.call_in_background(load) as bload:
load(0-nbatch, 0, pre_load)
for sA in gen_batch(nocc, nmo, nbatch):
print(sA)
nA = sA.stop - sA.start
sAvir = slice(sA.start - nocc, sA.stop - nocc)
buf_load, pre_load = pre_load, buf_load
Expand Down Expand Up @@ -410,6 +409,7 @@ def get_memory(self): # leave at least 500MB space anyway
return max(self.max_memory - lib.current_memory()[0], 500)

def calc_batch_size(self, unit_flop, pre_flop=0, fixed_mem=None):
print("DEBUG: self.get_memory", self.get_memory())
if self._fixed_batch:
return self._fixed_batch
if fixed_mem:
Expand Down
11 changes: 10 additions & 1 deletion pyscf/dh/udfdh.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def energy_elec(mf: UDFDH, params=None, **kwargs):
# region first derivative related


@timing
def get_eri_cpks(Y_mo_jk, nocc, cx, eri_cpks=None, max_memory=2000):
naux, nmo, _ = Y_mo_jk[0].shape
nvir = nmo - nocc[α], nmo - nocc[β]
Expand Down Expand Up @@ -141,6 +142,7 @@ def Ax0_cpks_HF(eri_cpks, max_memory=2000):
nocc = eri_cpks[αα].shape[1], eri_cpks[ββ].shape[1]
mvir, mocc = max(nvir), max(nocc)

@timing
def Ax0_cpks_HF_inner(X):
prop_shape = X[0].shape[:-2]
X = [X[σ].reshape(-1, X[σ].shape[-2], X[σ].shape[-1]) for σ in (α, β)]
Expand All @@ -165,6 +167,7 @@ def Ax0_Core_HF(si, sa, sj, sb, cx, Y_mo_jk, max_memory=2000):
ni = [si[σ].stop - si[σ].start for σ in (α, β)]
na = [sa[σ].stop - sa[σ].start for σ in (α, β)]

@timing
def Ax0_Core_HF_inner(X):
prop_shape = X[0].shape[:-2]
X = [X[σ].reshape(-1, X[σ].shape[-2], X[σ].shape[-1]) for σ in (α, β)]
Expand All @@ -189,6 +192,7 @@ def Ax0_Core_KS(si, sa, sj, sb, mo_coeff, xc_setting, xc_kernel):
ni, mol, grids, xc, dm = xc_setting
rho, vxc, fxc = xc_kernel

@timing
def Ax0_Core_KS_inner(X):
prop_shape = X[0].shape[:-2]
X = [X[σ].reshape(-1, X[σ].shape[-2], X[σ].shape[-1]) for σ in (α, β)]
Expand Down Expand Up @@ -220,6 +224,7 @@ def __init__(self,
self.mvir = NotImplemented
self.mocc = max(max(self.nocc), 1)

@timing
def run_scf(self):
self.mf_s.grids = self.mf_n.grids = self.grids
self.build()
Expand Down Expand Up @@ -285,6 +290,7 @@ def Ax0_cpks_inner(X):
return [ax0_hf[σ] + ax0_ks[σ] for σ in (α, β)]
return Ax0_cpks_inner

@timing
def solve_cpks(self, rhs):
nocc, nvir = self.nocc, self.nvir

Expand Down Expand Up @@ -323,7 +329,8 @@ def prepare_integral(self):
get_eri_cpks([tensors["Y_mo_jk" + str(σ)] for σ in (α, β)], nocc, self.cx, eri_cpks, self.get_memory())
return self

def prepare_pt2(self, dump_t_ijab=True):
@timing
def prepare_pt2(self, dump_t_ijab=True, fast_trans=True):
tensors = self.tensors
nvir, nocc, nmo = self.nvir, self.nocc, self.nmo
mocc, mvir = max(nocc), max(nvir)
Expand Down Expand Up @@ -403,6 +410,7 @@ def prepare_pt2(self, dump_t_ijab=True):

return self

@timing
def prepare_lagrangian(self, gen_W=False):
tensors = self.tensors
nvir, nocc, nmo, naux = self.nvir, self.nocc, self.nmo, self.df_ri.get_naoaux()
Expand Down Expand Up @@ -444,6 +452,7 @@ def prepare_lagrangian(self, gen_W=False):
tensors.create("L" + str(σ), L[σ])
return self

@timing
def prepare_D_r(self):
tensors = self.tensors
sv, so = self.sv, self.so
Expand Down

0 comments on commit 347c93e

Please sign in to comment.