From e9511b18d464ce99a74b492d47a80f5b33e6580b Mon Sep 17 00:00:00 2001 From: zaotian Date: Tue, 16 Jun 2026 17:20:47 +0800 Subject: [PATCH 1/2] hybrid gauge tddft force --- source/source_esolver/esolver_ks_lcao.cpp | 2 +- .../source_esolver/esolver_ks_lcao_tddft.cpp | 3 +- .../source_estate/module_pot/H_TDDFT_pw.cpp | 5 +- .../source_io/module_hs/cal_r_overlap_R.cpp | 40 +++ source/source_io/module_hs/cal_r_overlap_R.h | 14 + source/source_lcao/FORCE_STRESS.cpp | 30 +- source/source_lcao/FORCE_STRESS.h | 3 +- .../module_operator_lcao/td_pot_hybrid.cpp | 9 +- .../module_operator_lcao/td_pot_hybrid.h | 14 + .../td_pot_hybrid_force.hpp | 175 +++++++++++ source/source_lcao/module_rt/CMakeLists.txt | 1 + .../module_rt/force_rt_overlap.cpp | 283 ++++++++++++++++++ .../source_lcao/module_rt/force_rt_overlap.h | 17 ++ source/source_lcao/module_rt/td_folding.cpp | 36 +++ source/source_lcao/module_rt/td_folding.h | 9 + source/source_lcao/module_rt/td_info.cpp | 151 ++++++++++ source/source_lcao/module_rt/td_info.h | 13 + 17 files changed, 795 insertions(+), 10 deletions(-) create mode 100644 source/source_lcao/module_operator_lcao/td_pot_hybrid_force.hpp create mode 100644 source/source_lcao/module_rt/force_rt_overlap.cpp create mode 100644 source/source_lcao/module_rt/force_rt_overlap.h diff --git a/source/source_esolver/esolver_ks_lcao.cpp b/source/source_esolver/esolver_ks_lcao.cpp index ee04d9f1e25..4f1a0b9fe05 100644 --- a/source/source_esolver/esolver_ks_lcao.cpp +++ b/source/source_esolver/esolver_ks_lcao.cpp @@ -245,7 +245,7 @@ void ESolver_KS_LCAO::cal_force(UnitCell& ucell, ModuleBase::matrix& for two_center_bundle_, orb_, force, this->scs, this->locpp, this->sf, this->kv, this->pw_rho, this->solvent, this->dftu, this->deepks, - this->exx_nao, &ucell.symm); + this->exx_nao, &ucell.symm, static_cast*>(this->p_hamilt)); // delete RA after cal_force this->RA.delete_grid(); diff --git a/source/source_esolver/esolver_ks_lcao_tddft.cpp b/source/source_esolver/esolver_ks_lcao_tddft.cpp index fd47b15fc53..c17dc10e83b 100644 --- a/source/source_esolver/esolver_ks_lcao_tddft.cpp +++ b/source/source_esolver/esolver_ks_lcao_tddft.cpp @@ -102,6 +102,7 @@ void ESolver_KS_LCAO_TDDFT::runner(UnitCell& ucell, const int istep) //---------------------------------------------------------------- this->before_scf(ucell, istep); // From ESolver_KS_LCAO td_p->initialize_phase_hybrid(ucell, dynamic_cast, TR>*>(this->p_hamilt)->getHR()); + td_p->calculate_grad_overlap(this->pv, ucell, this->gd, this->orb_.cutoffs(), this->two_center_bundle_.overlap_orb.get()); // Initialize the moving spatial gauge if (use_td_moving_gauge && this->td_mg_ == nullptr) { @@ -388,7 +389,7 @@ void ESolver_KS_LCAO_TDDFT::iter_finish(UnitCell& ucell, // Calculate energy-density matrix for RT-TDDFT if (conv_esolver && estep == estep_max - 1 && istep >= (PARAM.inp.init_wfc == "file" ? 0 : 1) - && PARAM.inp.td_edm == 0) + && PARAM.inp.td_edm == 0 && PARAM.inp.td_stype != 2) { if (use_tensor && use_lapack) { diff --git a/source/source_estate/module_pot/H_TDDFT_pw.cpp b/source/source_estate/module_pot/H_TDDFT_pw.cpp index babece4d784..69d8b7f9a31 100644 --- a/source/source_estate/module_pot/H_TDDFT_pw.cpp +++ b/source/source_estate/module_pot/H_TDDFT_pw.cpp @@ -109,6 +109,7 @@ void H_TDDFT_pw::cal_fixed_v(double* vl_pseudo) // time evolve H_TDDFT_pw::istep++; H_TDDFT_pw::istep_int = istep; + global_vext_time = {0.0, 0.0, 0.0}; // judgement to skip vext if (!PARAM.inp.td_vext || istep > tend || istep < tstart) @@ -124,7 +125,7 @@ void H_TDDFT_pw::cal_fixed_v(double* vl_pseudo) trigo_count = 0; heavi_count = 0; - global_vext_time = {0.0, 0.0, 0.0}; + for (auto direc: PARAM.inp.td_vext_dire) { @@ -272,6 +273,7 @@ void H_TDDFT_pw::update_At() At = At + At_laststep / 2.0; At_laststep.set(0.0, 0.0, 0.0); Et.set(0.0, 0.0, 0.0); + global_vext_time = { 0.0, 0.0, 0.0 }; // judgement to skip vext if (!PARAM.inp.td_vext || istep > tend || istep < tstart) @@ -343,6 +345,7 @@ void H_TDDFT_pw::update_At() count++; } At = At + At_laststep / 2.0; + if(stype==2)global_vext_time = { Et[0],Et[1],Et[2] }; ModuleBase::timer::end("H_TDDFT_pw", "update_At"); return; diff --git a/source/source_io/module_hs/cal_r_overlap_R.cpp b/source/source_io/module_hs/cal_r_overlap_R.cpp index f75570a4116..3ce8311e130 100644 --- a/source/source_io/module_hs/cal_r_overlap_R.cpp +++ b/source/source_io/module_hs/cal_r_overlap_R.cpp @@ -516,7 +516,47 @@ ModuleBase::Vector3 cal_r_overlap_R::get_psi_r_psi(const ModuleBase::Vec return temp_prp; } +ModuleBase::Vector3 cal_r_overlap_R::get_psi_r_gradpsi(const ModuleBase::Vector3& R1, + const int& T1, + const int& L1, + const int& m1, + const int& N1, + const ModuleBase::Vector3& R2, + const int& T2, + const int& L2, + const int& m2, + const int& N2, + const ModuleBase::Vector3& Efield, + const ModuleBase::Vector3& dR) +{ + ModuleBase::Vector3 origin_point(0.0, 0.0, 0.0); + double factor = sqrt(ModuleBase::FOUR_PI / 3.0); + const ModuleBase::Vector3& distance = R2 - R1; + + ModuleBase::Vector3 grad_o = center2_orb11[T1][T2][L1][N1][L2].at(N2).cal_grad_overlap(origin_point, distance, m1, m2); + + ModuleBase::Vector3 grad_rx = -1 * factor * center2_orb21_r[T1][T2][L1][N1][L2].at(N2).cal_grad_overlap(origin_point, + distance, + m1, + 1, + m2); // m = 1 + + ModuleBase::Vector3 grad_ry = -1 * factor * center2_orb21_r[T1][T2][L1][N1][L2].at(N2).cal_grad_overlap(origin_point, + distance, + m1, + 2, + m2); // m = -1 + ModuleBase::Vector3 grad_rz = factor * center2_orb21_r[T1][T2][L1][N1][L2].at(N2).cal_grad_overlap(origin_point, + distance, + m1, + 0, + m2); // m = 0 + + ModuleBase::Vector3 temp_prp = Efield[0] * grad_rx + Efield[1] * grad_ry + Efield[2] * grad_rz + (Efield*(R1-dR)) * grad_o; + + return temp_prp; +} void cal_r_overlap_R::get_psi_r_beta(const UnitCell& ucell, std::vector>& nlm, const ModuleBase::Vector3& R1, diff --git a/source/source_io/module_hs/cal_r_overlap_R.h b/source/source_io/module_hs/cal_r_overlap_R.h index 2329cd4ea85..0f7d51e1491 100644 --- a/source/source_io/module_hs/cal_r_overlap_R.h +++ b/source/source_io/module_hs/cal_r_overlap_R.h @@ -45,6 +45,20 @@ class cal_r_overlap_R const int& m2, const int& N2 ); + ModuleBase::Vector3 get_psi_r_gradpsi( + const ModuleBase::Vector3& R1, + const int& T1, + const int& L1, + const int& m1, + const int& N1, + const ModuleBase::Vector3& R2, + const int& T2, + const int& L2, + const int& m2, + const int& N2, + const ModuleBase::Vector3& Efield, + const ModuleBase::Vector3& dR + ); void get_psi_r_beta( const UnitCell& ucell, std::vector>& nlm, diff --git a/source/source_lcao/FORCE_STRESS.cpp b/source/source_lcao/FORCE_STRESS.cpp index b2e810f2838..cf1e7f7012b 100644 --- a/source/source_lcao/FORCE_STRESS.cpp +++ b/source/source_lcao/FORCE_STRESS.cpp @@ -24,7 +24,9 @@ #include "source_lcao/module_operator_lcao/nonlocal.h" #include "source_lcao/module_operator_lcao/ekinetic.h" #include "source_lcao/module_operator_lcao/overlap.h" +#include "source_lcao/module_operator_lcao/td_pot_hybrid.h" #include "source_lcao/pulay_fs.h" +#include "source_lcao/module_rt/force_rt_overlap.h" // mohan add 2025-11-04 @@ -85,7 +87,8 @@ void Force_Stress_LCAO::getForceStress(UnitCell& ucell, Plus_U &dftu, // mohan add 2025-11-07 Setup_DeePKS& deepks, Exx_NAO &exx_nao, - ModuleSymmetry::Symmetry* symm) + ModuleSymmetry::Symmetry* symm, + hamilt::Hamilt* p_hamilt) { ModuleBase::TITLE("Force_Stress_LCAO", "getForceStress"); ModuleBase::timer::start("Force_Stress_LCAO", "getForceStress"); @@ -113,6 +116,7 @@ void Force_Stress_LCAO::getForceStress(UnitCell& ucell, ModuleBase::matrix fcc; ModuleBase::matrix fscc; ModuleBase::matrix fvnl_dalpha; // deepks + ModuleBase::matrix fpothybrid; fvl_dphi.create(nat, 3); // must do it now, update it later, noted by zhengdy @@ -127,6 +131,7 @@ void Force_Stress_LCAO::getForceStress(UnitCell& ucell, fcc.create(nat, 3); // force due to core correction fscc.create(nat, 3); // force due to self-consistent field fvnl_dalpha.create(nat, 3); // deepks + fpothybrid.create(nat, 3); // pulay force for hybrid gauge rt-tddft // calculate basic terms in Force, same method with PW base this->calForcePwPart(ucell, fvl_dvl, fewalds, fcc, fscc, pelec->f_en.etxc, @@ -199,7 +204,10 @@ void Force_Stress_LCAO::getForceStress(UnitCell& ucell, hamilt::Overlap> tmp_overlap( nullptr, kv.kvec_d, nullptr, nullptr, &ucell, orb.cutoffs(), &gd, two_center_bundle.overlap_orb.get()); - tmp_overlap.cal_force_stress(isforce, isstress, edmR, foverlap, soverlap); + if(PARAM.inp.td_stype != 2) + { + tmp_overlap.cal_force_stress(isforce, isstress, edmR, foverlap, soverlap); + } // Calculate nonlocal force/stress (uses DM) hamilt::Nonlocal> tmp_nonlocal( @@ -207,6 +215,15 @@ void Force_Stress_LCAO::getForceStress(UnitCell& ucell, two_center_bundle.overlap_orb_beta.get()); tmp_nonlocal.cal_force_stress(isforce, isstress, dmR, fvnl_dbeta, svnl_dbeta); + if(PARAM.inp.td_stype == 2) + { + hamilt::TD_pot_hybrid> tmp_hybrid( + nullptr, &kv, nullptr, nullptr, orb, &ucell, orb.cutoffs(), &gd, nullptr); + tmp_hybrid.cal_force_stress(isforce, dmR, fpothybrid); + + cal_foverlap_rt(foverlap, dmat, p_hamilt, kv, pv, ucell); + } + // Switch back to spin channel 0 if (PARAM.inp.nspin == 2) { @@ -501,6 +518,7 @@ void Force_Stress_LCAO::getForceStress(UnitCell& ucell, //--------------------------------- // sum all parts of force! //--------------------------------- + ModuleBase::Vector3 net_force = {0.0, 0.0, 0.0}; for (int i = 0; i < 3; i++) { double sum = 0.0; @@ -511,7 +529,8 @@ void Force_Stress_LCAO::getForceStress(UnitCell& ucell, + fvl_dvl(iat, i) // derivative of local potential force (pw) + fewalds(iat, i) // ewald force (pw) + fcc(iat, i) // nonlinear core correction force (pw) - + fscc(iat, i); // self consistent corretion force (pw) + + fscc(iat, i) // self consistent corretion force (pw) + + fpothybrid(iat, i); // pulay force for hybrid gauge rt-tddft // Force contribution from DFT+U, Quxin add on 20201029 if (PARAM.inp.dft_plus_u) @@ -564,7 +583,7 @@ void Force_Stress_LCAO::getForceStress(UnitCell& ucell, // sum total force for correction sum += fcs(iat, i); } - + net_force[i]=sum; if (!(PARAM.inp.gate_flag || PARAM.inp.efield_flag)) { for (int iat = 0; iat < nat; ++iat) @@ -678,6 +697,9 @@ void Force_Stress_LCAO::getForceStress(UnitCell& ucell, // this->printforce_total(ry, istestf, fcs); ModuleIO::print_force(GlobalV::ofs_running, ucell, "TOTAL-FORCE (eV/Angstrom)", fcs, false); + net_force*= ModuleBase::Ry_to_eV / ModuleBase::BOHR_TO_A; + ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Net force vector (eV/Ang)", net_force.x, net_force.y, net_force.z); + ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Total drift (ev/Ang)", net_force.norm()); if (istestf) { GlobalV::ofs_running << "\n FORCE INVALID TABLE." << std::endl; diff --git a/source/source_lcao/FORCE_STRESS.h b/source/source_lcao/FORCE_STRESS.h index f23a03f786a..2856a105afa 100644 --- a/source/source_lcao/FORCE_STRESS.h +++ b/source/source_lcao/FORCE_STRESS.h @@ -53,7 +53,8 @@ class Force_Stress_LCAO Plus_U &dftu, // mohan add 2025-11-07 Setup_DeePKS &deepks, Exx_NAO &exx_nao, - ModuleSymmetry::Symmetry* symm); + ModuleSymmetry::Symmetry* symm, + hamilt::Hamilt* p_hamilt = nullptr); private: int nat; diff --git a/source/source_lcao/module_operator_lcao/td_pot_hybrid.cpp b/source/source_lcao/module_operator_lcao/td_pot_hybrid.cpp index 015af41fec1..429f8be5b37 100644 --- a/source/source_lcao/module_operator_lcao/td_pot_hybrid.cpp +++ b/source/source_lcao/module_operator_lcao/td_pot_hybrid.cpp @@ -23,13 +23,17 @@ hamilt::TD_pot_hybrid>::TD_pot_hybrid( { this->cal_type = calculation_type::lcao_tddft_periodic; this->ucell = ucell_in; + this->gridD = GridD_in; #ifdef __DEBUG assert(this->ucell != nullptr); assert(this->hsk != nullptr); #endif this->init_td(); // initialize HR to allocate sparse Ekinetic matrix memory - this->initialize_HR(GridD_in); + if(hR_in != nullptr) + { + this->initialize_HR(GridD_in); + } } // destructor @@ -289,6 +293,7 @@ template void hamilt::TD_pot_hybrid>::contributeHk(int ik) { return; } - +#include "td_pot_hybrid_force.hpp" +template class hamilt::TD_pot_hybrid>; template class hamilt::TD_pot_hybrid, double>>; template class hamilt::TD_pot_hybrid, std::complex>>; diff --git a/source/source_lcao/module_operator_lcao/td_pot_hybrid.h b/source/source_lcao/module_operator_lcao/td_pot_hybrid.h index 7b86218e099..25f78c27237 100644 --- a/source/source_lcao/module_operator_lcao/td_pot_hybrid.h +++ b/source/source_lcao/module_operator_lcao/td_pot_hybrid.h @@ -66,6 +66,9 @@ class TD_pot_hybrid> : public OperatorLCAO virtual void set_HR_fixed(void*) override; + void cal_force_stress(const bool cal_force, + const HContainer* dmR, + ModuleBase::matrix& force); private: const UnitCell* ucell = nullptr; @@ -120,6 +123,17 @@ class TD_pot_hybrid> : public OperatorLCAO /// @brief exact the nearest neighbor atoms from all adjacent atoms std::vector adjs_all; + + void cal_force_IJR(const int& iat1, + const int& iat2, + const Parallel_Orbitals* paraV, + const ModuleBase::Vector3& dtau, + const ModuleBase::Vector3& dR, + TR* dmR_pointer, + double* force1, + double* force2); + + const Grid_Driver* gridD = nullptr; }; } // namespace hamilt diff --git a/source/source_lcao/module_operator_lcao/td_pot_hybrid_force.hpp b/source/source_lcao/module_operator_lcao/td_pot_hybrid_force.hpp new file mode 100644 index 00000000000..90971546f07 --- /dev/null +++ b/source/source_lcao/module_operator_lcao/td_pot_hybrid_force.hpp @@ -0,0 +1,175 @@ +#pragma once +#include "td_pot_hybrid.h" +#include "source_base/parallel_reduce.h" +#include "source_base/timer.h" +#include "source_base/libm/libm.h" + +namespace hamilt +{ +template +void TD_pot_hybrid>::cal_force_stress(const bool cal_force, + const HContainer* dmR, + ModuleBase::matrix& force) +{ + Et = elecstate::H_TDDFT_pw::Et; + const Parallel_Orbitals* paraV = dmR->get_paraV(); + #pragma omp parallel + { + ModuleBase::matrix force_local(force.nr, force.nc); + #pragma omp for schedule(dynamic) + for (int iat1 = 0; iat1 < ucell->nat; iat1++) + { + auto tau1 = ucell->get_tau(iat1); + int T1, I1; + ucell->iat2iait(iat1, &I1, &T1); + AdjacentAtomInfo adjs; + this->gridD->Find_atom(*ucell, tau1, T1, I1, &adjs); + std::vector is_adj(adjs.adj_num + 1, false); + for (int ad1 = 0; ad1 < adjs.adj_num + 1; ++ad1) + { + const int T2 = adjs.ntype[ad1]; + const int I2 = adjs.natom[ad1]; + const int iat2 = ucell->itia2iat(T2, I2); + if (paraV->get_row_size(iat1) <= 0 || paraV->get_col_size(iat2) <= 0) + { + continue; + } + const ModuleBase::Vector3& R_index2 = adjs.box[ad1]; + // choose the real adjacent atoms + // Note: the distance of atoms should less than the cutoff radius, + // When equal, the theoretical value of matrix element is zero, + // but the calculated value is not zero due to the numerical error, which would lead to result changes. + if (this->ucell->cal_dtau(iat1, iat2, R_index2).norm() * this->ucell->lat0 + < orb_cutoff_[T1] + orb_cutoff_[T2]) + { + is_adj[ad1] = true; + } + } + filter_adjs(is_adj, adjs); + + for (int ad = 0; ad < adjs.adj_num + 1; ++ad) + { + const int T2 = adjs.ntype[ad]; + const int I2 = adjs.natom[ad]; + const int iat2 = ucell->itia2iat(T2, I2); + double* force_tmp1 = (cal_force) ? &force_local(iat2, 0) : nullptr; + // force_tmp1[0]=0; + // force_tmp1[1]=0; + // force_tmp1[2]=0; + double* force_tmp2 = (cal_force) ? &force_local(iat1, 0) : nullptr; + const ModuleBase::Vector3& R_index2 = adjs.box[ad]; + ModuleBase::Vector3 dtau = this->ucell->cal_dtau(iat1, iat2, R_index2); + ModuleBase::Vector3 dR = this->ucell->cal_dtau(iat1, iat1, R_index2); + + const BaseMatrix* tmp = dmR->find_matrix(iat1, iat2, R_index2); + if (tmp != nullptr) + { + this->cal_force_IJR(iat1, iat2, paraV, dtau, dR, tmp->get_pointer(), force_tmp1, force_tmp2); + } + else + { + ModuleBase::WARNING_QUIT("TD_pot_hybrid::calculate_HR", "R_index not found in HR"); + } + // std::cout<<"iat1: "< +void TD_pot_hybrid, std::complex>>::cal_force_IJR(const int& iat1, + const int& iat2, + const Parallel_Orbitals* paraV, + const ModuleBase::Vector3& dtau, + const ModuleBase::Vector3& dR, + std::complex* dmR_pointer, + double* force1, + double* force2) +{ + return; +} +template +void TD_pot_hybrid>::cal_force_IJR(const int& iat1, + const int& iat2, + const Parallel_Orbitals* paraV, + const ModuleBase::Vector3& dtau, + const ModuleBase::Vector3& dR, + TR* dmR_pointer, + double* force1, + double* force2) +{ + // --------------------------------------------- + // get info of orbitals of atom1 and atom2 from ucell + // --------------------------------------------- + int T1, I1; + this->ucell->iat2iait(iat1, &I1, &T1); + int T2, I2; + this->ucell->iat2iait(iat2, &I2, &T2); + Atom& atom1 = this->ucell->atoms[T1]; + Atom& atom2 = this->ucell->atoms[T2]; + + const int* iw2l1 = atom1.iw2l.data(); + const int* iw2n1 = atom1.iw2n.data(); + const int* iw2m1 = atom1.iw2m.data(); + const int* iw2l2 = atom2.iw2l.data(); + const int* iw2n2 = atom2.iw2n.data(); + const int* iw2m2 = atom2.iw2m.data(); + + // --------------------------------------------- + // calculate the Ekinetic matrix for each pair of orbitals + // --------------------------------------------- + double olm[3] = {0, 0, 0}; + auto row_indexes = paraV->get_indexes_row(iat1); + auto col_indexes = paraV->get_indexes_col(iat2); + const int step_trace = col_indexes.size() + 1; + ModuleBase::Vector3 shift = {0.0, 0.0, 1.0}; + const ModuleBase::Vector3& tau1 = this->ucell->get_tau(iat1); + const ModuleBase::Vector3 tau2 = tau1 + dtau; + for (int iw1l = 0; iw1l < row_indexes.size(); iw1l++) + { + const int iw1 = row_indexes[iw1l]; + const int L1 = iw2l1[iw1]; + const int N1 = iw2n1[iw1]; + const int m1 = iw2m1[iw1]; + + for (int iw2l = 0; iw2l < col_indexes.size(); iw2l++) + { + const int iw2 = col_indexes[iw2l]; + const int L2 = iw2l2[iw2]; + const int N2 = iw2n2[iw2]; + const int m2 = iw2m2[iw2]; + + ModuleBase::Vector3 tmp_grad = r_calculator->get_psi_r_gradpsi(tau1 * this->ucell->lat0, T1, L1, m1, N1, tau2 * this->ucell->lat0, T2, L2, m2, N2, Et, dR * this->ucell->lat0); + ModuleBase::Vector3 tmp_grad1 = r_calculator->get_psi_r_gradpsi(tau2 * this->ucell->lat0, T2, L2, m2, N2, tau1 * this->ucell->lat0, T1, L1, m1, N1, Et, dR * this->ucell->lat0); + + for(int i = 0; i < 3; i++) + { + force1[i] -= dmR_pointer[0] * tmp_grad[i]; + force2[i] -= dmR_pointer[0] * tmp_grad1[i]; + } + dmR_pointer++; + } + } +} +}// namespace hamilt \ No newline at end of file diff --git a/source/source_lcao/module_rt/CMakeLists.txt b/source/source_lcao/module_rt/CMakeLists.txt index 046632f3138..22af0621701 100644 --- a/source/source_lcao/module_rt/CMakeLists.txt +++ b/source/source_lcao/module_rt/CMakeLists.txt @@ -17,6 +17,7 @@ if(ENABLE_LCAO) solve_propagation.cpp boundary_fix.cpp td_moving_gauge.cpp + force_rt_overlap.cpp ) if(USE_CUDA) diff --git a/source/source_lcao/module_rt/force_rt_overlap.cpp b/source/source_lcao/module_rt/force_rt_overlap.cpp new file mode 100644 index 00000000000..f90a8e9e22b --- /dev/null +++ b/source/source_lcao/module_rt/force_rt_overlap.cpp @@ -0,0 +1,283 @@ +#include "force_rt_overlap.h" +#include "td_info.h" +#include "td_folding.h" +#include "source_base/module_external/lapack_connector.h" +#include "source_base/module_external/scalapack_connector.h" +#include "source_estate/module_pot/H_TDDFT_pw.h" +#include "source_base/parallel_reduce.h" +template <> +void cal_foverlap_rt(ModuleBase::matrix& foverlap, + const LCAO_domain::Setup_DM>& dmat, + hamilt::Hamilt>* p_hamilt, + const K_Vectors& kv, + Parallel_Orbitals& pv, + UnitCell& ucell) +{ + const int nlocal = PARAM.globalv.nlocal; + assert(nlocal >= 0); + + TD_info* td_info = TD_info::td_vel_op; + //get dS/dR_{x,y,z} + std::vector*> dsxr = td_info->get_grad_overlap(); + // allocate matrix + const long nloc = pv.nloc; + const int nrow = pv.nrow; + std::complex* Htmp = new std::complex[nloc]; + std::complex* Sinv = new std::complex[nloc]; + std::complex* dsxk = new std::complex[nloc]; + std::complex* pdsxk = new std::complex[nloc]; + std::complex* tmp1 = new std::complex[nloc]; + std::complex* tmp2 = new std::complex[nloc]; + std::complex* tmp3 = new std::complex[nloc]; + std::complex* Hybridtmp = new std::complex[nloc]; + std::vector*> tmp_out = {nullptr, nullptr, nullptr}; + for(int dir = 0; dir<3; dir++) + { + tmp_out[dir] = new std::complex[nloc]; + ModuleBase::GlobalFunc::ZEROS(tmp_out[dir], nloc); + } + for (int ik = 0; ik < kv.get_nks(); ++ik) + { + p_hamilt->updateHk(ik); + // get dmk + std::complex* tmp_dmk = dmat.dm->get_DMK_pointer(ik); + + ModuleBase::GlobalFunc::ZEROS(Htmp, nloc); + ModuleBase::GlobalFunc::ZEROS(Sinv, nloc); + ModuleBase::GlobalFunc::ZEROS(tmp1, nloc); + ModuleBase::GlobalFunc::ZEROS(tmp2, nloc); + ModuleBase::GlobalFunc::ZEROS(Hybridtmp, nloc); + + const int inc = 1; + + hamilt::MatrixBlock> h_mat; + hamilt::MatrixBlock> s_mat; + //get Hk Sk + p_hamilt->matrix(h_mat, s_mat); + BlasConnector::copy(nloc, h_mat.p, inc, Htmp, inc); + BlasConnector::copy(nloc, s_mat.p, inc, Sinv, inc); + + vector ipiv(nloc, 0); + int info = 0; + const int one_int = 1; + const std::complex one_complex = {1.0, 0.0}; + const std::complex two_complex = {2.0, 0.0}; + const std::complex mone_complex = {-1.0, 0.0}; + const std::complex zero_complex = {0.0, 0.0}; + + ScalapackConnector::getrf(nlocal, nlocal, Sinv, one_int, one_int, pv.desc, ipiv.data(), &info); + + int lwork = -1; + int liwork = -1; + + // if lwork == -1, then the size of work is (at least) of length 1. + std::vector> work(1, 0); + + // if liwork = -1, then the size of iwork is (at least) of length 1. + std::vector iwork(1, 0); + + ScalapackConnector::getri(nlocal, + Sinv, + one_int, + one_int, + pv.desc, + ipiv.data(), + work.data(), + &lwork, + iwork.data(), + &liwork, + &info); + + lwork = work[0].real(); + work.resize(lwork, 0); + liwork = iwork[0]; + iwork.resize(liwork, 0); + + ScalapackConnector::getri(nlocal, + Sinv, + one_int, + one_int, + pv.desc, + ipiv.data(), + work.data(), + &lwork, + iwork.data(), + &liwork, + &info); + + const char N_char = 'N'; + const char T_char = 'T'; + const char C_char = 'C'; + + + ScalapackConnector::gemm(T_char, + C_char, + nlocal, + nlocal, + nlocal, + one_complex, + tmp_dmk, + one_int, + one_int, + pv.desc, + Htmp, + one_int, + one_int, + pv.desc, + zero_complex, + tmp1, + one_int, + one_int, + pv.desc); + + ScalapackConnector::gemm(N_char, + N_char, + nlocal, + nlocal, + nlocal, + one_complex, + tmp1, + one_int, + one_int, + pv.desc, + Sinv, + one_int, + one_int, + pv.desc, + zero_complex, + tmp2, + one_int, + one_int, + pv.desc); + for(int dir = 0; dir<3; dir++) + { + ModuleBase::GlobalFunc::ZEROS(dsxk, nloc); + ModuleBase::GlobalFunc::ZEROS(pdsxk, nloc); + ModuleBase::GlobalFunc::ZEROS(tmp3, nloc); + module_rt::folding_HR_td(*dsxr[dir], dsxk, kv.kvec_d[ik], TD_info::cart_At, TD_info::td_vel_op->get_phase_hybrid(), nrow, 1); + module_rt::folding_partial_dot(*dsxr[dir], pdsxk, kv.kvec_d[ik], nrow, 1, &ucell, TD_info::td_vel_op->get_phase_hybrid(), TD_info::cart_At, elecstate::H_TDDFT_pw::Et); + ScalapackConnector::gemm(N_char, + N_char, + nlocal, + nlocal, + nlocal, + two_complex, + tmp2, + one_int, + one_int, + pv.desc, + dsxk, + one_int, + one_int, + pv.desc, + one_complex, + tmp_out[dir], + one_int, + one_int, + pv.desc); + + ScalapackConnector::gemm(T_char, + N_char, + nlocal, + nlocal, + nlocal, + mone_complex, + tmp_dmk, + one_int, + one_int, + pv.desc, + pdsxk, + one_int, + one_int, + pv.desc, + one_complex, + tmp3, + one_int, + one_int, + pv.desc); + + ScalapackConnector::geadd(N_char, + nlocal, + nlocal, + one_complex, + tmp3, + one_int, + one_int, + pv.desc, + one_complex, + tmp_out[dir], + one_int, + one_int, + pv.desc); + } + } + delete[] Htmp; + delete[] Sinv; + delete[] tmp1; + delete[] tmp2; + delete[] dsxk; + delete[] Hybridtmp; + // std::string filename = "process_debug_" + std::to_string(GlobalV::MY_RANK) + ".txt"; + // std::ofstream debug_file(filename); + // debug_file << "=== Process " << GlobalV::MY_RANK << " ===" << std::endl; + // debug_file << "=== Matrix Partition Information ===" << std::endl; + // auto row_indexes = pv.get_indexes_row(); + // auto col_indexes = pv.get_indexes_col(); + // for(int iat = 0; iat < ucell.nat; iat++) + // { + // int row0 = pv.atom_begin_row[iat]; + // int col0 = pv.atom_begin_col[iat]; + // for(int mu = 0; mu < pv.get_row_size(iat); ++mu) + // { + // for(int nu = 0; nu < pv.get_col_size(iat); ++nu) + // { + // debug_file<<"mu: "<*> p_diag = {tmp_out[0], tmp_out[1], tmp_out[2]}; + for(int mu = 0; mu < pv.get_row_size(iat); ++mu) + { + for(int nu = 0; nu < pv.get_col_size(iat); ++nu) + { + if(row_indexes[row0+mu]==col_indexes[col0+nu]) + { + const long index = (row0 + mu) + (col0 + nu)*row_size; + for(int dir = 0; dir<3; dir++) + { + // p_diag[dir] = tmp_out[dir] + (row0 + mu) + (col0 + nu) * row_size; + force_tmp1[dir] += (tmp_out[dir][index]).real(); + } + } + } + + } + } + Parallel_Reduce::reduce_all(foverlap.c, foverlap.nr * foverlap.nc); + for(int dir = 0; dir<3; dir++) + { + delete[] tmp_out[dir]; + } + return; +} +template <> +void cal_foverlap_rt(ModuleBase::matrix& foverlap, + const LCAO_domain::Setup_DM& dmat, + hamilt::Hamilt* p_hamilt, + const K_Vectors& kv, + Parallel_Orbitals& pv, + UnitCell& ucell) +{ + return; +} \ No newline at end of file diff --git a/source/source_lcao/module_rt/force_rt_overlap.h b/source/source_lcao/module_rt/force_rt_overlap.h new file mode 100644 index 00000000000..cfffc32bb95 --- /dev/null +++ b/source/source_lcao/module_rt/force_rt_overlap.h @@ -0,0 +1,17 @@ +#ifndef FORCE_RT_OVERLAP_H +#define FORCE_RT_OVERLAP_H + +#include "source_basis/module_ao/parallel_orbitals.h" +#include "source_cell/klist.h" +#include "source_lcao/setup_dm.h" +#include "source_hamilt/hamilt.h" + +template +void cal_foverlap_rt(ModuleBase::matrix& foverlap, + const LCAO_domain::Setup_DM& dmat, + hamilt::Hamilt* p_hamilt, + const K_Vectors& kv, + Parallel_Orbitals& pv, + UnitCell& ucell); + +#endif \ No newline at end of file diff --git a/source/source_lcao/module_rt/td_folding.cpp b/source/source_lcao/module_rt/td_folding.cpp index bd3b4cd24d0..2df49042ca2 100644 --- a/source/source_lcao/module_rt/td_folding.cpp +++ b/source/source_lcao/module_rt/td_folding.cpp @@ -102,6 +102,42 @@ void folding_partial_HR_td(const UnitCell& ucell, } } } +void folding_partial_dot(const hamilt::HContainer& dR, + std::complex* dk, + const ModuleBase::Vector3& kvec_d_in, + const int ncol, + const int hk_type, + const UnitCell* ucell, + const std::map, std::complex>& phase_hybrid, + const ModuleBase::Vector3& At, + const ModuleBase::Vector3& Et) +{ +#ifdef _OPENMP +#pragma omp parallel for +#endif + for (int i = 0; i < dR.size_atom_pairs(); ++i) + { + hamilt::AtomPair& tmp = dR.get_atom_pair(i); + for(int ir = 0;ir < tmp.get_R_size(); ++ir ) + { + const ModuleBase::Vector3 r_index = tmp.get_R_index(ir); + const int iat1 = tmp.get_atom_i(); + const int iat2 = tmp.get_atom_j(); + // cal k_phase + // if TK==std::complex, kphase is e^{ikR} + const ModuleBase::Vector3 dR(r_index.x, r_index.y, r_index.z); + const double arg = (kvec_d_in * dR) * ModuleBase::TWO_PI; + double sinp, cosp; + ModuleBase::libm::sincos(arg, &sinp, &cosp); + std::complex kphase = std::complex(cosp, sinp); + kphase *= phase_hybrid.at(r_index); + ModuleBase::Vector3 dtau = ucell->cal_dtau(iat1, iat1, r_index); + kphase *= Et * dtau * ucell->lat0; + tmp.find_R(r_index); + tmp.add_to_matrix(dk, ncol, kphase, hk_type); + } + } +} template void folding_HR_td(const hamilt::HContainer& hR, std::complex* hk, diff --git a/source/source_lcao/module_rt/td_folding.h b/source/source_lcao/module_rt/td_folding.h index d61d72488e2..73b954c625e 100644 --- a/source/source_lcao/module_rt/td_folding.h +++ b/source/source_lcao/module_rt/td_folding.h @@ -30,6 +30,15 @@ void folding_partial_HR_td(const UnitCell& ucell, const int ix, const int ncol, const int hk_type); +void folding_partial_dot(const hamilt::HContainer& dR, + std::complex* dk, + const ModuleBase::Vector3& kvec_d_in, + const int ncol, + const int hk_type, + const UnitCell* ucell, + const std::map, std::complex>& phase_hybrid, + const ModuleBase::Vector3& At, + const ModuleBase::Vector3& Et); }// namespace module_rt #endif \ No newline at end of file diff --git a/source/source_lcao/module_rt/td_info.cpp b/source/source_lcao/module_rt/td_info.cpp index f4251cc3731..e39bc96cae8 100644 --- a/source/source_lcao/module_rt/td_info.cpp +++ b/source/source_lcao/module_rt/td_info.cpp @@ -58,6 +58,13 @@ TD_info::~TD_info() delete this->current_term[dir]; } } + for (int dir = 0; dir < 3; dir++) + { + if (this->grad_overlap[dir] != nullptr) + { + delete this->grad_overlap[dir]; + } + } } void TD_info::output_cart_At(const std::string& out_dir) @@ -267,7 +274,151 @@ void TD_info::destroy_HS_R_td_sparse(void) HR_sparse_td_vel[1].swap(empty_HR_sparse_td_vel_down); } +void TD_info::calculate_grad_overlap(const Parallel_Orbitals& paraV, + const UnitCell& ucell, + const Grid_Driver& GridD, + const std::vector& orb_cutoff, + const TwoCenterIntegrator* intor) +{ + ModuleBase::TITLE("TD_info", "calculate_grad_overlap"); + ModuleBase::timer::start("TD_info", "calculate_grad_overlap"); + for (int dir=0;dir<3;dir++) + { + if (this->grad_overlap[dir] != nullptr) + { + delete this->grad_overlap[dir]; + } + this->grad_overlap[dir] = new hamilt::HContainer(¶V); + } + for (int iat1 = 0; iat1 < ucell.nat; iat1++) + { + auto tau1 = ucell.get_tau(iat1); + int T1=0; + int I1=0; + ucell.iat2iait(iat1, &I1, &T1); + AdjacentAtomInfo adjs; + GridD.Find_atom(ucell, tau1, T1, I1, &adjs); + for (int ad = 0; ad < adjs.adj_num + 1; ++ad) + { + const int T2 = adjs.ntype[ad]; + const int I2 = adjs.natom[ad]; + int iat2 = ucell.itia2iat(T2, I2); + if (paraV.get_row_size(iat1) <= 0 || paraV.get_col_size(iat2) <= 0) + { + continue; + } + const ModuleBase::Vector3& R_index = adjs.box[ad]; + // choose the real adjacent atoms + // Note: the distance of atoms should less than the cutoff radius, + // When equal, the theoretical value of matrix element is zero, + // but the calculated value is not zero due to the numerical error, which would lead to result changes. + if (ucell.cal_dtau(iat1, iat2, R_index).norm() * ucell.lat0 + >= orb_cutoff[T1] + orb_cutoff[T2]) + { + continue; + } + hamilt::AtomPair tmp(iat1, iat2, R_index, ¶V); + for (int dir=0;dir<3;dir++) + { + this->grad_overlap[dir]->insert_pair(tmp); + } + } + } + // allocate the memory of BaseMatrix in grad_overlap, and set the new values to zero + for (int dir=0;dir<3;dir++) + { + this->grad_overlap[dir]->allocate(nullptr, true); + } + +#ifdef _OPENMP +#pragma omp parallel for +#endif + for (int iap = 0; iap < this->grad_overlap[0]->size_atom_pairs(); ++iap) + { + hamilt::AtomPair& tmp = this->grad_overlap[0]->get_atom_pair(iap); + const int iat1 = tmp.get_atom_i(); + const int iat2 = tmp.get_atom_j(); + + for (int iR = 0; iR < tmp.get_R_size(); ++iR) + { + const ModuleBase::Vector3 R_index = tmp.get_R_index(iR); + auto dtau = ucell.cal_dtau(iat1, iat2, R_index); + + double* p_data[3] = {nullptr, nullptr, nullptr}; + for (int i = 0; i < 3; i++) + { + p_data[i] = this->grad_overlap[i]->get_atom_pair(iap).get_pointer(iR); + } + // --------------------------------------------- + // get info of orbitals of atom1 and atom2 from ucell + // --------------------------------------------- + int T1=0; + int I1=0; + ucell.iat2iait(iat1, &I1, &T1); + int T2=0; + int I2=0; + ucell.iat2iait(iat2, &I2, &T2); + Atom& atom1 = ucell.atoms[T1]; + Atom& atom2 = ucell.atoms[T2]; + + // npol is the number of polarizations, + // 1 for non-magnetic (one Hamiltonian matrix only has spin-up or spin-down), + // 2 for magnetic (one Hamiltonian matrix has both spin-up and spin-down) + const int npol = ucell.get_npol(); + const int* iw2l1 = atom1.iw2l.data(); + const int* iw2n1 = atom1.iw2n.data(); + const int* iw2m1 = atom1.iw2m.data(); + const int* iw2l2 = atom2.iw2l.data(); + const int* iw2n2 = atom2.iw2n.data(); + const int* iw2m2 = atom2.iw2m.data(); + + // --------------------------------------------- + // calculate the overlap matrix for each pair of orbitals + // --------------------------------------------- + double olm[3] = {0, 0, 0}; + auto row_indexes = paraV.get_indexes_row(iat1); + auto col_indexes = paraV.get_indexes_col(iat2); + const int step_trace = col_indexes.size() + 1; + for (int iw1l = 0; iw1l < row_indexes.size(); iw1l += npol) + { + const int iw1 = row_indexes[iw1l] / npol; + const int L1 = iw2l1[iw1]; + const int N1 = iw2n1[iw1]; + const int m1 = iw2m1[iw1]; + + // convert m (0,1,...2l) to M (-l, -l+1, ..., l-1, l) + int M1 = (m1 % 2 == 0) ? -m1 / 2 : (m1 + 1) / 2; + + for (int iw2l = 0; iw2l < col_indexes.size(); iw2l += npol) + { + const int iw2 = col_indexes[iw2l] / npol; + const int L2 = iw2l2[iw2]; + const int N2 = iw2n2[iw2]; + const int m2 = iw2m2[iw2]; + + // convert m (0,1,...2l) to M (-l, -l+1, ..., l-1, l) + int M2 = (m2 % 2 == 0) ? -m2 / 2 : (m2 + 1) / 2; + intor->calculate(T1, L1, N1, M1, T2, L2, N2, M2, dtau * ucell.lat0, nullptr, olm); + for (int dir = 0; dir < 3; dir++) + { + for (int ipol = 0; ipol < npol; ipol++) + { + p_data[dir][ipol * step_trace] += olm[dir]; + } + p_data[dir] += npol; + } + } + for (int dir = 0; dir < 3; dir++) + { + p_data[dir] += (npol - 1) * col_indexes.size(); + } + + } + } + } + ModuleBase::timer::end("TD_info", "calculate_grad_overlap"); +} template void TD_info::initialize_phase_hybrid>(const UnitCell& ucell, const hamilt::HContainer>* hR); template diff --git a/source/source_lcao/module_rt/td_info.h b/source/source_lcao/module_rt/td_info.h index bccb1f27c49..d14c67daf70 100644 --- a/source/source_lcao/module_rt/td_info.h +++ b/source/source_lcao/module_rt/td_info.h @@ -4,6 +4,7 @@ #include "source_base/timer.h" #include "source_lcao/module_hcontainer/hcontainer.h" #include "source_io/module_hs/cal_r_overlap_R.h" +#include "source_basis/module_nao/two_center_integrator.h" #include // Class to store TDDFT infos, mainly for periodic system. @@ -64,6 +65,15 @@ class TD_info return this->phase_hybrid; } + void calculate_grad_overlap(const Parallel_Orbitals& paraV, + const UnitCell& ucell, + const Grid_Driver& GridD, + const std::vector& orb_cutoff, + const TwoCenterIntegrator* intor); + std::vector*> get_grad_overlap() const + { + return this->grad_overlap; + } // set velocity HR. void set_velocity_HR(hamilt::HContainer>* HR) { @@ -107,6 +117,9 @@ class TD_info /// @brief store the read in At_data static std::vector> At_from_file; + /// @brief store the dS/dD matrix + std::vector*> grad_overlap = {nullptr, nullptr, nullptr}; + /// @brief destory HSR data stored void destroy_HS_R_td_sparse(); From 73326e5d111e915058b513c390939cc01b1c1ab2 Mon Sep 17 00:00:00 2001 From: zaotian Date: Tue, 16 Jun 2026 18:33:02 +0800 Subject: [PATCH 2/2] fix no MPI compile --- source/source_lcao/module_rt/force_rt_overlap.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/source_lcao/module_rt/force_rt_overlap.cpp b/source/source_lcao/module_rt/force_rt_overlap.cpp index f90a8e9e22b..a95ccf93ef5 100644 --- a/source/source_lcao/module_rt/force_rt_overlap.cpp +++ b/source/source_lcao/module_rt/force_rt_overlap.cpp @@ -13,6 +13,7 @@ void cal_foverlap_rt(ModuleBase::matrix& foverlap, Parallel_Orbitals& pv, UnitCell& ucell) { +#ifdef __MPI const int nlocal = PARAM.globalv.nlocal; assert(nlocal >= 0); @@ -270,6 +271,7 @@ void cal_foverlap_rt(ModuleBase::matrix& foverlap, delete[] tmp_out[dir]; } return; +#endif } template <> void cal_foverlap_rt(ModuleBase::matrix& foverlap,