From 6eba88d0ecae38b6b0088b4c559326b8dd61579e Mon Sep 17 00:00:00 2001 From: Ksenia Date: Wed, 11 Feb 2026 14:57:50 +0100 Subject: [PATCH 1/4] No not compute the last LC point n_rep times Regression module (regression()): The last LC point is computed on the full training set and it's a waste to reshuffle points n_rep times --- qstack/regression/regression.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/qstack/regression/regression.py b/qstack/regression/regression.py index 8c6d68a..66e00b0 100644 --- a/qstack/regression/regression.py +++ b/qstack/regression/regression.py @@ -85,6 +85,8 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, alpha = scipy.linalg.solve(K_solve, y_solve, assume_a='pos') y_kf_predict = np.dot(Ks, alpha) maes.append(np.mean(np.abs(y_test-y_kf_predict))) + if size_train==len(all_indices_train): + break maes_all.append((size_train, np.mean(maes), np.std(maes))) return maes_all if not save_pred else (maes_all, (y_test, y_kf_predict)) From 4449eac69a7bd926dd3c879871505f6009bec56e Mon Sep 17 00:00:00 2001 From: Ksenia Date: Wed, 11 Feb 2026 15:25:29 +0100 Subject: [PATCH 2/4] Treat input indices as np arrays Regression module (train_test_split_idx()): List concatenation with `+` fails on numpy arrays --- qstack/regression/kernel_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qstack/regression/kernel_utils.py b/qstack/regression/kernel_utils.py index 1fbaf60..4ccdaf0 100644 --- a/qstack/regression/kernel_utils.py +++ b/qstack/regression/kernel_utils.py @@ -146,7 +146,7 @@ def train_test_split_idx(y, idx_test=None, idx_train=None, raise RuntimeError("Repeated train indices") if len(np.delete(np.arange(len(y)), idx_test)) != len(y)-len(idx_test): raise RuntimeError("Repeated test indices") - if len(np.delete(np.arange(len(y)), idx_test+idx_train)) != len(y)-len(idx_test)-len(idx_train): + if len(np.delete(np.arange(len(y)), np.hstack((idx_test, idx_train)))) != len(y)-len(idx_test)-len(idx_train): warnings.warn('Train and test set indices overlap. Is it intended?', RuntimeWarning, stacklevel=2) return np.array(idx_train), np.array(idx_test), y[idx_train], y[idx_test] From b0db6da1deef24b07d70fcda6648857339678ecd Mon Sep 17 00:00:00 2001 From: Ksenia Date: Sat, 4 Jul 2026 17:09:22 +0200 Subject: [PATCH 3/4] Array tools: inherit numpy data types from args --- qstack/mathutils/array.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/qstack/mathutils/array.py b/qstack/mathutils/array.py index cf326ab..7120c94 100644 --- a/qstack/mathutils/array.py +++ b/qstack/mathutils/array.py @@ -13,17 +13,14 @@ def scatter(values, indices): for i, j in enumerate(indices): x[...,i,j] = values[...,i] ``` - Args: values (numpy.ndarray): Array of values to be scattered of shape (..., N). indices (numpy.ndarray): Array of indices indicating where to scatter the values of shape (N,). Returns: numpy.ndarray: New array with scattered values of shape (..., N, max(indices)+1). - - """ - x = np.zeros((*values.shape, max(indices)+1)) + x = np.zeros((*values.shape, max(indices)+1), dtype=np.asarray(values).dtype) x[...,np.arange(len(indices)),indices] = values return x @@ -62,7 +59,7 @@ def stack_padding(xs): max_size = max(shapes) if max_size == min(shapes): return np.stack(xs, axis=0) - X = np.zeros((len(xs), *max_size)) + X = np.zeros((len(xs), *max_size), dtype=np.result_type(*xs)) for i, x in enumerate(xs): slices = tuple(np.s_[0:s] for s in x.shape) X[i][slices] = x @@ -90,7 +87,7 @@ def vstack_padding(xs): shapes_common_axis, shapes_other_axes = np.split(np.array([x.shape for x in xs]), (1,), axis=1) if len(np.unique(shapes_other_axes, axis=0))==1: return np.vstack(xs) - X = np.zeros((shapes_common_axis.sum(), *shapes_other_axes.max(axis=0))) + X = np.zeros((shapes_common_axis.sum(), *shapes_other_axes.max(axis=0)), dtype=np.result_type(*xs)) for x, s0 in slice_generator(xs, inc=lambda x: x.shape[0]): slices = (s0, *(np.s_[0:s] for s in x.shape[1:])) X[slices] = x From d5ac511aa55f27df1d97040118aeed268f0779e2 Mon Sep 17 00:00:00 2001 From: Ksenia Date: Sat, 4 Jul 2026 17:39:38 +0200 Subject: [PATCH 4/4] Adapt metatensor converters to ml-density Make inner functions independent from pyscf mol to avoid unnesessary conversion --- qstack/io/metatensor.py | 116 +++++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 50 deletions(-) diff --git a/qstack/io/metatensor.py b/qstack/io/metatensor.py index 60fce91..c856d01 100644 --- a/qstack/io/metatensor.py +++ b/qstack/io/metatensor.py @@ -56,6 +56,20 @@ def _get_tsize(tensor): return sum(np.prod(tensor.block(key).values.shape) for key in tensor.keys) +def _get_nao(atoms, llists): + """Compute the number of orbitals in a molecule with a given basis. + + Args: + atoms (numpy.ndarray): 1D int array of atom numbers in molecule. + llists (dict): List of angular momentum quantum numbers for each shell for each element. + Output of _get_llist(). + + Returns: + int: Number of orbitals. + """ + return sum(sum(np.array(llists[q])*2+1) for q in atoms) + + def _labels_to_array(labels): """Represent a set of metatensor labels as an array. @@ -70,36 +84,32 @@ def _labels_to_array(labels): return values.view(dtype=dtype).reshape(values.shape[0]) -def _vector_to_tensormap(mol, c): +def _vector_to_tensormap(atoms, llists, c): """Transform an vector into a tensor map. The input vector is assumed to be in the GPR order. Each element of the vector corresponds to an atomic orbital of the molecule. Args: - mol (pyscf.gto.Mole): pyscf Mole object. + atoms (numpy.ndarray): 1D int array of atom numbers in molecule. + llists (dict): List of angular momentum quantum numbers for each shell for each element. + Output of _get_llist(). c (numpy.ndarray): vector to transform. Returns: metatensor.TensorMap: Tensor map representation of the vector. """ - atom_charges = numbers(mol) - tm_label_vals = [] block_prop_label_vals = {} block_samp_label_vals = {} block_comp_label_vals = {} - blocks = {} - llists = {} # Create labels for TensorMap, lables for blocks, and empty blocks - llists = _get_llist(mol) - - for q, samples_count in zip(*np.unique(atom_charges, return_counts=True), strict=True): + for q, samples_count in zip(*np.unique(atoms, return_counts=True), strict=True): llist = llists[q] - block_samp_label_vals_q = np.where(atom_charges==q)[0].reshape(-1,1) + block_samp_label_vals_q = np.where(atoms==q)[0].reshape(-1,1) for l in sorted(set(llist)): label = (l, q) tm_label_vals.append(label) @@ -120,7 +130,7 @@ def _vector_to_tensormap(mol, c): iq = dict.fromkeys(llists.keys(), 0) i = Cursor(action='slicer') - for q in atom_charges: + for q in atoms: if llists[q]==sorted(llists[q]): for l in set(llists[q]): msize = 2*l+1 @@ -144,30 +154,31 @@ def _vector_to_tensormap(mol, c): return tensor -def _tensormap_to_vector(mol, tensor): +def _tensormap_to_vector(atoms, llists, tensor): """Transform a tensor map into a vector. The output vector will be in the GPR order. Args: - mol (pyscf.gto.Mole): pyscf Mole object. + atoms (numpy.ndarray): 1D int array of atom numbers in molecule. + llists (dict): List of angular momentum quantum numbers for each shell for each element. + Output of _get_llist(). tensor (metatensor.TensorMap): tensor to transform. Returns: numpy.ndarray: 1D array (vector) representation. Raises: - RuntimeError: If tensor size does not match mol.nao. + RuntimeError: If tensor size does not match number of orbitals. """ - nao = _get_tsize(tensor) - if mol.nao != nao: - raise RuntimeError(f'Tensor size mismatch ({nao} instead of {mol.nao})') + nao = _get_nao(atoms, llists) + nao1 = _get_tsize(tensor) + if nao != nao1: + raise RuntimeError(f'Tensor size mismatch ({nao1} instead of {nao})') - c = np.zeros(mol.nao) - atom_charges = numbers(mol) - llists = _get_llist(mol) + c = np.zeros(nao) i = 0 - for iat, q in enumerate(atom_charges): + for iat, q in enumerate(atoms): llist = llists[q] il = dict.fromkeys(range(max(llist) + 1), 0) for l in llist: @@ -182,24 +193,24 @@ def _tensormap_to_vector(mol, tensor): return c -def _matrix_to_tensormap(mol, dm): +def _matrix_to_tensormap(atoms, llists, dm): """Transform a matrix into a tensor map. The input matrix is assumed to be in the GPR order. Each element of the matrix corresponds to a pair of atomic orbitals. Args: - mol (pyscf.gto.Mole): pyscf Mole object. + atoms (numpy.ndarray): 1D int array of atom numbers in molecule. + llists (dict): List of angular momentum quantum numbers for each shell for each element. + Output of _get_llist(). dm (numpy.ndarray): matrix to transform. Returns: metatensor.TensorMap: Tensor map representation of the matrix. """ - atom_charges = numbers(mol) - elements, counts = np.unique(atom_charges, return_counts=True) + elements, counts = np.unique(atoms, return_counts=True) counts = dict(zip(elements, counts, strict=True)) - element_indices = {q: np.where(atom_charges==q)[0] for q in elements} - llists = _get_llist(mol) + element_indices = {q: np.where(atoms==q)[0] for q in elements} tm_label_vals = [] block_prop_label_vals = {} @@ -244,14 +255,14 @@ def _matrix_to_tensormap(mol, dm): if all(llists[q]==sorted(llists[q]) for q in llists): iq1 = dict.fromkeys(elements, 0) i1 = Cursor(action='slicer') - for iat1, q1 in enumerate(atom_charges): + for iat1, q1 in enumerate(atoms): for l1 in set(llists[q1]): msize1 = 2*l1+1 nsize1 = llists[q1].count(l1) iq2 = dict.fromkeys(elements, 0) i1 += nsize1*msize1 i2 = Cursor(action='slicer') - for iat2, q2 in enumerate(atom_charges): + for iat2, q2 in enumerate(atoms): for l2 in set(llists[q2]): msize2 = 2*l2+1 nsize2 = llists[q2].count(l2) @@ -265,13 +276,13 @@ def _matrix_to_tensormap(mol, dm): else: iq1 = dict.fromkeys(elements, 0) i1 = Cursor(action='slicer') - for iat1, q1 in enumerate(atom_charges): + for iat1, q1 in enumerate(atoms): il1 = dict.fromkeys(range(max(llists[q1]) + 1), 0) for l1 in llists[q1]: i1 += 2*l1+1 iq2 = dict.fromkeys(elements, 0) i2 = Cursor(action='slicer') - for iat2, q2 in enumerate(atom_charges): + for iat2, q2 in enumerate(atoms): il2 = dict.fromkeys(range(max(llists[q2]) + 1), 0) for l2 in llists[q2]: dmslice = dm[i1(),i2(2*l2+1)] @@ -291,13 +302,15 @@ def _matrix_to_tensormap(mol, dm): return tensor -def _tensormap_to_matrix(mol, tensor, fast=False): +def _tensormap_to_matrix(atoms, llists, tensor, fast=False): """Transform a tensor map into a matrix. The output matrix will be in the GPR order. Args: - mol (pyscf.gto.Mole): pyscf Mole object. + atoms (numpy.ndarray): 1D int array of atom numbers in molecule. + llists (dict): List of angular momentum quantum numbers for each shell for each element. + Output of _get_llist(). tensor (metatensor.TensorMap): tensor to transform. fast (bool): Whether to use a faster approach using assumptions on the internal ordering of the TensorMap. Default is False. @@ -312,21 +325,20 @@ def _tensormap_to_matrix(mol, tensor, fast=False): numpy.ndarray: 2D array (matrix) representation. Raises: - RuntimeError: If tensor size does not match mol.nao * mol.nao. + RuntimeError: If tensor size does not match number of orbitals squared. """ + nao = _get_nao(atoms, llists) nao2 = _get_tsize(tensor) - if mol.nao*mol.nao != nao2: - raise RuntimeError(f'Tensor size mismatch ({nao2} instead of {mol.nao*mol.nao})') + if nao*nao != nao2: + raise RuntimeError(f'Tensor size mismatch ({nao2} instead of {nao*nao})') - dm = np.zeros((mol.nao, mol.nao)) - atom_charges = numbers(mol) - llists = _get_llist(mol) + dm = np.zeros((nao, nao)) if fast: nsizes = {q: np.unique(llist, return_counts=True)[1] for q, llist in llists.items()} idx = {} i = 0 - for iat, q in enumerate(atom_charges): + for iat, q in enumerate(atoms): idx[iat] = np.zeros_like(nsizes[q]) for l, n in enumerate(nsizes[q]): idx[iat][l] = i @@ -337,23 +349,23 @@ def _tensormap_to_matrix(mol, tensor, fast=False): msize2 = 2*l2+1 nsize1 = nsizes[q1][l1] nsize2 = nsizes[q2][l2] - ac1 = np.count_nonzero(atom_charges==q1) - ac2 = np.count_nonzero(atom_charges==q2) + ac1 = np.count_nonzero(atoms==q1) + ac2 = np.count_nonzero(atoms==q2) values = block.values.reshape((ac1, ac2, msize1, msize2, nsize1, nsize2)) - for iiat1, iat1 in enumerate(np.where(atom_charges==q1)[0]): - for iiat2, iat2 in enumerate(np.where(atom_charges==q2)[0]): + for iiat1, iat1 in enumerate(np.where(atoms==q1)[0]): + for iiat2, iat2 in enumerate(np.where(atoms==q2)[0]): i1 = idx[iat1][l1] i2 = idx[iat2][l2] dm[i1:i1+nsize1*msize1,i2:i2+nsize2*msize2] = values[iiat1,iiat2].transpose((2,0,3,1)).reshape((nsize1*msize1,nsize2*msize2)) else: i1 = 0 - for iat1, q1 in enumerate(atom_charges): + for iat1, q1 in enumerate(atoms): llist1 = llists[q1] il1 = np.zeros(max(llist1)+1, dtype=int) for l1 in llist1: msize1 = 2 * l1 + 1 i2 = 0 - for iat2, q2 in enumerate(atom_charges): + for iat2, q2 in enumerate(atoms): llist2 = llists[q2] il2 = np.zeros(max(llist2)+1, dtype=int) for l2 in llist2: @@ -391,10 +403,12 @@ def array_to_tensormap(mol, v, src='pyscf'): ValueError: If array dimension is not 1 or 2. """ v = reorder_ao(mol, v, dest='gpr', src=src) + atoms = numbers(mol) + llists = _get_llist(mol) if v.ndim==1: - return _vector_to_tensormap(mol, v) + return _vector_to_tensormap(atoms, llists, v) elif v.ndim==2: - return _matrix_to_tensormap(mol, v) + return _matrix_to_tensormap(atoms, llists, v) else: raise ValueError(f'Cannot convert to TensorMap an array with ndim={v.ndim}') @@ -419,10 +433,12 @@ def tensormap_to_array(mol, tensor, dest='pyscf', fast=False): Raises: RuntimeError: If tensor key names don't match expected format. """ + atoms = numbers(mol) + llists = _get_llist(mol) if tensor.keys.names==vector_label_names.tm: - v = _tensormap_to_vector(mol, tensor) + v = _tensormap_to_vector(atoms, llists, tensor) elif tensor.keys.names==matrix_label_names.tm: - v = _tensormap_to_matrix(mol, tensor, fast=fast) + v = _tensormap_to_matrix(atoms, llists, tensor, fast=fast) else: raise RuntimeError('Tensor key names mismatch. Cannot determine if it is a vector or a matrix') return reorder_ao(mol, v, src='gpr', dest=dest)