Skip to content
Open

Fixes #148

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 66 additions & 50 deletions qstack/io/metatensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,20 @@ def _get_tsize(tensor):
return sum(np.prod(tensor.block(key).values.shape) for key in tensor.keys)


def _get_nao(atoms, llists):
"""Compute the number of orbitals in a molecule with a given basis.

Args:
atoms (numpy.ndarray): 1D int array of atom numbers in molecule.
llists (dict): List of angular momentum quantum numbers for each shell for each element.
Output of _get_llist().

Returns:
int: Number of orbitals.
"""
return sum(sum(np.array(llists[q])*2+1) for q in atoms)


def _labels_to_array(labels):
"""Represent a set of metatensor labels as an array.

Expand All @@ -70,36 +84,32 @@ def _labels_to_array(labels):
return values.view(dtype=dtype).reshape(values.shape[0])


def _vector_to_tensormap(mol, c):
def _vector_to_tensormap(atoms, llists, c):
"""Transform an vector into a tensor map.

The input vector is assumed to be in the GPR order.
Each element of the vector corresponds to an atomic orbital of the molecule.

Args:
mol (pyscf.gto.Mole): pyscf Mole object.
atoms (numpy.ndarray): 1D int array of atom numbers in molecule.
llists (dict): List of angular momentum quantum numbers for each shell for each element.
Output of _get_llist().
c (numpy.ndarray): vector to transform.

Returns:
metatensor.TensorMap: Tensor map representation of the vector.
"""
atom_charges = numbers(mol)

tm_label_vals = []
block_prop_label_vals = {}
block_samp_label_vals = {}
block_comp_label_vals = {}

blocks = {}
llists = {}

# Create labels for TensorMap, lables for blocks, and empty blocks

llists = _get_llist(mol)

for q, samples_count in zip(*np.unique(atom_charges, return_counts=True), strict=True):
for q, samples_count in zip(*np.unique(atoms, return_counts=True), strict=True):
llist = llists[q]
block_samp_label_vals_q = np.where(atom_charges==q)[0].reshape(-1,1)
block_samp_label_vals_q = np.where(atoms==q)[0].reshape(-1,1)
for l in sorted(set(llist)):
label = (l, q)
tm_label_vals.append(label)
Expand All @@ -120,7 +130,7 @@ def _vector_to_tensormap(mol, c):

iq = dict.fromkeys(llists.keys(), 0)
i = Cursor(action='slicer')
for q in atom_charges:
for q in atoms:
if llists[q]==sorted(llists[q]):
for l in set(llists[q]):
msize = 2*l+1
Expand All @@ -144,30 +154,31 @@ def _vector_to_tensormap(mol, c):
return tensor


def _tensormap_to_vector(mol, tensor):
def _tensormap_to_vector(atoms, llists, tensor):
"""Transform a tensor map into a vector.

The output vector will be in the GPR order.

Args:
mol (pyscf.gto.Mole): pyscf Mole object.
atoms (numpy.ndarray): 1D int array of atom numbers in molecule.
llists (dict): List of angular momentum quantum numbers for each shell for each element.
Output of _get_llist().
tensor (metatensor.TensorMap): tensor to transform.

Returns:
numpy.ndarray: 1D array (vector) representation.

Raises:
RuntimeError: If tensor size does not match mol.nao.
RuntimeError: If tensor size does not match number of orbitals.
"""
nao = _get_tsize(tensor)
if mol.nao != nao:
raise RuntimeError(f'Tensor size mismatch ({nao} instead of {mol.nao})')
nao = _get_nao(atoms, llists)
nao1 = _get_tsize(tensor)
if nao != nao1:
raise RuntimeError(f'Tensor size mismatch ({nao1} instead of {nao})')

c = np.zeros(mol.nao)
atom_charges = numbers(mol)
llists = _get_llist(mol)
c = np.zeros(nao)
i = 0
for iat, q in enumerate(atom_charges):
for iat, q in enumerate(atoms):
llist = llists[q]
il = dict.fromkeys(range(max(llist) + 1), 0)
for l in llist:
Expand All @@ -182,24 +193,24 @@ def _tensormap_to_vector(mol, tensor):
return c


def _matrix_to_tensormap(mol, dm):
def _matrix_to_tensormap(atoms, llists, dm):
"""Transform a matrix into a tensor map.

The input matrix is assumed to be in the GPR order.
Each element of the matrix corresponds to a pair of atomic orbitals.

Args:
mol (pyscf.gto.Mole): pyscf Mole object.
atoms (numpy.ndarray): 1D int array of atom numbers in molecule.
llists (dict): List of angular momentum quantum numbers for each shell for each element.
Output of _get_llist().
dm (numpy.ndarray): matrix to transform.

Returns:
metatensor.TensorMap: Tensor map representation of the matrix.
"""
atom_charges = numbers(mol)
elements, counts = np.unique(atom_charges, return_counts=True)
elements, counts = np.unique(atoms, return_counts=True)
counts = dict(zip(elements, counts, strict=True))
element_indices = {q: np.where(atom_charges==q)[0] for q in elements}
llists = _get_llist(mol)
element_indices = {q: np.where(atoms==q)[0] for q in elements}

tm_label_vals = []
block_prop_label_vals = {}
Expand Down Expand Up @@ -244,14 +255,14 @@ def _matrix_to_tensormap(mol, dm):
if all(llists[q]==sorted(llists[q]) for q in llists):
iq1 = dict.fromkeys(elements, 0)
i1 = Cursor(action='slicer')
for iat1, q1 in enumerate(atom_charges):
for iat1, q1 in enumerate(atoms):
for l1 in set(llists[q1]):
msize1 = 2*l1+1
nsize1 = llists[q1].count(l1)
iq2 = dict.fromkeys(elements, 0)
i1 += nsize1*msize1
i2 = Cursor(action='slicer')
for iat2, q2 in enumerate(atom_charges):
for iat2, q2 in enumerate(atoms):
for l2 in set(llists[q2]):
msize2 = 2*l2+1
nsize2 = llists[q2].count(l2)
Expand All @@ -265,13 +276,13 @@ def _matrix_to_tensormap(mol, dm):
else:
iq1 = dict.fromkeys(elements, 0)
i1 = Cursor(action='slicer')
for iat1, q1 in enumerate(atom_charges):
for iat1, q1 in enumerate(atoms):
il1 = dict.fromkeys(range(max(llists[q1]) + 1), 0)
for l1 in llists[q1]:
i1 += 2*l1+1
iq2 = dict.fromkeys(elements, 0)
i2 = Cursor(action='slicer')
for iat2, q2 in enumerate(atom_charges):
for iat2, q2 in enumerate(atoms):
il2 = dict.fromkeys(range(max(llists[q2]) + 1), 0)
for l2 in llists[q2]:
dmslice = dm[i1(),i2(2*l2+1)]
Expand All @@ -291,13 +302,15 @@ def _matrix_to_tensormap(mol, dm):
return tensor


def _tensormap_to_matrix(mol, tensor, fast=False):
def _tensormap_to_matrix(atoms, llists, tensor, fast=False):
"""Transform a tensor map into a matrix.

The output matrix will be in the GPR order.

Args:
mol (pyscf.gto.Mole): pyscf Mole object.
atoms (numpy.ndarray): 1D int array of atom numbers in molecule.
llists (dict): List of angular momentum quantum numbers for each shell for each element.
Output of _get_llist().
tensor (metatensor.TensorMap): tensor to transform.
fast (bool): Whether to use a faster approach using assumptions
on the internal ordering of the TensorMap. Default is False.
Expand All @@ -312,21 +325,20 @@ def _tensormap_to_matrix(mol, tensor, fast=False):
numpy.ndarray: 2D array (matrix) representation.

Raises:
RuntimeError: If tensor size does not match mol.nao * mol.nao.
RuntimeError: If tensor size does not match number of orbitals squared.
"""
nao = _get_nao(atoms, llists)
nao2 = _get_tsize(tensor)
if mol.nao*mol.nao != nao2:
raise RuntimeError(f'Tensor size mismatch ({nao2} instead of {mol.nao*mol.nao})')
if nao*nao != nao2:
raise RuntimeError(f'Tensor size mismatch ({nao2} instead of {nao*nao})')

dm = np.zeros((mol.nao, mol.nao))
atom_charges = numbers(mol)
llists = _get_llist(mol)
dm = np.zeros((nao, nao))

if fast:
nsizes = {q: np.unique(llist, return_counts=True)[1] for q, llist in llists.items()}
idx = {}
i = 0
for iat, q in enumerate(atom_charges):
for iat, q in enumerate(atoms):
idx[iat] = np.zeros_like(nsizes[q])
for l, n in enumerate(nsizes[q]):
idx[iat][l] = i
Expand All @@ -337,23 +349,23 @@ def _tensormap_to_matrix(mol, tensor, fast=False):
msize2 = 2*l2+1
nsize1 = nsizes[q1][l1]
nsize2 = nsizes[q2][l2]
ac1 = np.count_nonzero(atom_charges==q1)
ac2 = np.count_nonzero(atom_charges==q2)
ac1 = np.count_nonzero(atoms==q1)
ac2 = np.count_nonzero(atoms==q2)
values = block.values.reshape((ac1, ac2, msize1, msize2, nsize1, nsize2))
for iiat1, iat1 in enumerate(np.where(atom_charges==q1)[0]):
for iiat2, iat2 in enumerate(np.where(atom_charges==q2)[0]):
for iiat1, iat1 in enumerate(np.where(atoms==q1)[0]):
for iiat2, iat2 in enumerate(np.where(atoms==q2)[0]):
i1 = idx[iat1][l1]
i2 = idx[iat2][l2]
dm[i1:i1+nsize1*msize1,i2:i2+nsize2*msize2] = values[iiat1,iiat2].transpose((2,0,3,1)).reshape((nsize1*msize1,nsize2*msize2))
else:
i1 = 0
for iat1, q1 in enumerate(atom_charges):
for iat1, q1 in enumerate(atoms):
llist1 = llists[q1]
il1 = np.zeros(max(llist1)+1, dtype=int)
for l1 in llist1:
msize1 = 2 * l1 + 1
i2 = 0
for iat2, q2 in enumerate(atom_charges):
for iat2, q2 in enumerate(atoms):
llist2 = llists[q2]
il2 = np.zeros(max(llist2)+1, dtype=int)
for l2 in llist2:
Expand Down Expand Up @@ -391,10 +403,12 @@ def array_to_tensormap(mol, v, src='pyscf'):
ValueError: If array dimension is not 1 or 2.
"""
v = reorder_ao(mol, v, dest='gpr', src=src)
atoms = numbers(mol)
llists = _get_llist(mol)
if v.ndim==1:
return _vector_to_tensormap(mol, v)
return _vector_to_tensormap(atoms, llists, v)
elif v.ndim==2:
return _matrix_to_tensormap(mol, v)
return _matrix_to_tensormap(atoms, llists, v)
else:
raise ValueError(f'Cannot convert to TensorMap an array with ndim={v.ndim}')

Expand All @@ -419,10 +433,12 @@ def tensormap_to_array(mol, tensor, dest='pyscf', fast=False):
Raises:
RuntimeError: If tensor key names don't match expected format.
"""
atoms = numbers(mol)
llists = _get_llist(mol)
if tensor.keys.names==vector_label_names.tm:
v = _tensormap_to_vector(mol, tensor)
v = _tensormap_to_vector(atoms, llists, tensor)
elif tensor.keys.names==matrix_label_names.tm:
v = _tensormap_to_matrix(mol, tensor, fast=fast)
v = _tensormap_to_matrix(atoms, llists, tensor, fast=fast)
else:
raise RuntimeError('Tensor key names mismatch. Cannot determine if it is a vector or a matrix')
return reorder_ao(mol, v, src='gpr', dest=dest)
Expand Down
9 changes: 3 additions & 6 deletions qstack/mathutils/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,14 @@ def scatter(values, indices):
for i, j in enumerate(indices):
x[...,i,j] = values[...,i]
```

Args:
values (numpy.ndarray): Array of values to be scattered of shape (..., N).
indices (numpy.ndarray): Array of indices indicating where to scatter the values of shape (N,).

Returns:
numpy.ndarray: New array with scattered values of shape (..., N, max(indices)+1).


"""
x = np.zeros((*values.shape, max(indices)+1))
x = np.zeros((*values.shape, max(indices)+1), dtype=np.asarray(values).dtype)
x[...,np.arange(len(indices)),indices] = values
return x

Expand Down Expand Up @@ -62,7 +59,7 @@ def stack_padding(xs):
max_size = max(shapes)
if max_size == min(shapes):
return np.stack(xs, axis=0)
X = np.zeros((len(xs), *max_size))
X = np.zeros((len(xs), *max_size), dtype=np.result_type(*xs))
for i, x in enumerate(xs):
slices = tuple(np.s_[0:s] for s in x.shape)
X[i][slices] = x
Expand Down Expand Up @@ -90,7 +87,7 @@ def vstack_padding(xs):
shapes_common_axis, shapes_other_axes = np.split(np.array([x.shape for x in xs]), (1,), axis=1)
if len(np.unique(shapes_other_axes, axis=0))==1:
return np.vstack(xs)
X = np.zeros((shapes_common_axis.sum(), *shapes_other_axes.max(axis=0)))
X = np.zeros((shapes_common_axis.sum(), *shapes_other_axes.max(axis=0)), dtype=np.result_type(*xs))
for x, s0 in slice_generator(xs, inc=lambda x: x.shape[0]):
slices = (s0, *(np.s_[0:s] for s in x.shape[1:]))
X[slices] = x
Expand Down
2 changes: 1 addition & 1 deletion qstack/regression/kernel_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def train_test_split_idx(y, idx_test=None, idx_train=None,
raise RuntimeError("Repeated train indices")
if len(np.delete(np.arange(len(y)), idx_test)) != len(y)-len(idx_test):
raise RuntimeError("Repeated test indices")
if len(np.delete(np.arange(len(y)), idx_test+idx_train)) != len(y)-len(idx_test)-len(idx_train):
if len(np.delete(np.arange(len(y)), np.hstack((idx_test, idx_train)))) != len(y)-len(idx_test)-len(idx_train):
warnings.warn('Train and test set indices overlap. Is it intended?', RuntimeWarning, stacklevel=2)
return np.array(idx_train), np.array(idx_test), y[idx_train], y[idx_test]

Expand Down
2 changes: 2 additions & 0 deletions qstack/regression/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta,
alpha = scipy.linalg.solve(K_solve, y_solve, assume_a='pos')
y_kf_predict = np.dot(Ks, alpha)
maes.append(np.mean(np.abs(y_test-y_kf_predict)))
if size_train==len(all_indices_train):
break
maes_all.append((size_train, np.mean(maes), np.std(maes)))
return maes_all if not save_pred else (maes_all, (y_test, y_kf_predict))

Expand Down
Loading