from pathlib import Path
import numpy as np
import scipy.sparse
from diffpy.utils.parsers.loaddata import loadData
[docs]
def initialize_variables(data_input, number_of_components, data_type, sparsity=1, smoothness=1e18):
"""Determines the variables and initial values used in the SNMF algorithm.
Parameters
----------
data_input: 2d array like
The observed or simulated PDF or XRD data provided by the user. Has dimensions R x N where R is the signa
length and N is the number of PDF/XRD signals.
number_of_components: int
The number of component signals the user would like to decompose 'data_input' into.
data_type: str
The type of data the user has passed into the program. Can assume the value of 'PDF' or 'XRD.'
sparsity: float, optional
The regularization parameter that behaves as the coefficient of a "sparseness" regularization term that
enhances the ability to decompose signals in the case of sparse data e.g. X-ray Diffraction data.
A non-zero value indicates sparsity in the data; greater magnitudes indicate greater amounts of sparsity.
smoothness: float, optional
The regularization parameter that behaves as the coefficient of a "smoothness" term that ensures that
component signal weightings change smoothly with time. Assumes a default value of 1e18.
Returns
-------
dictionary
The collection of the names and values of the constants used in the algorithm. Contains the number of
observed PDF/XRD patterns, the length of each pattern, the type of the data, the number of components
the user would like to decompose the data into, an initial guess for the component matrix, and initial
guess for the weight factor matrix, an initial guess for the stretching factor matrix, a parameter
controlling smoothness of the solution, a parameter controlling sparseness of the solution, the matrix
representing the smoothness term, and a matrix used to construct a hessian matrix.
"""
signal_length = data_input.shape[0]
number_of_signals = data_input.shape[1]
diagonals = [
np.ones(number_of_signals - 2),
-2 * np.ones(number_of_signals - 2),
np.ones(number_of_signals - 2),
]
smoothness_term = 0.25 * scipy.sparse.diags(
diagonals, [0, 1, 2], shape=(number_of_signals - 2, number_of_signals)
)
hessian_helper_matrix = scipy.sparse.block_diag([smoothness_term.T @ smoothness_term] * number_of_components)
sequence = (
np.arange(number_of_signals * number_of_components)
.reshape(number_of_components, number_of_signals)
.T.flatten()
)
hessian_helper_matrix = hessian_helper_matrix[sequence, :][:, sequence]
return {
"signal_length": signal_length,
"number_of_signals": number_of_signals,
"number_of_components": number_of_components,
"data_type": data_type,
"smoothness": smoothness,
"sparsity": sparsity,
"smoothness_term": smoothness_term,
"hessian_helper_matrix": hessian_helper_matrix,
}