Source code for diffpy.utils.parsers.serialization

#!/usr/bin/env python
##############################################################################
#
# diffpy.utils      by DANSE Diffraction group
#                   Simon J. L. Billinge
#                   (c) 2010 The Trustees of Columbia University
#                   in the City of New York.  All rights reserved.
#
# File coded by:
#
# See AUTHORS.txt for a list of people who contributed.
# See LICENSE_DANSE.txt for license information.
#
##############################################################################

import json
import pathlib
import warnings

import numpy

from .custom_exceptions import ImproperSizeError, UnsupportedTypeError

# FIXME: add support for yaml, xml
supported_formats = ['.json']


def serialize_data(filename, hdata: dict, data_table, dt_colnames=None, show_path=True, serial_file=None):
    """Serialize file data into a dictionary. Can also save the dictionary into a serial language file.

    The dictionary is formatted as {filename: data}. Requires hdata and data_table
    (both can be generated by loadData).

    Parameters
    ----------
    filename
        Name of the file whose data is being serialized.
    hdata: dict
        File metadata (generally related to the data table).
    data_table: list or ndarray
        Data table.
    dt_colnames: list
        Names of each column in data_table. Every non-None name in dt_colnames is put
        into the dictionary as a key whose value is the corresponding column of
        data_table (stored as a list). Use None for columns without names. If
        dt_colnames has fewer non-None entries than data_table has columns, the pair
        {'data table': data_table} is also put in the dictionary.
        (Default None: only the entry {'data table': data_table} is added to the
        dictionary.)
    show_path: bool
        Include a path element in the database entry (default True). If 'path' is not
        included in hdata, the path is extracted from filename.
    serial_file
        Serial language file to dump the dictionary into. If None (default), no
        dumping occurs.

    Returns
    -------
    dict:
        The dictionary loaded from/into the updated database file.
    """

    # compile data_table and hdata together
    data = {}

    # handle getting the name of the file for a variety of filename types
    abs_path = pathlib.Path(filename).resolve()

    # add path to start of data if requested
    if show_path and 'path' not in hdata.keys():
        data.update({'path': abs_path.as_posix()})

    # title the entry with the name of the file (taken from the end of the path)
    title = abs_path.name

    # first add data in the hdata dict
    data.update(hdata)

    # second add named columns in dt_colnames
    # performed second to prioritize overwriting hdata entries with data_table column entries
    named_columns = 0  # initial value
    max_columns = 1  # higher than named_columns to trigger 'data table' entry
    if dt_colnames is not None:
        num_columns = [len(row) for row in data_table]
        max_columns = max(num_columns)
        num_col_names = len(dt_colnames)
        if max_columns < num_col_names:  # assume numpy.loadtxt gives a non-irregular array
            raise ImproperSizeError("More entries in dt_colnames than columns in data_table.")
        for idx in range(num_col_names):
            colname = dt_colnames[idx]
            if colname is not None:
                if colname in hdata.keys():
                    warnings.warn(
                        f"Entry '{colname}' in hdata has been overwritten by a data_table entry.",
                        RuntimeWarning,
                    )
                # column slicing assumes data_table is a 2D numpy array (as from loadData)
                data.update({colname: list(data_table[:, idx])})
                named_columns += 1

    # finally add data_table as an entry named 'data table' if not all columns were parsed
    if named_columns < max_columns:
        if 'data table' in data.keys():
            warnings.warn(
                "Entry 'data table' in hdata has been overwritten by data_table.",
                RuntimeWarning,
            )
        data.update({'data table': data_table})

    # parse name using pathlib and generate dictionary entry
    entry = {title: data}

    # no save
    if serial_file is None:
        return entry

    # saving/updating file
    # check if supported type
    sf = pathlib.Path(serial_file)
    sf_name = sf.name
    extension = sf.suffix
    if extension not in supported_formats:
        raise UnsupportedTypeError(sf_name, supported_formats)

    # new file or update (avoids leaving a dangling open file handle)
    existing = sf.exists()

    # json
    if extension == '.json':
        # numpy arrays are not JSON serializable; convert them to lists
        class NumpyEncoder(json.JSONEncoder):
            def default(self, data_obj):
                if isinstance(data_obj, numpy.ndarray):
                    return data_obj.tolist()
                return json.JSONEncoder.default(self, data_obj)

        # dump if non-existing
        if not existing:
            with open(serial_file, 'w') as jsonfile:
                file_data = entry  # for return
                json.dump(file_data, jsonfile, indent=2, cls=NumpyEncoder)

        # update if existing
        else:
            with open(serial_file, 'r') as json_read:
                file_data = json.load(json_read)
            file_data.update(entry)
            with open(serial_file, 'w') as json_write:
                json.dump(file_data, json_write, indent=2, cls=NumpyEncoder)

    return file_data
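
# A minimal usage sketch (not part of the original module). The file name
# 'sample.gr', the metadata keys, and the column names below are hypothetical
# stand-ins; hdata and data_table would normally come from loadData.
#
#   >>> import numpy
#   >>> hdata = {'temperature': 300, 'wavelength': 0.1665}
#   >>> data_table = numpy.array([[0.0, 1.2], [0.1, 1.5], [0.2, 1.1]])
#   >>> entry = serialize_data('sample.gr', hdata, data_table,
#   ...                        dt_colnames=['r', 'gr'], serial_file='data.json')
#   >>> sorted(entry['sample.gr'].keys())
#   ['gr', 'path', 'r', 'temperature', 'wavelength']
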
def deserialize_data(filename, filetype=None):
    """Load a dictionary from a serial file.

    Parameters
    ----------
    filename
        Serial file to load from.
    filetype
        For specifying the extension type (e.g. '.json').

    Returns
    -------
    dict
        A dictionary read from a serial file.
    """

    # check if supported type
    f = pathlib.Path(filename)
    f_name = f.name
    if filetype is None:
        extension = f.suffix
        if extension not in supported_formats:
            raise UnsupportedTypeError(f_name, supported_formats)
    else:
        extension = filetype

    return_dict = {}

    # json
    if extension == '.json':
        with open(filename, 'r') as json_file:
            j_dict = json.load(json_file)
        return_dict = j_dict

    if len(return_dict) == 0:
        warnings.warn('Loaded dictionary is empty. Possibly due to improper file type.', RuntimeWarning)

    return return_dict
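
# A minimal usage sketch (not part of the original module), reading back the
# hypothetical 'data.json' produced in the serialize_data example above.
#
#   >>> db = deserialize_data('data.json')
#   >>> db['sample.gr']['gr']
#   [1.2, 1.5, 1.1]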