diffpy.srrietveld.convert.listfilereader

1 ############################################################################## 2 # 3 # diffpy.srrietveld by DANSE Diffraction group 4 # Simon J. L. Billinge 5 # (c) 2011 Trustees of the Columbia University 6 # in the City of New York. All rights reserved. 7 # 8 # File coded by: Peng Tian 9 # 10 # See AUTHORS.txt for a list of people who contributed. 11 # See LICENSE.txt for license information. 12 # 13 ############################################################################## 14 15 """Load list of datafiles for sequential refinement and associated environment 16 variables such as temperature or pressure. 17 """ 18 19 import os 20 import re 21 from diffpy.srrietveld.exceptions import SrrFileError 22 23 # module version 24 __id__ = "$Id: listfilereader.py 6515 2011-04-13 14:52:02Z juhas $" 25 26 # these aliases map alternate names of column labels to the standard name 27 _LABELALIASES = { 28 'run' : 'datafile', 29 } 30

def readListFile(filename, fmt=None, basepath=None):
    '''Load and return a ListFileReader instance from the specified file.

    filename -- path to the list file to be loaded.  Uses CSV reader
                for ".csv" extensions or plain text reader otherwise.
    fmt      -- optional filename format.  Use "csv" for CSV reader
                or "txt" for plain text reader.  When None, determine
                format from the filename extension.
    basepath -- when specified, make loaded datapaths relative to the
                basepath directory.  Use '.' for the current directory.
                Do not modify datapaths when None.

    Return a ListFileReader instance.
    Raise ValueError for invalid fmt value.
    Raise SrrFileError for non-existent or corrupted file.
    '''
    if fmt is None:
        # choose the reader from the case-insensitive filename extension
        extension = os.path.splitext(filename)[1].lower()
        fmt = 'csv' if extension == '.csv' else 'txt'
    if fmt == 'csv':
        readerclass = CSVListFileReader
    elif fmt == 'txt':
        readerclass = ListFileReader
    else:
        emsg = 'Invalid fmt value, supported formats are "csv" and "txt".'
        raise ValueError(emsg)
    rv = readerclass(filename=filename)
    if basepath is not None:
        # data paths in the list file are taken relative to the directory
        # of the list file itself before being re-expressed against basepath
        fdir = os.path.dirname(filename)
        for lb in rv.banklabels:
            # build a real list so the column can be indexed and re-iterated
            rv.column[lb] = [
                os.path.relpath(os.path.join(fdir, f), basepath)
                for f in rv.column[lb]]
    return rv

65 66

def resolveColumnLabel(label):
    '''Translate a column label to its standard name.

    Surrounding whitespace is removed first.  A label whose lowercase
    form is a key in _LABELALIASES is replaced by the alias target, which
    is then resolved in turn.  Any capitalization of "datafile" becomes
    "datafile", and "bank" followed by digits (in any letter case) becomes
    "bankN" with leading zeros dropped, e.g., bank002 gives bank2.
    All other labels are returned stripped, but otherwise unchanged.

    label -- string label to be converted to standard name.

    Return string.
    '''
    stripped = label.strip()
    lowered = stripped.lower()
    # aliases take precedence; their target is resolved recursively
    if lowered in _LABELALIASES:
        return resolveColumnLabel(_LABELALIASES[lowered])
    # convert DataFilE to datafile
    if lowered == 'datafile':
        return lowered
    # convert bank01 to bank1
    bankmatch = re.match(r'(?i)^(bank)(\d+)$', stripped)
    if bankmatch is None:
        return stripped
    return 'bank%i' % int(bankmatch.group(2))

86 87 # ---------------------------------------------------------------------------- 88

class ListFileReader(object):
    '''Object for loading datafiles and temperatures from a list file.
    Bank columns are loaded as list of strings, all other columns are
    converted to floats.  Column labels are normalized with
    resolveColumnLabel when the file is read.

    Instance variables:

    columnlabels -- list of column labels in the loaded list file
    banklabels   -- list of bank-column labels sorted by bank index
    column       -- dictionary that maps column label to associated
                    list of values.
    '''

    def __init__(self, filename=None, fp=None):
        '''Initialize ListFileReader

        filename -- optional path to a list file to be loaded
        fp       -- file type object to be read

        Only one of filename, fp arguments can be specified.
        Raise ValueError when both are given.
        Raise SrrFileError when filename cannot be opened or read.
        '''
        # check arguments
        if filename is not None and fp is not None:
            emsg = "Specify either filename or fp, not both."
            raise ValueError(emsg)
        self.columnlabels = []
        self.banklabels = []
        self.column = {}
        if filename is not None:
            try:
                with open(filename, 'rb') as fp1:
                    self.readFile(fp1)
            except IOError as e:
                raise SrrFileError(e)
        if fp is not None:
            self.readFile(fp)
        return

    @staticmethod
    def _textlines(fp):
        '''Iterate over the lines of fp as native strings.

        Lines that are bytes but not str, which happens for a file opened
        in binary mode on Python 3, are decoded as UTF-8.  The encoding is
        an assumption, the list-file format does not declare one.
        On Python 2 bytes is str, so the lines pass through unchanged.

        Raise SrrFileError when a line cannot be decoded.
        '''
        for line in fp:
            if isinstance(line, bytes) and not isinstance(line, str):
                try:
                    line = line.decode('utf-8')
                except UnicodeDecodeError as e:
                    raise SrrFileError(e)
            yield line

    def readFile(self, fp):
        '''Read listfile from a file-type object fp.

        The first line that is neither blank nor a "#" comment is the
        header with whitespace-separated column labels.  Every following
        non-blank, non-comment line must have as many fields as the header.

        No return value.
        Raise SrrFileError if file loading failed.
        '''
        # new variables
        columnlabels = []
        column = {}
        # a single shared iterator, the body loop continues after the header
        lineiter = enumerate(self._textlines(fp), 1)
        # load header line
        for nr, line in lineiter:
            w = line.split()
            # skip blank and comment lines
            if not w or w[0].startswith('#'):
                continue
            columnlabels = [resolveColumnLabel(wi) for wi in w]
            column = dict((lb, []) for lb in columnlabels)
            # report duplicate or missing bank labels before reading data
            self._validate(columnlabels, column)
            break
        # load data body
        ncols = len(columnlabels)
        for nr, line in lineiter:
            w = line.split()
            # skip blank and comment lines
            if not w or w[0].startswith('#'):
                continue
            if len(w) != ncols:
                emsg = "%i: expected %i columns, found %i." % (
                        nr, ncols, len(w))
                raise SrrFileError(emsg)
            for lb, wi in zip(columnlabels, w):
                column[lb].append(wi)
        self._validate(columnlabels, column)
        return

    def _validate(self, columnlabels, column):
        '''Check consistency of the loaded data, convert non-bank
        columns to floats and if everything is OK, update the instance
        attributes.

        columnlabels -- list of standardized column labels
        column       -- dictionary of column label to list of values.
                        Lists of non-bank columns are converted in place.

        Raise SrrFileError when something is not OK.
        '''
        # check for duplicate columns
        lbidx = {}
        for idx, lb in enumerate(columnlabels):
            if lb in lbidx:
                emsg = "Duplicate labels in columns %i, %i" % (
                        lbidx[lb] + 1, idx + 1)
                raise SrrFileError(emsg)
            lbidx[lb] = idx
        # sort bank labels by their index
        isbanklabel = re.compile(r'(?:bank\d+|datafile)$').match
        banklabels = [lb for lb in columnlabels if isbanklabel(lb)]

        def bankindex(lb):
            # "datafile" has no digits and therefore sorts as index 0
            digits = ''.join(c for c in lb if c.isdigit())
            return int(digits or 0)

        banklabels.sort(key=bankindex)
        # require at least one bankN label
        if columnlabels and not banklabels:
            emsg = "Missing column label 'datafile' or 'bankN'."
            raise SrrFileError(emsg)
        # convert non-bank columns to floats
        for lb, col in column.items():
            # skip the bank columns
            if lb in banklabels:
                continue
            try:
                # slice assignment keeps the identity of the list object
                col[:] = [float(x) for x in col]
            except ValueError as e:
                emsg = "Error converting %i-th column to floats: %s" % (
                        columnlabels.index(lb) + 1, e)
                raise SrrFileError(emsg)
        # everything OK here, assign the new variables
        self.columnlabels = columnlabels
        self.banklabels = banklabels
        self.column = column

206 207 # End of class ListFileReader 208 209 # ---------------------------------------------------------------------------- 210

class CSVListFileReader(ListFileReader):
    '''ListFileReader specialized for comma-separated-values (CSV) format.
    '''

    def readFile(self, fp):
        '''Read CSV-formatted listfile from a file-type object fp.

        The first non-empty row is the header with column labels.  Empty
        rows are skipped and every other row must have as many fields as
        the header, which is the same rule as in the plain text reader.

        No return value.
        Raise SrrFileError if file loading failed.
        '''
        import csv

        def textlines():
            # csv.reader on Python 3 requires str lines; decode bytes lines
            # from a binary-mode file as UTF-8 (assumed encoding).
            # On Python 2 bytes is str and lines pass through unchanged.
            for line in fp:
                if isinstance(line, bytes) and not isinstance(line, str):
                    line = line.decode('utf-8')
                yield line

        reader = csv.reader(textlines())
        columnlabels = None
        column = {}
        try:
            for row in reader:
                # csv.reader yields [] for a blank line
                if not row:
                    continue
                if columnlabels is None:
                    columnlabels = [resolveColumnLabel(w) for w in row]
                    # every label gets a list even when no data rows follow
                    column = dict((lb, []) for lb in columnlabels)
                    continue
                if len(row) != len(columnlabels):
                    emsg = "%i: expected %i columns, found %i." % (
                            reader.line_num, len(columnlabels), len(row))
                    raise SrrFileError(emsg)
                for lb, value in zip(columnlabels, row):
                    column[lb].append(value)
        except (csv.Error, UnicodeDecodeError) as e:
            emsg = "%i: %s" % (reader.line_num, e)
            raise SrrFileError(emsg)
        self._validate(columnlabels or [], column)
        return

233 234 # End of class CSVListFileReader 235

Source Code for Module diffpy.srrietveld.convert.listfilereader