1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15  """Load list of datafiles for sequential refinement and associated environment 
 16  variables such as temperature or pressure. 
 17  """ 
 18   
 19  import os 
 20  import re 
 21  from diffpy.srrietveld.exceptions import SrrFileError 
 22   
 23   
# Revision identifier of this module; looks like an expanded
# version-control "$Id$" keyword.
__id__ = "$Id: listfilereader.py 6515 2011-04-13 14:52:02Z juhas $"


# Alternative column labels mapped to their standard names.  Keys are
# lowercase because resolveColumnLabel lowercases a label before it
# looks it up here.
_LABELALIASES = {
        'run' : 'datafile',
}
 30   
 32      '''Load an return ListFileReader instance from the specified file. 
 33   
 34      filename -- path to the list file to be loaded.  Uses CSV reader 
 35                  for ".csv" extensions or plain text reader otherwise. 
 36      fmt      -- optional filename format.  Use "csv" for CSV reader 
 37                  or "txt" for plain text reader.  When None, determine 
 38                  format from the filename extension. 
 39      basepath -- when specified, make loaded datapaths relative to the 
 40                  basepath directory.  Use '.' for the current directory. 
 41                  Do not modify datapaths when None. 
 42   
 43      Return a ListFileReader instance. 
 44      Raise ValueError for invalid fmt value. 
 45      Raise SrrFileError for non-existent or corrupted file. 
 46      ''' 
 47      if fmt is None: 
 48          readerclass = (os.path.splitext(filename)[1].lower() == '.csv' 
 49                  and CSVListFileReader or ListFileReader) 
 50      elif fmt == 'csv': 
 51          readerclass = CSVListFileReader 
 52      elif fmt == 'txt': 
 53          readerclass = ListFileReader 
 54      else: 
 55          emsg = 'Invalid fmt value, supported formats are "csv" and "txt".' 
 56          raise ValueError(fmt) 
 57      rv = readerclass(filename=filename) 
 58      if basepath is not None: 
 59          fdir = os.path.dirname(filename) 
 60          makerelpath = (lambda f: 
 61                  os.path.relpath(os.path.join(fdir, f), basepath)) 
 62          for lb in rv.banklabels: 
 63              rv.column[lb] = map(makerelpath, rv.column[lb]) 
 64      return rv 
  65   
 66   
 68      '''Convert column label to a standard name. 
 69      This applies aliases as defined in the _LABELALIASES and maps 
 70      bank002 to bank2.  Otherwise the label remains unchanged. 
 71   
 72      label -- string label to be converted to standard name. 
 73   
 74      Return string. 
 75      ''' 
 76      lbare1 = label.strip() 
 77      if lbare1.lower() in _LABELALIASES: 
 78          return resolveColumnLabel(_LABELALIASES[lbare1.lower()]) 
 79       
 80      if lbare1.lower() == 'datafile': 
 81          lbare1 = lbare1.lower() 
 82       
 83      rv = re.sub(r'(?i)^(bank)(\d+)$', 
 84              lambda mx: 'bank%i' % int(mx.group(2)), lbare1) 
 85      return rv 
  86   
 87   
 88   
 90      '''Object for loading datafiles and temperatures from a list file. 
 91      Bank columns are loaded as list of strings, all other columns are 
 92      converted to floats. 
 93   
 94      Class variables: 
 95   
 96      labelaliases -- dictionary of alternative column labels. 
 97   
 98      Instance variables: 
 99   
100      columnlabels -- list of column labels in the loaded list file 
101      banklabels   -- list of bank-column labels sorted by bank index 
102      column       -- dictionary that maps column label to associated 
103                      list of values. 
104      ''' 
105   
106 -    def __init__(self, filename=None, fp=None): 
 107          '''Initializa ListFileReader 
108   
109          filename -- optional path to a list file to be loaded 
110          fp       -- file type object to be read 
111   
112          Only one of filename, fp arguments can be specified. 
113          ''' 
114           
115          if None not in (filename, fp): 
116              emsg = "Specify either filename or fp, not both." 
117              raise ValueError(emsg) 
118          self.columnlabels = [] 
119          self.banklabels = [] 
120          self.column = {} 
121          if filename is not None: 
122              try: 
123                  with open(filename, 'rb') as fp1: 
124                      self.readFile(fp1) 
125              except IOError, e: 
126                  raise SrrFileError(e) 
127          if fp is not None: 
128              self.readFile(fp) 
129          return 
 130   
131   
133          '''Read listfile from a file-type object fp. 
134   
135          No return value. 
136          Raise SrrFileError if file loading failed. 
137          ''' 
138           
139          columnlabels = [] 
140          column = {} 
141          lineiter = iter(enumerate(fp)) 
142           
143          for nr, line in lineiter: 
144              nr += 1 
145              w = line.split() 
146               
147              if not w or w[0].startswith('#'):   continue 
148              columnlabels = map(resolveColumnLabel, w) 
149              for lb in columnlabels:  column[lb] = [] 
150              self._validate(columnlabels, column) 
151              break 
152           
153          ncols = len(columnlabels) 
154          for nr, line in lineiter: 
155              nr += 1 
156              w = line.split() 
157               
158              if not w or w[0].startswith('#'):   continue 
159              if len(w) != ncols: 
160                  emsg = "%i: expected %i columns, found %i." % ( 
161                          nr, ncols, len(w)) 
162                  raise SrrFileError(emsg) 
163              for lb, wi in zip(columnlabels, w): 
164                  column[lb].append(wi) 
165          self._validate(columnlabels, column) 
166          return 
 167   
168   
170          '''Check consistency of the loaded data, convert non-bank 
171          columns to floats and if everything is OK, update the instance 
172          attributes. 
173   
174          Raise SrrFileError when something is not OK. 
175          ''' 
176           
177          lbidx = {} 
178          for idx, lb in enumerate(columnlabels): 
179              if lb in lbidx: 
180                  emsg = "Duplicate labels in columns %i, %i" % ( 
181                          lbidx[lb] + 1, idx + 1) 
182                  raise SrrFileError(emsg) 
183              lbidx[lb] = idx 
184           
185          banklabels = filter(re.compile(r'(?:bank\d+|datafile)$').match, 
186                  columnlabels) 
187          banklabels.sort(key=lambda s: int(filter(str.isdigit, s) or 0)) 
188           
189          if columnlabels and not banklabels: 
190              emsg = "Missing column label 'datafile' or 'bankN'." 
191              raise SrrFileError(emsg) 
192           
193          for lb, col in column.iteritems(): 
194               
195              if lb in banklabels:  continue 
196              try: 
197                  col[:] = map(float, col) 
198              except ValueError, e: 
199                  emsg = "Error converting %i-th column to floats: %s" % ( 
200                          columnlabels.index(lb) + 1, e) 
201                  raise SrrFileError(emsg) 
202           
203          self.columnlabels = columnlabels 
204          self.banklabels = banklabels 
205          self.column = column 
  206   
207   
208   
209   
210   
212      '''ListFileReader specialized for comma-separated-values (CSV) format. 
213      ''' 
214   
216          '''Read CSV-formatted listfile from a file-type object fp. 
217   
218          No return value. 
219          Raise SrrFileError if file loading failed. 
220          ''' 
221          import csv 
222          reader = csv.reader(fp) 
223          try: 
224              rows = [row for row in reader] 
225          except csv.Error, e: 
226              emsg = "%i: %s" % (reader.line_num, e) 
227              raise SrrFileError(emsg) 
228          headrow = rows and rows.pop(0) or [] 
229          columnlabels = map(resolveColumnLabel, headrow) 
230          column = dict(zip(columnlabels, map(list, zip(*rows)))) 
231          self._validate(columnlabels, column) 
232          return 
  233   
234   
235