Source code for ship.datastructures.rowdatacollection

"""

 Summary:
    Contains the RowDataCollection object. This is an object used to hold
    all of the data for a specific row of a unit in an ISIS dat file.
    It acts as a collection pattern to make accessing and updating the contents
    of a row simpler.

 Author:  
     Duncan Runnacles

 Created:  
     01 Apr 2016

 Copyright: 
     Duncan Runnacles 2016

 TODO:

 Updates:

"""
from __future__ import unicode_literals

import copy

import logging
logger = logging.getLogger(__name__)

from ship.datastructures.dataobject import *
"""logging references with a __name__ set to this module."""


class RowDataCollection(object):
    """Composite/Facade for the ADataRowObject classes.

    AUnit classes should instantiate this class in order to manage all the
    ADataRowObject classes used to hold the units row data.

    There are several convenience methods to retrieve and safely update the
    contents.

    Note:
        There are many references to a 'key' variable in this class to
        decipher which object in the collection to access/update/etc. This
        is one of the ROW_DATA_TYPES enum values in the datunits package.

    See Also:
        ROW_DATA_TYPE - in :class:`datunits <ship.fmp.datunits>` module.

    TODO:
        Need to find a way to safely add values to the collection while
        making sure that all the data objects stay in sync (i.e. have the
        same number of rows), otherwise it will be chaos. At the moment this
        is tricky because we need to add individual values at the start.
        Possibly stop client from using any get methods etc unless all the
        data objects have the same number of rows.

        For the time being there is a convenience method checkRowsInSync()
        that can be called to verify that all of the data objects in this
        collection have the same length.
    """

    def __init__(self, **kwargs):
        """Create a reference to the collection list.

        **kwargs:
            'update_callback': optional object stored on the instance as
                ``_updateCallback``. Defaults to None.
        """
        self._collection = []
        self._min_collection = 0
        self._current_collection = 0
        # Kept in step with len(self._collection) by the add/delete methods.
        self._max = 0
        self._updateCallback = kwargs.get('update_callback', None)
        self.has_dummy = False

    @classmethod
    def bulkInitCollection(cls, dataobjects, **kwargs):
        """Create a collection pre-populated with existing data objects.

        Args:
            dataobjects (iterable): data objects to add, in order.
            **kwargs: passed through to the constructor.

        Returns:
            A new instance of this class containing the given data objects.
        """
        rc = cls(**kwargs)
        rc._collection.extend(dataobjects)
        rc._max = len(rc._collection)
        return rc

    @property
    def row_count(self):
        """Number of rows in the collection. See numberOfRows()."""
        return self.numberOfRows()

    def addToCollection(self, dataobject, index=None):
        """Add an existing data object to the collection.

        Args:
            dataobject: the data object to add.
            index=None (int): position in the collection to insert the data
                object at. If None it is appended to the end. Uses
                list.insert(), so an index beyond the end of the collection
                appends rather than raising.
        """
        if index is None:
            self._collection.append(dataobject)
        else:
            self._collection.insert(index, dataobject)
        self._max = len(self._collection)

    def indexOfDataObject(self, key):
        """Get the index of the DataObject with data_type equal to key.

        Args:
            key: the data_type to search for.

        Return:
            int - position of the first matching data object in the
                collection, or None if no data object matches.
        """
        for i, c in enumerate(self._collection):
            if c.data_type == key:
                return i
        return None

    def iterateRows(self, key=None):
        """Returns a generator for iterating through the rows in the collection.

        If no key is given it will return a list containing all of the values
        in the row.

        Args:
            key=None(int): ROW_DATA_TYPE to return. If None all values in the
                row will be returned as a list.

        Return:
            list if key == None, a single value otherwise.

        Raises:
            KeyError: if key is given and does not exist in the collection.
                As this is a generator it is raised when iteration starts.
        """
        if key is None:
            for i in range(self.row_count):
                yield [o.getValue(i) for o in self._collection]
        else:
            index = self.indexOfDataObject(key)
            if index is None:
                raise KeyError('Key %s does not exist in collection' % (key))
            for i in range(self.row_count):
                yield self._collection[index].getValue(i)

    def rowAsDict(self, index):
        """Get the data vals in a particular row by index.

        Args:
            index(int): the index of the row to return.

        Return:
            dict - containing the values for the requested row, keyed by the
                data_type of each data object.
        """
        return {obj.data_type: obj.getValue(index) for obj in self._collection}

    def rowAsList(self, index):
        """Get the data vals in a particular row by index.

        Args:
            index(int): the index of the row to return.

        Return:
            list - containing the values for the requested row, in the order
                of the data objects in the collection.
        """
        return [obj.getValue(index) for obj in self._collection]

    def dataObject(self, name_key):
        """Return the ADataRowObject instance requested.

        Args:
            name_key (str): The key to use to retrieve the object
                (e.g. 'chainage'). This is usually a class declared constant
                e.g. RiverUnit.CHAINAGE.

        Returns:
            ADataRowObject with a data_type matching name_key.

        Raises:
            KeyError: If name_key doesn't match any object in the collection.

        Note:
            Returns the object held by the collection, not a copy. Any
            changes to the values will remain within the main list. If you
            want to be able to change it without affecting the main copy use
            dataObjectCopy().
        """
        for obj in self._collection:
            if obj.data_type == name_key:
                return obj
        raise KeyError('name_key %s was not found in collection' % (name_key))

    def dataObjectAsList(self, key):
        """Returns a DataObject as a list.

        This will return the row_collection DataObject referenced by the key
        provided (as a ROW_DATA_TYPES) in list form.

        If you intend to update the values you should use dataObject()
        instead as the data provided will be mutable and therefore reflected
        in the values held by the row_collection. If you just want a quick
        way to loop through the values in one of the data objects and only
        intend to read the data then use this.

        Args:
            key(str): the key for the data object requested. It is best to
                use the class constants (i.e. RiverUnit.CHAINAGE) for this.

        Returns:
            List containing the data in the DataObject that the key points
            to.

        Raises:
            KeyError: If key does not exist.
        """
        return list(self.dataObject(key))

    def toList(self):
        """Returns the row data as a list.

        Collects the row data in each of the ADataObjects in this collection
        into a list. Then adds them to a list based on the order of this
        collection. I.e. each inner list is the data pertaining to a single
        ADataObject.

        Example:
            [
                [0.0, 1.5, 3.0],
                [32.5, 31.0, 31.5],
                [0.03, 0.03, 0.03]
            ]

        Returns:
            List - containing lists of the data in the DataObjects in this
                collection.
        """
        return [list(c) for c in self._collection]

    def toDict(self):
        """Returns the row data object as a dict.

        Provides a dict where keys are the datunits.ROW_DATA_TYPES and the
        values are lists of the values for that type in sequence.

        If you intend to update the values you should use dataObject()
        instead as the data provided will be mutable and therefore reflected
        in the values held by the collection. If you just want to read the
        data then use this.

        Returns:
            dict - containing lists of values by ROW_DATA_TYPE.
        """
        return {c.data_type: list(c) for c in self._collection}

    def dataValue(self, key, index):
        """Get the value in a DataObject at index.

        Args:
            key(int): ROW_DATA_TYPES for the DataObject.
            index(int): the row to return the value from.

        Return:
            The value in the DataObject at given index.

        Raises:
            KeyError - if key does not exist in collection.
            IndexError - if index does not exist in DataObject.
        """
        for c in self._collection:
            if c.data_type == key:
                return c.getValue(index)
        raise KeyError('DataObject %s does not exist in collection' % key)

    def _addValue(self, key, value=None):
        """Add a new value to the data object referenced by key.

        Note:
            You almost certainly don't want to be using this. It's used
            internally to add values to ADataObject's. If you need to add
            data use the addRow() method.

        Args:
            key (int): Name of the data object to add the given value to.
            value: Optional - The value to add to the collection. If no
                value is supplied a default will be used.

        Raises:
            KeyError: If the name key doesn't exist in the collection.

        TODO:
            Check what other errors are thrown by the data object and
            make sure that they are dealt with/passed on from here.
        """
        # Find the collection by the key and add the value to it.
        for c in self._collection:
            if c.data_type == key:
                c.addValue(value)
                break
        else:
            raise KeyError('Key %s does not exist in collection' % (key))

        # Do this after so it's not removed when something goes wrong
        if self.has_dummy:
            self.deleteRow(0)
            self.has_dummy = False

    def _setValue(self, key, value, index):
        """Set the value to the data object in the collection.

        Note:
            You almost certainly don't want to be using this. It's used
            internally to set values to ADataObject's. If you need to update
            data use the updateRow() method. It will check consistency
            across the collection.

        Args:
            key (int): the type of data object to set the given value on.
            value: The value to set.
            index (int): the index to set the value at.

        Raises:
            KeyError: If the name key doesn't exist in the collection.
            ValueError: If the value is not appropriate for the data type
        """
        for c in self._collection:
            if c.data_type == key:
                c.setValue(value, index)
                break
        else:
            raise KeyError('Key %s does not exist in collection' % (key))

    def getPrintableRow(self, index):
        """Get the row data in printable form.

        Retrieves all of the values in this RowDataObjectCollection in the
        order that it exists in the list.

        Args:
            index (int): the row collection index to access.

        Returns:
            string formatted for printing to .DAT file.
        """
        return ''.join(
            obj.getPrintableValue(index) for obj in self._collection
        )

    def _validateRowKeys(self, row_vals):
        """Raise KeyError if any key of row_vals is not in the collection."""
        dataobj_keys = self.collectionTypes()
        for k in row_vals:
            if k not in dataobj_keys:
                raise KeyError(
                    'ROW_DATA_TYPE ' + str(k) + ' is not in collection'
                )

    def updateRow(self, row_vals, index, **kwargs):
        """Update the values of an existing row in the units data rows.

        Sets the values in the supplied value dict on the row at the
        location given by the index.

        **kwargs:
            'no_copy'(bool): if True the deepcopy of the object will not be
                made. This is useful if you are loading a lot of data and
                don't want the overhead of deepcopy - like loading a new
                model. Default is False.

        Note:
            If there is any problem while updating the values in the row all
            datarow objects will be returned to the state they were in
            before the operation. This ensures that they don't get out of
            sync if an error is found halfway through updating the different
            values. This is done by creating a deep copy of the object prior
            to updating. If 'no_copy' is True there is no copy to restore
            from, so the collection is left as it was when the error
            occurred.

        Args:
            row_vals (dict): Contains the names of the data objects of
                collection as keys and the new row values as values.
            index (int): The index of the row to update.

        Raises:
            KeyError: If any of the keys don't exist.
            IndexError: If the index doesn't exist.
        """
        no_copy = kwargs.get('no_copy', False)
        # Valid row indexes stop at row_count - 1.
        if index >= self.row_count:
            raise IndexError(
                'Row index %s does not exist in collection' % index
            )
        self._validateRowKeys(row_vals)

        # Keep a deep copy of the data objects so they can be put back if
        # there's a problem. That way the lists in the different objects
        # don't get out of sync.
        backup = None
        if not no_copy:
            backup = self._deepCopyDataObjects(self._collection)
        try:
            for key, val in row_vals.items():
                self._setValue(key, val, index)
        except Exception:
            self._resetDataObject(backup)
            raise

    def addRow(self, row_vals, index=None, **kwargs):
        """Add a new row to the units data rows.

        Creates a new row from the values in the supplied value dict at the
        location given by the index. If the index is None then the value
        will be appended to the end of the row rather than inserted.

        Data objects with no entry in row_vals are given their default
        value.

        Note:
            If there is any problem while adding the new row all datarow
            objects will be returned to the state they were in before the
            operation. This ensures that they don't get out of sync if an
            error is found halfway through adding the different values. This
            is done by creating a deep copy of the object prior to updating.
            If 'no_copy' is True there is no copy to restore from, so the
            collection is left as it was when the error occurred.

        **kwargs:
            'no_copy'(bool): if True the deepcopy of the object will not be
                made. This is useful if you are loading a lot of data and
                don't want the overhead of deepcopy - like loading a new
                model. Default is False.

        Args:
            row_vals (dict): Contains the names of the data objects of
                collection as keys and the new row values as values.
            index (int): The index at which to insert the row. If None it
                will be appended to end of the collection.

        Raises:
            KeyError: If any of the keys don't exist.
            IndexError: If the index doesn't exist.
            ValueError: If a data object has no value in row_vals and no
                default value.
            RuntimeError: If the data objects are not all the same length
                after adding the row.
        """
        no_copy = kwargs.get('no_copy', False)
        # index == row_count is allowed: it inserts at the end.
        if index is not None and index > self.row_count:
            raise IndexError(
                'Row index %s does not exist in collection' % index
            )
        self._validateRowKeys(row_vals)

        # Keep a deep copy of the data objects so they can be put back if
        # there's a problem. That way the lists in the different objects
        # don't get out of sync.
        backup = None
        if not no_copy:
            backup = self._deepCopyDataObjects(self._collection)
        try:
            for obj in self._collection:
                if obj.data_type in row_vals:
                    obj.addValue(row_vals[obj.data_type], index)
                elif obj.default is not None:
                    obj.addValue(obj.default, index)
                else:
                    raise ValueError(
                        'No value given for %s and it has no default'
                        % obj.data_type
                    )
            if not self.checkRowsInSync():
                raise RuntimeError('Collection not in sync')
        # RuntimeError must be handled before the general case or this
        # branch can never be reached.
        except RuntimeError as err:
            logger.error('Collection not in sync!')
            logger.exception(err)
            if backup is not None:
                self._resetDataObject(backup)
                logger.error('Collection reset to previous state')
            raise
        except Exception:
            self._resetDataObject(backup)
            raise

        # Do this after so it's not removed if something goes wrong
        if self.has_dummy:
            self.deleteRow(0, no_copy=True)
            self.has_dummy = False

    def deleteRow(self, index, **kwargs):
        """Delete a row from the collection.

        **kwargs:
            'no_copy'(bool): if True the deepcopy of the object will not be
                made. This is useful if you are loading a lot of data and
                don't want the overhead of deepcopy - like deleting the
                dummy row. Default is False.

        Args:
            index(int): the index to delete the values for.

        Raise:
            IndexError: if index is out of the bounds of the collection.
        """
        no_copy = kwargs.get('no_copy', False)
        if index < 0 or index >= self.row_count:
            raise IndexError(
                'Row index %s does not exist in collection' % index
            )

        # Keep a deep copy of the data objects so they can be put back if
        # there's a problem. That way the lists in the different objects
        # don't get out of sync.
        backup = None
        if not no_copy:
            backup = self._deepCopyDataObjects(self._collection)
        try:
            for obj in self._collection:
                obj.deleteValue(index)
        except Exception:
            self._resetDataObject(backup)
            raise

    def collectionTypes(self):
        """Get a list of the types (names) of all the objects in the collection.

        The list returned will contain all of the names used in this row
        collection. e.g. 'chainage', 'elevation' etc.

        Returns:
            keys (list): containing the names of the data objects.
        """
        return [obj.data_type for obj in self._collection]

    def dataObjectCopy(self, name_key):
        """Return a copy of the ADataRowObject instance requested.

        Same as the dataObject() method except it makes a deep copy of the
        data object before returning it so that any changes will be local to
        the returned copy only and not to the main reference.

        Args:
            name_key (str): The key to use to retrieve the object
                (e.g. 'chainage'). This is usually a class declared constant
                e.g. RiverUnit.CHAINAGE.

        Returns:
            Deep copy of the ADataRowObject with a data_type matching
            name_key.

        Raises:
            KeyError: If name_key doesn't match any object in the collection.
        """
        return self._deepCopyDataObjects(self.dataObject(name_key))

    def deleteDataObject(self, name_key):
        """Delete the ADataRowObject instance requested.

        Args:
            name_key (str): The key to use to retrieve the object
                (i.e. 'chainage')

        Returns:
            True if the object was successfully deleted; False if not.
        """
        for obj in self._collection:
            if obj.data_type == name_key:
                # Safe to remove while looping as we return straight away.
                self._collection.remove(obj)
                self._max = len(self._collection)
                return True
        return False

    def setDummyRow(self, row_vals):
        """Sets a special 'dummy row' as a placeholder until actual values.

        Sometimes it can be useful to have placeholder values in a
        collection. This is particularly true for FMP units that will cause
        errors in FMP if there is no data in the rows.

        This method will add the dummy row data and set the self.has_dummy
        flag to True. When actual row data is added to the collection it
        will check the flag and delete the row if it's True.

        Args:
            row_vals (dict): the values for the dummy row, in the same form
                as for addRow().
        """
        # addRow() removes any existing dummy row and clears the flag itself,
        # so the check below is only a safeguard.
        self.addRow(row_vals, no_copy=True)
        if self.has_dummy:
            self.deleteRow(0, no_copy=True)
        self.has_dummy = True

    def numberOfRows(self):
        """Return the number of rows held in the collection

        Returns:
            int - number of rows in this collection. 0 if there are no data
                objects in the collection.

        Raises:
            RuntimeError: if the data objects are not all the same length.
        """
        if not self._collection:
            return 0
        if not self.checkRowsInSync():
            raise RuntimeError('RowCollection objects are not in sync')
        return len(self._collection[0])

    def checkRowsInSync(self):
        """Checks that the data objects in the collection are in sync.

        All the rows should be the same length. If they aren't then there's
        a problem and it will corrupt any output .dat file.

        Warning:
            It isn't actually that hard to corrupt the collection at the
            moment. It's ok if the DataObject classes are only accessed
            through here. If they are accessed independently of this class
            and not carefully checked they could fall out of sync.

        Returns:
            True if all data collections have the same length, otherwise
            False. An empty collection counts as in sync.
        """
        lengths = [obj.record_length for obj in self._collection]
        return all(length == lengths[0] for length in lengths)

    def _resetDataObject(self, temp_list):
        """Reset the data_objects list to its previous state.

        This method is called when there is a problem with updating the
        data_objects list. It returns the self owned versions to their
        original state.

        Args:
            temp_list: The versions to return the objects to. If None (no
                copy was taken) the collection is left untouched.
        """
        if temp_list is None:
            return
        self._collection = temp_list

    def _deepCopyDataObjects(self, obj):
        """Create a deep copy of the data_objects.

        Args:
            obj: a data object, or the list of data objects, to copy.

        Returns:
            Deep copy of obj.
        """
        return copy.deepcopy(obj)