# Source code for ship.tuflow.datafiles.datafileloader

"""

 Summary:
     Deals with loading all data from ADataObject type files.
     
     This process can get quite messy so it seems sensible to have a separate
     factory to deal with it.
     
     See Also:
         ADataObject, TmfDataObject, TmfCsvDataObject, DcDataObject

 Author:  
     Duncan Runnacles

 Created:  
     01 Apr 2016

 Copyright:  
     Duncan Runnacles 2016

 TODO:
     Need to add a subfile loader for the materials csv file loader.
     
     This module still needs some cleaning up. There is quite a bit of
     repeated code that could be pulled out into module level functions and
     used by all of the loaders.

 Updates:


"""

from __future__ import unicode_literals

import csv
import os

# ship modules
from ship.datastructures.rowdatacollection import RowDataCollection
from ship.datastructures import dataobject as do
from ship.tuflow.tuflowfilepart import DataFile, GisFile, TuflowFile
from ship.tuflow.datafiles import datafileobject as dataobj
from ship.utils import filetools
from ship.utils import utilfunctions as uuf
from ship.utils.dbfread import DBF

import logging
logger = logging.getLogger(__name__)


    
def loadDataFile(datafile, args_dict=None):
    r"""Factory function for creating DataFileObject type objects.

    Loads the contents of the DataFileObject based on the composition of the
    given object and returns the newly created DataFileObject of that type.

    The args_dict is a dict of key-value pairs where the key represents a
    placeholder used within one of the data files and the value represents
    the variable that should be used to replace that placeholder. This is
    common in bc_dbase files for instance, where '__event__' may be used as a
    placeholder with BC Event Text and BC Event Name used in the control
    files to define what should be used. Other options are the use of
    scenario and event definitions. Within these BC Event Source can be
    defined to associate certain placeholders with values e.g.::

        Define Event == 8hr
            BC Event Source == ~DUR~ | 8hr
            BC Database == ..\bc_dbase\my_bcdbase.csv
        End Define

    These are stored in the tuflow model when loaded and can be passed when
    loading data files to use.

    Args:
        datafile(TuflowFile): FilePart to create the DataFileObject from.
        args_dict=None(dict): dictionary of placeholder keywords and their
            associated values. It is handed on to the file readers and data
            objects created here. A new empty dict is used when omitted.

    Return:
        DataFileObject: of type identified from the composition of the given
            TuflowFile object.

    Raises:
        AttributeError: if datafile is not a TuflowFile, or is not the
            GisFile/DataFile subtype required by its command.
        ValueError: if the command, or the file extension for a materials
            file, is not supported.

    See Also:
        TuflowFile
        DataFileObject
    """
    # A fresh dict per call: the dict is stored on the returned objects, so a
    # shared default would leak state between unrelated loads.
    if args_dict is None:
        args_dict = {}

    if not isinstance(datafile, TuflowFile):
        raise AttributeError('datafile is not an instance of TuflowFile')

    command = datafile.command.upper()
    if command == 'READ MI TABLE LINKS':
        if not isinstance(datafile, GisFile):
            raise AttributeError('datafile is not an instance of GisFile')

        row_data, comments = readXsFile(datafile)
        return dataobj.XsDataObject(row_data, datafile, comments, args_dict)

    # Anything else must be a DataFile instance
    if not isinstance(datafile, DataFile):
        raise AttributeError('datafile is not an instance of DataFile')

    # Checks for the file command and then any different file types that are
    # dealt with under that command.
    if command == 'READ MATERIALS FILE':
        extension = datafile.extension.lower()

        if extension == 'tmf':
            row_data, comments = readTmfFile(datafile)
            return dataobj.TmfDataObject(row_data, datafile, comments, args_dict)

        if extension == 'csv':
            row_data, comments, subfile_details = readMatCsvFile(datafile, args_dict)
            mat = dataobj.MatCsvDataObject(row_data, datafile, comments)

            # Load any subfiles referenced from the main materials file
            for path, header_list in subfile_details.items():
                mat.addSubfile(readMatSubfile(datafile, path, header_list, args_dict))

            return mat

    elif command == 'BC DATABASE':
        row_data, comments = readBcFile(datafile, args_dict)
        return dataobj.BcDataObject(row_data, datafile, comments, args_dict)

    # Reached for an unknown command and also for a materials file with an
    # extension that has no loader, which previously fell through and
    # returned None without any warning.
    msg = ('Command type (%s) with extension (%s) is not currently supported'
                                    % (datafile.command, datafile.extension))
    logger.error(msg)
    raise ValueError(msg)


def readXsFile(datafile):
    """Load the contents of the estry 1d_xs file referenced by datafile.

    MapInfo files (mif/mid) are read from the .mid file with the csv module
    and Shapefiles (shp/dbf) are read from the .dbf file with the DBF reader.
    When the datafile has a 'mif' or 'shp' extension the '.mid' or '.dbf'
    file with the same name in the same directory is loaded instead.

    Args:
        datafile(TuflowFile): supplies the extension and the absolute path of
            the file to load.

    Return:
        tuple: (RowDataCollection, list). The comments list is always empty.

    Raises:
        IOError: if the file could not be loaded.
        ValueError: if the extension is not one of mif, mid, shp or dbf.
    """
    xs_enum = dataobj.XsEnum()


    def _withExtension(path, new_ext):
        """Return path with the extension of its file name set to new_ext."""
        dir_name, file_name = os.path.split(path)
        stem = os.path.splitext(file_name)[0]
        return os.path.join(dir_name, stem + new_ext)


    def loadShapeFile(file_path, row_collection):
        """Loads cross section data from Shapefile .dbf format.

        Uses the dbfreader library.
        """
        try:
            table = DBF(file_path, load=True)
        except IOError:
            logger.error('Unable to load file at: ' + file_path)
            raise IOError ('Unable to load file at: ' + file_path)

        for i, record in enumerate(table.records):
            count = 0
            for entry in record.values():
                row_collection._addValue(count, entry)
                count += 1

            # Need to catch the fact that skew does not exist in some versions.
            # Calling _addValue without a value for the remaining columns.
            if count < len(xs_enum.ITERABLE):
                logger.info('1d_xs does not have skew column - adding default value')
                for k in range(count, len(xs_enum.ITERABLE)):
                    row_collection._addValue(k)

            row_collection._addValue('row_no', i)

        return row_collection


    def loadMapinfoFile(file_path, row_collection):
        """Load cross section data from Mapinfo .mid format.
        """
        try:
            with open(file_path, 'rb') as mid_file:
                reader = csv.reader(mid_file)
                for i, row in enumerate(reader):

                    for j, entry in enumerate(row):
                        row_collection._addValue(j, entry)

                    # Need to catch the fact that skew does not exist in some versions.
                    if len(row) < len(xs_enum.ITERABLE):
                        logger.info('1d_xs does not have skew column - adding deafult value')
                        for k in range(len(row), len(xs_enum.ITERABLE)):
                            row_collection._addValue(k)

                    row_collection._addValue('row_no', i)

        except IOError:
            logger.error('Unable to load file at: ' + file_path)
            raise IOError ('Unable to load file at: ' + file_path)

        return row_collection


    def setupRowCollection():
        """Setup the RowDataCollection for loading the data into.
        """
        row_collection = RowDataCollection()
        # 0 == StringData column, 1 == FloatData column (for columns 1 to 11)
        types = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]

        # Do the first entry separately because it has a different format
        # string (no comma in front).
        row_collection.addToCollection(do.StringData(0, format_str='{0}', default=''))
        for i, t in enumerate(types, 1):
            if t == 0:
                row_collection.addToCollection(do.StringData(i, format_str=', {0}', default=''))
            else:
                row_collection.addToCollection(do.FloatData(i, format_str=', {0}', no_of_dps=3, default=0.00))

        # Extra column for tracking the row number of the data in the file.
        row_collection.addToCollection(do.IntData('row_no'))

        return row_collection


    row_collection = setupRowCollection()
    ext = datafile.extension.lower()
    file_path = datafile.absolutePath()

    # If we're loading a MapInfo mid/mif file
    if ext in ('mif', 'mid'):
        if ext == 'mif':
            file_path = _withExtension(file_path, '.mid')
        row_collection = loadMapinfoFile(file_path, row_collection)

    # If we're loading a Shapefile
    elif ext in ('shp', 'dbf'):
        if ext == 'shp':
            file_path = _withExtension(file_path, '.dbf')
        row_collection = loadShapeFile(file_path, row_collection)

    else:
        logger.warning('Invalid file extension for XS type in file: ' + datafile.filenameAndExtension())
        raise ValueError ('Invalid file extension for XS type in file: ' + datafile.filenameAndExtension())

    # Always return an empty comments list because there will never be any.
    return row_collection, []
    
 

def readBcFile(datafile, args_dict=None):
    """Load the contents of the BC Database file referenced by datafile.

    Loads the data from the file referenced by the given TuflowFile object into
    a :class:'rowdatacollection' and a list of comment only lines.

    The first line that is not blank or comment-only is treated as the header
    row; every following non-comment line is treated as a data row.

    Args:
        datafile(TuflowFile): TuflowFile object with file details.
        args_dict=None(dict): accepted for consistency with the other
            loaders. It is not read by this function.

    Return:
        tuple: rowdatacollection, comment_lines(list). comment_lines has one
            entry per line in the file: the stripped text for blank or
            comment-only lines and None for header/data lines.

    Raises:
        IOError: if the file could not be loaded.

    See Also:
        :class:'rowdatacollection'.
    """
    comment_types = ['#', '!']
    bc_enum = dataobj.BcEnum()

    def _checkHeaders(row, required_headers):
        """Checks that any required headers can be found.

        Reviews the headers in the header row of the csv file to ensure that
        any specifically needed named column headers exist. A warning is
        logged for each required header that is missing.

        Args:
            row(list): columns headers.
            required_headers(list): column names that must be included.

        Return:
            bool: True if all required headers were found, False otherwise.
        """
        missing = [h for h in required_headers if h not in row]
        for header in missing:
            logger.warning('Required header (' + header + ') not ' +
                'found in file: ' + path)
        return not missing


    def _loadHeadData(row, row_collection, required_headers):
        """Loads the column header data.

        Adds the file defined names for the headers to the rowdatacollection.

        Args:
            row(list): containing the row data.
            row_collection(rowdatacollection): for updating.
            required_headers(list): column names that must exist.

        Return:
            rowdatacollection: updated with header row details.
        """
        # Only called for the warnings it logs; a missing header does not
        # stop the file from being loaded.
        _checkHeaders(row, required_headers)

        # Store at most one header name per column known to the enum.
        for header in row[:len(bc_enum.ITERABLE)]:
            row_collection._addValue('actual_header', header)

        return row_collection


    def _loadRowData(row, row_count, row_collection):
        """Loads the data in a specific row of the file.

        Args:
            row(list): containing the row data.
            row_count(int): the current row number (not used).
            row_collection(rowdatacollection): for updating.

        Return:
            rowdatacollection: updated with the row values.
        """
        # NOTE(review): no 'comment' data object is added to the collection
        # built in this function - confirm how _addValue treats this key.
        if '!' in row[-1] or '#' in row[-1]:
            row_collection._addValue('comment', row[-1])

        # Add the row data in the order that it appears in the file
        # from left to right.
        for i in bc_enum.ITERABLE:
            if i < len(row):
                row_collection._addValue(i, row[i])

        return row_collection


    # Initialise the RowDataCollection with one string column per enum entry.
    # The first entry doesn't want a comma in front when formatting.
    row_collection = RowDataCollection()
    for i, val in enumerate(bc_enum.ITERABLE):
        format_str = '{0}' if i == 0 else ', {0}'
        row_collection.addToCollection(do.StringData(i, format_str=format_str, default=''))

    row_collection.addToCollection(do.StringData('actual_header', format_str=', {0}', default=''), index=0)
    row_collection.addToCollection(do.IntData('row_no', format_str=None, default=''))

    path = datafile.absolutePath()
    required_headers = ['Name', 'Source']

    # Stores the comments found in the file
    comment_lines = []
    try:
        logger.info('Loading data file contents from disc - %s' % (path))
        # NOTE: the 'U' (universal newlines) flag is a Python 2 era mode; it
        # is deprecated in Python 3 and was removed in Python 3.11.
        with open(path, 'rU') as csv_file:
            reader = csv.reader(csv_file)

            found_header = False
            row_count = 0
            # Loop through the contents loaded from file line-by-line.
            for i, line in enumerate(reader, 0):

                # Returns the stripped text ('' for a blank line) or False
                comment = hasCommentOnlyLine(''.join(line), comment_types)
                if comment or comment == '':
                    comment_lines.append(comment)
                    continue

                # First non-comment is the headers, anything after is data
                if not found_header:
                    found_header = True
                    row_collection = _loadHeadData(line, row_collection, required_headers)
                else:
                    row_collection = _loadRowData(line, i, row_collection)
                    row_collection._addValue('row_no', row_count)
                    row_count += 1

                comment_lines.append(None)

    except IOError:
        logger.warning('Cannot load file - IOError')
        raise IOError ('Cannot load file at: ' + path)

    # Just need to reset the has_changed variable because it will have been
    # set to True while loading everything in.
    # NOTE(review): the other loaders in this module call getDataObject()
    # here - confirm which accessor RowDataCollection provides.
    for i in range(0, len(bc_enum.ITERABLE)):
        row_collection.dataObject(i).has_changed = False

    return row_collection, comment_lines


def readMatCsvFile(datafile, args_dict=None):
    """Load the contents of the Materials CSV file referenced by datafile.

    Loads the data from the file referenced by the given TuflowFile object into
    a :class:'rowdatacollection' and a list of comment only lines. Any
    subfiles named in the Manning's column are recorded so that the caller
    can load them afterwards.

    Args:
        datafile(TuflowFile): TuflowFile object with file details.
        args_dict=None(dict): accepted for consistency with the other
            loaders. It is not read by this function.

    Return:
        tuple: rowdatacollection, comment_lines(list), subfile_details(dict).
            subfile_details maps each subfile name found to the list of
            (zero, one or two) column headers given with it.

    Raises:
        IOError: if the file could not be loaded.
        IndexError: if a row does not have the expected number of entries.
        AttributeError: re-raised with a formatting message if one occurs
            while reading the rows.

    See Also:
        :class:'rowdatacollection'.
    """
    comment_types = ['#', '!']
    csv_enum = dataobj.MatCsvEnum()
    subfile_details = {}

    def _loadHeadData(row, row_collection):
        """Loads the column header data.

        The four file columns are mapped onto the first of the collection
        columns that each one is expanded into (0, 1, 9 and 11).

        Args:
            row(list): containing the header row data.
            row_collection(rowdatacollection): for updating.

        Return:
            rowdatacollection: updated with header row details.
        """
        new_row = [None] * 12

        if '!' in row[-1] or '#' in row[-1]:
            row_collection._addValue('comment', row[-1])

        new_row[0] = row[0]
        new_row[1] = row[1]
        new_row[9] = row[2]
        new_row[11] = row[3]

        # One 'actual_header' entry per collection column, including the
        # None placeholders for columns that have no header of their own.
        for header in new_row:
            row_collection._addValue('actual_header', header)

        return row_collection


    def _disectEntry(col_no, entry, new_row):
        """Breaks the row values into the appropriate object values.

        The materials file can have Excel style sub-values. i.e. it can have
        separate columns defined within a bigger one. This function will break
        those values down into a format usable by the values initiated in the
        rowdatacollection.

        Args:
            col_no(int): the current column number.
            entry(string): the value of the current column.
            new_row(list): the row values to update.

        Return:
            list containing the updated row values.

        Note:
            This isn't very nice. Need to clean it up and find a better, safer
            way of dealing with breaking the row data up. It may be excess work
            but perhaps creating an xml converter could work quite well and
            make dealing with the file a bit easier?
        """
        # Put in ID and Hazard as normal
        if col_no == 0:
            new_row[0] = entry
        elif col_no == 11:
            new_row[11] = entry

        # Possibly break up Manning's entry further
        elif col_no == 1:
            # See if there's more than one value in the Manning's category.
            splitval = entry.split(',')

            if len(splitval) == 1:
                # A single numeric value is the value for 'n'
                if uuf.isNumeric(splitval[0]):
                    new_row[1] = splitval[0]

                # Otherwise it's a filename. These can be followed by up to
                # two column headers to read from the sub file.
                else:
                    parts = [p.strip() for p in splitval[0].split('|')]
                    # The key is stripped in every case so that it matches
                    # the name stored in the row and can be used as a path.
                    subfile_details[parts[0]] = parts[1:3]
                    new_row[6] = parts[0]
                    if len(parts) > 1:
                        new_row[7] = parts[1]
                    if len(parts) > 2:
                        new_row[8] = parts[2]

            # If there's more than one value then it must be the Manning's
            # depth curve values (N1, Y1, N2, Y2).
            else:
                new_row[2] = splitval[0]
                new_row[3] = splitval[1]
                new_row[4] = splitval[2]
                new_row[5] = splitval[3]

        # Finally grab the infiltration parameters (IL, CL)
        elif col_no == 2:
            splitval = entry.split(',')
            new_row[9] = splitval[0]
            new_row[10] = splitval[1]

        return new_row


    def _loadRowData(row, row_count, row_collection):
        """Loads the data in a specific row of the file.

        Args:
            row(list): containing the row data.
            row_count(int): the current row number (not used).
            row_collection(rowdatacollection): for updating.

        Return:
            rowdatacollection: updated with the row values.
        """
        if '!' in row[-1] or '#' in row[-1]:
            row_collection._addValue('comment', row[-1])
        new_row = [None] * 12

        # Expand the row data in the order that it appears in the file
        # from left to right.
        for i in csv_enum.ITERABLE:
            if i < len(row):
                new_row = _disectEntry(i, row[i], new_row)

        for col_no, item in enumerate(new_row):
            row_collection._addValue(col_no, item)

        return row_collection


    row_collection = RowDataCollection()
    # 0 == StringData column, 1 == FloatData column (for columns 1 to 11)
    types = [1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0]

    # Do the first entry separately because it has a different format string
    # (no comma in front).
    row_collection.addToCollection(do.StringData(0, format_str='{0}', default=''))
    for i, t in enumerate(types, 1):
        if t == 0:
            row_collection.addToCollection(do.StringData(i, format_str=', {0}', default=''))
        else:
            row_collection.addToCollection(do.FloatData(i, format_str=', {0}', default='', no_of_dps=3))

    # Add a few extra columns to the row_collection for tracking the
    # data in the file.
    row_collection.addToCollection(do.StringData('comment', format_str='{0}', default=''))
    row_collection.addToCollection(do.StringData('actual_header', format_str='{0}', default=''))
    row_collection.addToCollection(do.IntData('row_no', format_str=None, default=''))

    path = datafile.absolutePath()

    # Stores the comments found in the file
    comment_lines = []
    try:
        logger.info('Loading data file contents from disc - %s' % (path))
        with open(path, 'rb') as csv_file:
            reader = csv.reader(csv_file)

            found_header = False
            line_count = 0

            try:
                # Loop through the contents loaded from file line-by-line.
                for i, line in enumerate(reader, 0):

                    # Returns the stripped text ('' for a blank line) or False
                    comment = hasCommentOnlyLine(''.join(line), comment_types)
                    if comment or comment == '':
                        comment_lines.append(comment)
                        continue

                    # First non-comment is the headers, anything after that
                    # contains materials entries.
                    if not found_header:
                        found_header = True
                        _loadHeadData(line, row_collection)
                    else:
                        _loadRowData(line, i, row_collection)

                    # The header line is counted here as well as data lines.
                    row_collection._addValue('row_no', line_count)
                    line_count += 1
                    comment_lines.append(None)
            except IndexError:
                logger.error('This file is not setup/formatted correctly for a Materials.CSV file:\n' + path)
                raise IndexError ('File is not correctly formatted for a Materials.csv file')
            except AttributeError:
                logger.error('This file is not setup/formatted correctly for a Materials.CSV file:\n' + path)
                raise AttributeError ('File is not correctly formatted for a Materials.csv file')

    except IOError:
        logger.warning('Cannot load file - IOError')
        raise IOError ('Cannot load file at: ' + path)

    # Just need to reset the has_changed variable because it will have been
    # set to True while loading everything in.
    for i in range(0, len(csv_enum.ITERABLE)):
        row_collection.getDataObject(i).has_changed = False

    return row_collection, comment_lines, subfile_details


def readMatSubfile(main_datafile, filename, header_list, args_dict):
    """Load a subfile that is referenced from a Materials CSV file.

    The subfile is scanned once to work out which columns hold numeric data
    and is then read line-by-line into a :class:'rowdatacollection'.

    Args:
        main_datafile(TuflowFile): the materials file that references the
            subfile. Its root is used to build the path to the subfile.
        filename(str): name of the subfile, joined onto main_datafile.root.
        header_list(list): up to two column headers to locate in the header
            row of the subfile.
        args_dict(dict): not read by this function.

    Return:
        DataFileSubfileMat: built from the loaded rows, the comment lines and
            the column indices of the two headers (-1 where a header was not
            found or the file contained no header row).

    Raises:
        IOError: if the file could not be loaded.
    """
    comment_types = ['#', '!']
    root = main_datafile.root
    path = os.path.join(root, filename)

    # 'None' is used as the name to search for when a header is not given.
    header1 = 'None'
    header2 = 'None'
    if len(header_list) > 0:
        header1 = header_list[0]
        if len(header_list) > 1:
            header2 = header_list[1]


    def _scanfile(filepath):
        """Scans the file before we do any loading to identify the contents.
        Need to do this because the file can be setup in so many ways that it
        becomes a headache to work it out in advance. Better to take a little
        bit of extra processing time and do some quick checks first.

        The first row of the file sets the number of columns. Scanning stops
        at the first blank row.

        Arguments:
            filepath (str): the path to the subfile.

        Return:
            tuple:
                 list: booleans with whether the column contains
                       data that we want or not.
                 int:  length of the cols list.
                 list: containing all of the first row column data
                 int:  first row with usable data on (-1 if none found).
        """
        logger.debug('Scanning Materials file - %s'
                                        % (filepath))

        cols = []
        head_list = []
        start_row = -1
        with open(filepath, 'rb') as scan_file:
            reader = csv.reader(scan_file)

            for i, row in enumerate(reader, 0):
                if "".join(row).strip() == "":
                    break

                if i == 0:
                    head_list = row
                    cols = [False] * len(row)
                    continue

                # Entries beyond the width of the first row are ignored
                # rather than indexing past the end of cols.
                for j, col in enumerate(row[:len(cols)], 0):
                    if uuf.isNumeric(col):
                        cols[j] = True
                        if start_row == -1:
                            start_row = i
                    elif cols[j]:
                        break

        return cols, len(cols), head_list, start_row


    def _loadHeadData(row, row_collection, comment_lines, col_length):
        """Loads the header row and finds the requested header columns.

        Args:
            row(list): containing the header row data.
            row_collection(rowdatacollection): for updating.
            comment_lines(list): gets a None entry added for this line.
            col_length(int): number of columns found when scanning the file.

        Return:
            tuple: rowdatacollection, index of header1, index of header2. An
                index is -1 if the header was not found.
        """
        comment_lines.append(None)

        head1_location = -1
        head2_location = -1
        for i, entry in enumerate(row[:col_length]):
            entry = entry.strip()
            if entry == header1:
                head1_location = i
            if entry == header2:
                head2_location = i
            row_collection._addValue('actual_header', entry)

        return row_collection, head1_location, head2_location


    def _loadRowData(row, row_count, row_collection, comment_lines, col_length, start_row):
        """Loads the data in a specific row of the file.

        Args:
            row(list): containing the row data.
            row_count(int): the current row number.
            row_collection(rowdatacollection): for updating.
            comment_lines(list): for updating.
            col_length(int): number of columns found when scanning the file.
            start_row(int): first row found to contain numeric data.

        Return:
            tuple: rowdatacollection, comment_lines.
        """
        # Any lines that aren't headers, but are above the first row to contain
        # actual data will be stored as comment lines
        if row_count < start_row:
            comment_lines.append(row)
            return row_collection, comment_lines

        comment_lines.append(None)

        # NOTE(review): no 'comment' data object is added to the collection
        # built in this function - confirm how _addValue treats this key.
        if '!' in row[-1] or '#' in row[-1]:
            row_collection._addValue('comment', row[-1])

        # Add the row data in the order that it appears in the file
        # from left to right.
        for i in range(col_length):
            if i < len(row):
                row_collection._addValue(i, row[i])

        return row_collection, comment_lines


    # Default to 'not found' so that a file without a header row does not
    # leave these undefined when the subfile object is built below.
    head1_loc = -1
    head2_loc = -1
    try:
        logger.info('Loading data file contents from disc - %s' % (path))
        with open(path, 'rb') as csv_file:
            reader = csv.reader(csv_file)

            # Do a quick check of the file setup
            cols, col_length, head_list, start_row = _scanfile(path)

            # First entry doesn't want to have a comma in front when formatting.
            # but all of the others do.
            row_collection = RowDataCollection()
            row_collection.addToCollection(do.FloatData(0, format_str=' {0}', default='', no_of_dps=6))
            for i in range(1, len(cols)):
                if cols[i]:
                    row_collection.addToCollection(do.FloatData(i, format_str=', {0}', default='', no_of_dps=6))
                else:
                    row_collection.addToCollection(do.StringData(i, format_str=', {0}', default=''))

            row_collection.addToCollection(do.StringData('actual_header', format_str='{0}', default=''), index=0)
            row_collection.addToCollection(do.IntData('row_no', format_str=None, default=''))

            # Stores the comments found in the file
            comment_lines = []
            found_header = False
            # Loop through the contents loaded from file line-by-line.
            for i, line in enumerate(reader, 0):

                # Returns the stripped text ('' for a blank line) or False
                comment = hasCommentOnlyLine(''.join(line), comment_types)
                if comment or comment == '':
                    comment_lines.append([comment, i])
                    continue

                # First non-comment is the headers
                if not found_header:
                    found_header = True
                    row_collection, head1_loc, head2_loc = _loadHeadData(
                                line, row_collection, comment_lines, col_length)
                else:
                    row_collection, comment_lines = _loadRowData(
                                line, i, row_collection, comment_lines,
                                col_length, start_row)

                row_collection._addValue('row_no', i)

    except IOError:
        logger.warning('Cannot load file - IOError')
        raise IOError ('Cannot load file at: ' + path)

    path_holder = filetools.PathHolder(path, root)
    mat_sub = dataobj.DataFileSubfileMat(path_holder, row_collection, comment_lines,
                                         path_holder.filename, head1_loc,
                                         head2_loc)
    return mat_sub


def readTmfFile(datafile):
    """Load the contents of the Materials TMF file referenced by datafile.

    Loads the data from the file referenced by the given TuflowFile object into
    a :class:'rowdatacollection' and a list of comment only lines.

    Args:
        datafile(TuflowFile): TuflowFile object with file details.

    Return:
        tuple: rowdatacollection, comment_lines(list). comment_lines has one
            entry per line in the file: the stripped text for blank or
            comment-only lines and None for data lines.

    Raises:
        IOError: if the file could not be loaded.

    See Also:
        :class:'rowdatacollection'.
    """
    value_separator = ','
    comment_types = ['#', '!']
    tmf_enum = dataobj.TmfEnum()
    # Number of value columns set up in the collection (column keys 0 to 10)
    no_of_values = 11

    path = datafile.absolutePath()

    # First column is an int with no leading comma; the rest are floats.
    row_collection = RowDataCollection()
    row_collection.addToCollection(do.IntData(0, format_str=None, default=''))
    for i in range(1, no_of_values):
        row_collection.addToCollection(do.FloatData(i, format_str=', {0}', default='', no_of_dps=3))

    # Keep track of any comment lines and the row numbers as well
    row_collection.addToCollection(do.StringData('comment', format_str=' ! {0}', default=''))
    row_collection.addToCollection(do.IntData('row_no', format_str=None, default=''))

    logger.info('Loading data file contents from disc - %s' % (path))
    contents = _loadFileFromDisc(path)

    # Stores the comments found in the file
    comment_lines = []

    # Loop through the contents list loaded from file line-by-line.
    row_count = 0
    for line in contents:

        # Returns the stripped text ('' for a blank line) or False
        comment = hasCommentOnlyLine(line, comment_types)
        if comment or comment == '':
            comment_lines.append(comment)

        # If we have a line that isn't a comment or a blank then it is going
        # to contain materials entries.
        else:
            comment_lines.append(None)
            row_collection = _loadRowData(line, row_count, row_collection, tmf_enum.ITERABLE,
                                            comment_types, value_separator)
            row_count += 1

    # Just need to reset the has_changed variable because it will have been
    # set to True while loading everything in.
    for i in range(no_of_values):
        row_collection.getDataObject(i).has_changed = False

    return row_collection, comment_lines



def _loadRowData(line, row_number, row_collection, val_range, comment_types,
                                                            value_separator):
    """Add the values found on one line of the file to the collection.

    Any inline comment is removed from the line first. The remainder is split
    on value_separator and one entry is added to the collection for each
    position in val_range.

    Args:
        line(string): row as read from file.
        row_number(int): the current row number.
        row_collection(rowdatacollection): object to update.
        val_range(list): one item per value column expected in the row.
        comment_types(list): characters used for commenting file.
        value_separator(string): the character used to separate entries.

    Return:
        rowdatacollection: updated with the values from the line.
    """
    data_part, inline_comment = _extractInlineComment(line, comment_types)

    tokens = [token.strip() for token in data_part.split(value_separator)]
    token_count = len(tokens)

    for col_no, _unused in enumerate(val_range):
        # Columns that are blank, or beyond the end of the line, are added
        # without a value.
        token = tokens[col_no] if col_no < token_count else ''
        if token != '':
            row_collection._addValue(col_no, token)
        else:
            row_collection._addValue(col_no)

    if inline_comment is not None:
        row_collection._addValue('comment', inline_comment)

    row_collection._addValue('row_no', row_number)
    return row_collection


def _loadFileFromDisc(path):
    """Read the file at the given path with filetools.getFile.

    Args:
        path(string): the absolute path to the file to load.

    Return:
        the file contents as returned by filetools.getFile.

    Raises:
        IOError: if the file could not be loaded for some reason. A warning
            is logged before the error is re-raised.
    """
    try:
        logger.info('Loading file contents from disc')
        file_contents = filetools.getFile(path)
    except IOError:
        logger.warning('ADataFileComponent cannot load file - IOError')
        raise
    else:
        return file_contents


def hasCommentOnlyLine(line, comment_types):
    """Find if line contains only a comment or is blank.

    Args:
        line(string): line to check for comment only status.
        comment_types(list): the possible comment characters to check for.

    Return:
        The line with surrounding whitespace stripped if it is blank or its
        first non-whitespace character is a comment character ('' for a blank
        line), or False if not. Callers need to test for '' as well as
        truthiness because a blank line returns an empty string.
    """
    stripped = line.strip()
    # Slicing (rather than indexing) gives '' for a blank line.
    first_char = stripped[:1]
    if first_char in comment_types or stripped == '':
        return stripped
    return False


def _extractInlineComment(line, comment_types):
    """Find if there's a comment on the line and extract it if there is.

    The comment starts at the first comment marker found on the line and
    runs to the end of the line, so later markers (of any type) are kept as
    part of the comment text.

    Args:
        line(string): the file line to be checked.
        comment_types(list): containing possible comment characters.

    Return:
        tuple: (line, comment) where line is the text before the comment
            marker and comment is the text after it. comment is None, and
            line is returned unchanged, if no marker was found.
    """
    first_at = -1
    first_marker = None
    for marker in comment_types:
        if not marker:
            continue
        found_at = line.find(marker)
        # Keep whichever marker appears earliest on the line
        if found_at != -1 and (first_at == -1 or found_at < first_at):
            first_at = found_at
            first_marker = marker

    if first_marker is None:
        return line, None

    return line[:first_at], line[first_at + len(first_marker):]