Changeset - bbb76c500fa5
[Not reviewed]
0 1 0
Lance Edgar (lance) - 4 years ago 2020-04-15 19:12:27
lance@edbob.org
Add support for extra header rows, for Excel reader

Data doesn't always start right after the main header row.
1 file changed with 15 insertions and 2 deletions:
0 comments (0 inline, 0 general)
rattail/excel.py
Show inline comments
 
@@ -44,13 +44,22 @@ class ExcelReader(object):
 
    Basic class for reading Excel files.
 
    """
 

	
 
    def __init__(self, path, sheet=0, sheet_name=None, header=0, datefmt='%Y-%m-%d', strip_fieldnames=True):
 
    def __init__(self, path, sheet=0, sheet_name=None, header=0,
 
                 first_data_row=None,
 
                 datefmt='%Y-%m-%d', strip_fieldnames=True):
 
        """
 
        Constructor; opens an Excel file for reading.
 

	
 
        :param header: Which row should be used as the header, i.e. to
 
           determine field (column) names.  This is a zero-based index, so is 0
 
           by default (i.e. the first row).
 

	
 
        :param first_data_row: Which is the first row to contain data.  If not
 
           specified, it will be assumed that data rows begin immediately after
 
           the header row, as defined by :param:`header`.  This again is
 
           zero-based, so if the very first row is the true header, but then
 
           there is another "header" row also, you might specify a value of
 
           ``2`` here, since the 3rd row is the first to contain data.
 
        """
 
        self.book = xlrd.open_workbook(path)
 
        if sheet_name is not None:
 
@@ -58,6 +67,10 @@ class ExcelReader(object):
 
        else:
 
            self.sheet = self.book.sheet_by_index(sheet)
 
        self.header = header
 
        if first_data_row is not None:
 
            self.first_data_row = first_data_row
 
        else:
 
            self.first_data_row = self.header + 1
 
        self.fields = self.sheet.row_values(self.header)
 
        if strip_fieldnames:
 
            self.fields = [field.strip() for field in self.fields]
 
@@ -75,7 +88,7 @@ class ExcelReader(object):
 
                         for j, value in enumerate(values)])
 
            rows.append(data)
 

	
 
        progress_loop(append, range(self.header + 1, self.sheet.nrows), progress,
 
        progress_loop(append, range(self.first_data_row, self.sheet.nrows), progress,
 
                      message="Reading data from Excel file")
 
        return rows
 

	
0 comments (0 inline, 0 general)