From bbb76c500fa54410ef67c346a1fdc4ac06d42a00 2020-04-15 19:12:27 From: Lance Edgar Date: 2020-04-15 19:12:27 Subject: [PATCH] Add support for extra header rows, for Excel reader data doesn't always start right after main header row --- diff --git a/rattail/excel.py b/rattail/excel.py index c1a299df67121512e205ac774d911c9039883e05..6a21e418a295ddee5ea0564d4a558da222abb9fb 100644 --- a/rattail/excel.py +++ b/rattail/excel.py @@ -44,13 +44,22 @@ class ExcelReader(object): Basic class for reading Excel files. """ - def __init__(self, path, sheet=0, sheet_name=None, header=0, datefmt='%Y-%m-%d', strip_fieldnames=True): + def __init__(self, path, sheet=0, sheet_name=None, header=0, + first_data_row=None, + datefmt='%Y-%m-%d', strip_fieldnames=True): """ Constructor; opens an Excel file for reading. :param header: Which row should be used as the header, i.e. to determine field (column) names. This is a zero-based index, so is 0 by default (i.e. the first row). + + :param first_data_row: Which is the first row to contain data. If not + specified, it will be assumed that data rows begin immediately after + the header row, as defined by :param:`header`. This again is + zero-based, so if the very first row is the true header, but then + there is another "header" row also, you might specify a value of + ``2`` here, since the 3rd row is the first to contain data. """ self.book = xlrd.open_workbook(path) if sheet_name is not None: @@ -58,6 +67,10 @@ class ExcelReader(object): else: self.sheet = self.book.sheet_by_index(sheet) self.header = header + if first_data_row is not None: + self.first_data_row = first_data_row + else: + self.first_data_row = self.header + 1 self.fields = self.sheet.row_values(self.header) if strip_fieldnames: self.fields = [field.strip() for field in self.fields] @@ -75,7 +88,7 @@ class ExcelReader(object): for j, value in enumerate(values)]) rows.append(data) - progress_loop(append, range(self.header + 1, self.sheet.nrows), progress, + progress_loop(append, range(self.first_data_row, self.sheet.nrows), progress, message="Reading data from Excel file") return rows