rattail-project/rattail Changeset

Changeset - bbb76c500fa5

Parent rev.

Child rev.

[Not reviewed]

0 1 0

Lance Edgar (lance) - 4 years ago 2020-04-15 19:12:27
lance@edbob.org

Add support for extra header rows, for Excel reader

data doesn't always start right after main header row

1 file changed with 15 insertions and 2 deletions:

rattail/excel.py

0 comments (0 inline, 0 general)

rattail/excel.py

➞

Show inline comments

 # -*- coding: utf-8; -*-
 ################################################################################
+#
 #  Rattail -- Retail Software Framework
 #  Copyright © 2010-2018 Lance Edgar
+#
 #  This file is part of Rattail.
+#
 #  Rattail is free software: you can redistribute it and/or modify it under the
 #  terms of the GNU General Public License as published by the Free Software
 #  Foundation, either version 3 of the License, or (at your option) any later
 #  version.
+#
 #  Rattail is distributed in the hope that it will be useful, but WITHOUT ANY
 #  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 #  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 #  details.
+#
 #  You should have received a copy of the GNU General Public License along with
 #  Rattail.  If not, see <http://www.gnu.org/licenses/>.
+#
 ################################################################################
 """
 Excel utilities
 """
 from __future__ import unicode_literals, absolute_import
 import datetime
 import six
 import xlrd
 from xlrd.xldate import xldate_as_tuple
 import openpyxl
 from openpyxl.styles import Font, PatternFill
 from openpyxl.cell.cell import get_column_letter
 from rattail.util import progress_loop
 class ExcelReader(object):
     """
     Basic class for reading Excel files.
     """
     def __init__(self, path, sheet=0, sheet_name=None, header=0, datefmt='%Y-%m-%d', strip_fieldnames=True):
     def __init__(self, path, sheet=0, sheet_name=None, header=0,
                  first_data_row=None,
                  datefmt='%Y-%m-%d', strip_fieldnames=True):
         """
         Constructor; opens an Excel file for reading.
         :param header: Which row should be used as the header, i.e. to
            determine field (column) names.  This is a zero-based index, so is 0
            by default (i.e. the first row).
         :param first_data_row: Which is the first row to contain data.  If not
            specified, it will be assumed that data rows begin immediately after
            the header row, as defined by :param:`header`.  This again is
            zero-based, so if the very first row is the true header, but then
            there is another "header" row also, you might specify a value of
            ``2`` here, since the 3rd row is the first to contain data.
         """
         self.book = xlrd.open_workbook(path)
         if sheet_name is not None:
             self.sheet = self.book.sheet_by_name(sheet_name)
         else:
             self.sheet = self.book.sheet_by_index(sheet)
         self.header = header
         if first_data_row is not None:
             self.first_data_row = first_data_row
         else:
             self.first_data_row = self.header + 1
         self.fields = self.sheet.row_values(self.header)
         if strip_fieldnames:
             self.fields = [field.strip() for field in self.fields]
         self.datefmt = datefmt
     def sheet_by_name(self, name):
         return self.book.sheet_by_name(name)
     def read_rows(self, progress=None):
         rows = []
         def append(row, i):
             values = self.sheet.row_values(row)
             data = dict([(self.fields[j], value)
                          for j, value in enumerate(values)])
             rows.append(data)
-        progress_loop(append, range(self.header + 1, self.sheet.nrows), progress,
+        progress_loop(append, range(self.first_data_row, self.sheet.nrows), progress,
                       message="Reading data from Excel file")
         return rows
     def parse_date(self, value, fmt=None):
         if isinstance(value, float):
             args = xldate_as_tuple(value, self.book.datemode)
             return datetime.datetime(*args).date()
         if value:
             return datetime.datetime.strptime(value, fmt or self.datefmt).date()
 class ExcelWriter(object):
     """
     Base class for Excel writers.
     """
     def __init__(self, path, fields, sheet_title=None, number_formats={}):
         """
         Constructor; opens an Excel workbook for writing.
         """
         self.path = path
         self.fields = fields
         self.book = openpyxl.Workbook()
         self.sheet = self.book.active
         if sheet_title:
             self.sheet.title = sheet_title
         self.number_formats = number_formats
     def write_header(self):
         font = Font(bold=True)
         for i, field in enumerate(self.fields, 1):
             cell = self.sheet.cell(row=1, column=i, value=field)
             cell.font = font
     def write_row(self, data, row=None):
         """
         Write (append) a single data row to the current sheet.
         :param row: The 1-based row number to which data should be written.
         """
         if row is None:
             raise NotImplementedError("should be able to detect 'next' row here?")
         self.sheet.append(data)
         # apply number formats
         if self.number_formats:
             for col, field in enumerate(self.fields, 1):
                 if field in self.number_formats:
                     cell = self.sheet.cell(row=row, column=col)
                     cell.number_format = self.number_formats[field]
         # apply row highlighting
         if row % 2 == 0:
             fill_even = PatternFill(patternType='solid',
                                     fgColor='d9d9d9',
                                     bgColor='d9d9d9')
             for col, field in enumerate(self.fields, 1):
                 cell = self.sheet.cell(row=row, column=col)
                 cell.fill = fill_even
     def write_rows(self, rows, progress=None):
         """
         Write (append) a sequence of data rows to the current sheet.
         """
         def write(data, i):
             # must add 1 to account for header
             self.write_row(data, row=i + 1)
         progress_loop(write, rows, progress,
                       message="Generating cells for spreadsheet")
     def auto_freeze(self, row=2, column=1):
         """
         Freeze sheet per "the usual"
         """
         self.sheet.freeze_panes = self.sheet.cell(row=row, column=column)
     def auto_filter(self):
         """
         Add auto filters for all columns.
         """
         first = self.sheet.cell(row=1, column=1)
         last = self.sheet.cell(row=self.sheet.max_row, column=self.sheet.max_column)
         cellrange = '{}:{}'.format(first.coordinate, last.coordinate)
         self.sheet.auto_filter.ref = cellrange
     def auto_resize(self, progress=None):
         """
         (Try to) Auto-resize all data columns.
         """
         # note, some of the below uses efficiency tricks from these docs
         # https://openpyxl.readthedocs.io/en/stable/tutorial.html#accessing-many-cells
         # we must calculate desired column widths.  but for sake of progress,
         # we'll iterate through rows instead of columns, to do that.  (this is

0 comments (0 inline, 0 general)