""" These classes are a collection of the needed tools to read external data. The External type objects created by these classes are initialized before the Stateful objects by functions.Model.initialize. """ import re import warnings from openpyxl import load_workbook from openpyxl.utils.exceptions import InvalidFileException import numpy as np import xarray as xr import pandas as pd from . import utils from .data import Data from .lookups import Lookups _SPREADSHEET_EXTS = {'.xls', '.xlsx', '.xlsm', '.xlsb', '.odf', '.ods', '.odt'} class Excels(): """ Class to save the read Excel files and thus avoid double reading """ _Excels, _Excels_opyxl = {}, {} @classmethod def read(cls, file_name, tab): """ Read the Excel file or return the previously read one """ if file_name.joinpath(tab) in cls._Excels: return cls._Excels[file_name.joinpath(tab)] else: # get the function to read the data based on its extension read_kwargs = {} ext = file_name.suffix.lower() if ext in _SPREADSHEET_EXTS: read_func = pd.read_excel read_kwargs['sheet_name'] = tab elif ext == '.csv': read_func = pd.read_csv if tab and not tab[0].isalnum(): read_kwargs['sep'] = tab else: read_func = pd.read_table if tab and not tab[0].isalnum(): read_kwargs['sep'] = tab # read the data excel = np.array([ pd.to_numeric(ex, errors='coerce') for ex in read_func(file_name, header=None, **read_kwargs).values ]) # save data for future retrievals cls._Excels[file_name.joinpath(tab)] = excel return excel @classmethod def read_opyxl(cls, file_name): """ Read the Excel file using OpenPyXL or return the previously read one """ if file_name in cls._Excels_opyxl: return cls._Excels_opyxl[file_name] else: excel = load_workbook(file_name, read_only=True, data_only=True) cls._Excels_opyxl[file_name] = excel return excel @classmethod def clean(cls): """ Clean the dictionary of read files """ for file in cls._Excels_opyxl.values(): # close files open directly with openpyxls file.close() # files open with pandas are automatically closed cls._Excels, cls._Excels_opyxl = {}, {} class External(object): """ Main class of external objects Attributes ---------- py_name: str The Python name of the object missing: str ("warning", "error", "ignore", "keep") What to do with missing values. If "warning" (default) shows a warning message and interpolates the values. If "raise" raises an error. If "ignore" interpolates the values without showing anything. If "keep" it will keep the missing values, this option may cause the integration to fail, but it may be used to check the quality of the data. file: str File name from which the data is read. tab: str Tab name from which the data is read. If file type is not a spreadsheet this will be used as a separator. """ missing = "warning" def __init__(self, py_name): self.py_name = py_name self.file = None self.tab = None def __str__(self): return self.py_name def _get_data_from_file(self, rows, cols): """ Function to read data from excel file using rows and columns Parameters ---------- rows: list of len 2 first row and last row+1 to be read, starting from 0 cols: list of len 2 first col and last col+1 to be read, starting from 0 Returns ------- data: pandas.DataFrame, pandas.Series or float depending on the shape of the requested data """ # read data data = Excels.read( self.file, self.tab)[rows[0]:rows[1], cols[0]:cols[1]].copy() shape = data.shape # empty cells if shape[0] == 0 or shape[1] == 0: raise ValueError( self.py_name + "\n" "The cells are empty.\n" + self._file_sheet ) # if it is a single row remove its dimension if shape[1] == 1: data = data[:, 0] if shape[0] == 1: data = data[0] return data def _get_data_from_file_opyxl(self, cellname): """ Function to read data from excel file using cell range name Parameters ---------- cellname: str the cell range name Returns ------- data: numpy.ndarray or float depending on the shape of the requested data shape: list The shape of the data in 2D. """ # read data try: excel = Excels.read_opyxl(self.file) except InvalidFileException: raise ValueError( self.py_name + "\n" f"Cannot read the file '{self.file}'...\n" f"It could happen that cell='{cellname}' was " "read as a cell range name due to a wrong " "definition of cell value" ) # Get global and local cellrange names global_cellranges = excel.defined_names local_cellranges = None # need to lower the sheetnames as Vensim has no case sensitivity for sheet in excel.sheetnames: if sheet.lower() == self.tab.lower(): local_cellranges = excel[sheet].defined_names break if local_cellranges is None: # Error if it is not able to get the localSheetId raise ValueError( self.py_name + "\n" "The sheet doesn't exist...\n" + self._file_sheet ) try: # Search for local and global names cellrange = local_cellranges.get(cellname)\ or global_cellranges.get(cellname) sheet, cells = next(cellrange.destinations) assert sheet.lower() == self.tab.lower() self.tab = sheet # case insensitivity in sheet name # Get the cells where the cellrange is defined cells = re.split(r":|\$", cells) cols = [self._col_to_num(cells[1]), None] rows = [int(cells[2])-1, None] if len(cells) == 3: # 0 dim cell range cols[1] = cols[0]+1 rows[1] = rows[0]+1 else: # array or table cols[1] = self._col_to_num(cells[4])+1 rows[1] = int(cells[5]) # Use pandas to read the data and return its original shape return self._get_data_from_file(rows, cols), \ [rows[1]-rows[0], cols[1]-cols[0]] except (AttributeError, AssertionError): # key error if the cellrange doesn't exist in the file or sheet raise AttributeError( self.py_name + "\n" f"The cellrange name '{cellname}'\n" "Doesn't exist in:\n" + self._file_sheet ) def _get_series_data(self, series_across, series_row_or_col, cell, size): """ Function thar reads series and data from excel file for DATA and LOOKUPS. Parameters ---------- series_across: "row", "column" or "name" The way to read series file. series_row_or_col: int or str If series_across is "row" the row number where the series data is. If series_across is "column" the column name where the series data is. If series_across is "name" the cell range name where the series data is. cell: If series_across is not "name, the top left cell where the data table starts. Else the name of the cell range where the data is. size: The size of the 2nd dimension of the data. Returns ------- series, data: ndarray (1D), ndarray(1D/2D) The values of the series and data. """ if series_across == "row": # Horizontal data (dimension values in a row) # get the dimension values first_row, first_col = self._split_excel_cell(cell) series = self._get_data_from_file( rows=[int(series_row_or_col)-1, int(series_row_or_col)], cols=[first_col, None]) # read data data = self._get_data_from_file( rows=[first_row, first_row + size], cols=[first_col, None]).transpose() elif series_across == "column": # Vertical data (dimension values in a column) # get the dimension values first_row, first_col = self._split_excel_cell(cell) series_col = self._col_to_num(series_row_or_col) series = self._get_data_from_file( rows=[first_row, None], cols=[series_col, series_col+1]) # read data data = self._get_data_from_file( rows=[first_row, None], cols=[first_col, first_col + size]) else: # get series data series, s_shape = self._get_data_from_file_opyxl(series_row_or_col) if isinstance(series, float): series = np.array([series]) if s_shape[0] > 1 and s_shape[1] > 1: # Error if the lookup/time dimension is 2D raise ValueError( self.py_name + "\n" + "Dimension given in:\n" + self._file_sheet + "\tDimension name:" + "\t'{}'\n".format(series_row_or_col) + " is a table and not a vector" ) elif s_shape[1] != 1: transpose = True else: transpose = False # get data data, d_shape = self._get_data_from_file_opyxl(cell) if isinstance(data, float): data = np.array([data]) if transpose: # transpose for horizontal definition of dimension data = data.transpose() d_shape = d_shape[1], d_shape[0] if d_shape[0] != len(series): raise ValueError( self.py_name + "\n" + "Dimension and data given in:\n" + self._file_sheet + "\tDimension name:\t'{}'\n".format(series_row_or_col) + "\tData name:\t'{}'\n".format(cell) + " don't have the same length in the 1st dimension" ) if d_shape[1] != size: # Given coordinates length is different than # the lentgh of 2nd dimension raise ValueError( self.py_name + "\n" + "Data given in:\n" + self._file_sheet + "\tData name:\t'{}'\n".format(cell) + " has not the same size as the given coordinates" ) return series, data def _resolve_file(self, root): """ Resolve input file path. Joining the file with the root and checking if it exists. Parameters ---------- root: pathlib.Path or str The root path to the model file. Returns ------- None """ if str(self.file)[0] == '?': # TODO add an option to include indirect references raise ValueError( self.py_name + "\n" + f"Indirect reference to file: '{self.file}'") # Join path and resolve it to better print error messages self.file = root.joinpath(self.file).resolve() if not self.file.is_file(): raise FileNotFoundError( self.py_name + "\n" + "File '%s' not found." % self.file) def _initialize_data(self, element_type): """ Initialize one element of DATA or LOOKUPS Parameters ---------- element_type: str "lookup" for LOOKUPS, "data" for data. Returns ------- data: xarray.DataArray Dataarray with the time or interpolation dimension as first dimension. """ self._resolve_file(root=self.root) series_across = self._series_selector(self.x_row_or_col, self.cell) size = utils.compute_shape(self.coords, reshape_len=1, py_name=self.py_name)[0] series, data = self._get_series_data( series_across=series_across, series_row_or_col=self.x_row_or_col, cell=self.cell, size=size ) # remove nan or missing values from dimension if series_across != "name": # Remove last nans only if the method is to read by row or col i = 0 try: while np.isnan(series[i-1]): i -= 1 except IndexError: # series has len 0 raise ValueError( self.py_name + "\n" + "Dimension given in:\n" + self._file_sheet + "\t{}:\t'{}'\n".format(series_across, self.x_row_or_col) + " has length 0" ) if i != 0: series = series[:i] data = data[:i] # warning/error if missing data in the series if any(np.isnan(series)) and self.missing != "keep": valid_values = ~np.isnan(series) series = series[valid_values] data = data[valid_values] if all(np.isnan(series)): raise ValueError( self.py_name + "\n" + "Dimension given in:\n" + self._file_sheet + "\t{}:\t'{}'\n".format(series_across, self.x_row_or_col) + " has length 0" ) if self.missing == "warning": warnings.warn( self.py_name + "\n" + "Dimension value missing or non-valid in:\n" + self._file_sheet + "\t{}:\t'{}'\n".format(series_across, self.x_row_or_col) + " the corresponding data value(s) to the " + "missing/non-valid value(s) will be ignored\n\n" ) elif self.missing == "raise": raise ValueError( self.py_name + "\n" + "Dimension value missing or non-valid in:\n" + self._file_sheet + "\t{}:\t'{}'\n".format(series_across, self.x_row_or_col) ) # reorder data with increasing series if not np.all(np.diff(series) > 0) and self.missing != "keep": order = np.argsort(series) series = series[order] data = data[order] # Check if the lookup/time dimension is well defined if np.any(np.diff(series) == 0): raise ValueError(self.py_name + "\n" + "Dimension given in:\n" + self._file_sheet + "\t{}:\t'{}'\n".format( series_across, self.x_row_or_col) + " has repeated values") # Check for missing values in data if np.any(np.isnan(data)) and self.missing != "keep": if series_across == "name": cell_type = "Cellrange" else: cell_type = "Reference cell" if self.missing == "warning": # Fill missing values with the chosen interpolation method # what Vensim does during running for DATA if self.interp != "raw": interpolate_message =\ " the corresponding value will be filled "\ + "with the interpolation method of the object." else: interpolate_message = "" warnings.warn( self.py_name + "\n" + "Data value missing or non-valid in:\n" + self._file_sheet + "\t{}:\t'{}'\n".format(cell_type, self.cell) + interpolate_message + "\n\n" ) elif self.missing == "raise": raise ValueError( self.py_name + "\n" + "Data value missing or non-valid in:\n" + self._file_sheet + "\t{}:\t'{}'\n".format(cell_type, self.cell) ) # fill values if self.interp != "raw": self._fill_missing(series, data) # reshape the data to fit in the xarray.DataArray reshape_dims = tuple([len(series)] + utils.compute_shape(self.coords)) data = self._reshape(data, reshape_dims) if element_type == "lookup": dim_name = "lookup_dim" else: dim_name = "time" data = xr.DataArray( data=data, coords={dim_name: series, **self.coords}, dims=[dim_name] + list(self.coords) ) return data def _fill_missing(self, series, data): """ Fills missing values in excel read data. Mutates the values in data. Parameters ---------- series: the time series without missing values data: the data with missing values Returns ------- None """ # if data is 2dims we need to interpolate datanan = np.isnan(data) keeping_nan = False if len(data.shape) == 1: if not np.all(datanan): data[datanan] = self._interpolate_missing( series[datanan], series[~datanan], data[~datanan]) else: keeping_nan = True else: for i, nanlist in enumerate(list(datanan.transpose())): if not np.all(nanlist): data[nanlist, i] = self._interpolate_missing( series[nanlist], series[~nanlist], data[~nanlist][:, i]) else: keeping_nan = True if keeping_nan: warnings.warn( "Not able to interpolate some values..." " keeping them as missing.\n") def _interpolate_missing(self, x, xr, yr): """ Interpolates a list of missing values from _fill_missing Parameters ---------- x: list of missing values interpolate xr: non-missing x values yr: non-missing y values Returns ------- y: Result after interpolating x with self.interp method """ y = np.empty_like(x, dtype=float) for i, value in enumerate(x): if value >= xr[-1]: y[i] = yr[-1] elif value <= xr[0]: y[i] = yr[0] elif self.interp == 'look_forward': y[i] = yr[xr >= value][0] elif self.interp == 'hold_backward': y[i] = yr[xr <= value][-1] else: y[i] = np.interp(value, xr, yr) return y @property def _file_sheet(self): """ Returns file and sheet name in a string """ return "\tFile name:\t'{}'\n".format(self.file)\ + "\tSheet name:\t'{}'\n".format(self.tab) @staticmethod def _col_to_num(col): """ Transforms the column name to int Parameters ---------- col: str Column name Returns ------- int Column number """ if len(col) == 1: return ord(col.upper()) - ord('A') elif len(col) == 2: left = ord(col[0].upper()) - ord('A') + 1 right = ord(col[1].upper()) - ord('A') return left * (ord('Z')-ord('A')+1) + right else: left = ord(col[0].upper()) - ord('A') + 1 center = ord(col[1].upper()) - ord('A') + 1 right = ord(col[2].upper()) - ord('A') return left * ((ord('Z')-ord('A')+1)**2)\ + center * (ord('Z')-ord('A')+1)\ + right def _split_excel_cell(self, cell): """ Splits a cell value given in a string. Returns None for non-valid cell formats. Parameters ---------- cell: str Cell like string, such as "A1", "b16", "AC19"... If it is not a cell like string will return None. Returns ------- row number, column number: int, int If the cell input is valid. Both numbers are given in Python enumeration, i.e., first row and first column are 0. """ split = re.findall(r'\d+|\D+', cell) try: # check that we only have two values [column, row] assert len(split) == 2 # check that the column name has no special characters assert not re.compile('[^a-zA-Z]+').search(split[0]) # check that row number is not 0 assert int(split[1]) != 0 # the column name has as maximum 3 letters assert len(split[0]) <= 3 return int(split[1])-1, self._col_to_num(split[0]) except AssertionError: return @staticmethod def _reshape(data, dims): """ Reshapes an pandas.DataFrame, pandas.Series, xarray.DataArray or np.ndarray in the given dimensions. Parameters ---------- data: xarray.DataArray/numpy.ndarray Data to be reshaped dims: tuple The dimensions to reshape. Returns ------- numpy.ndarray reshaped array """ if isinstance(data, (float, int)): data = np.array(data) elif isinstance(data, xr.DataArray): data = data.values return data.reshape(dims) def _series_selector(self, x_row_or_col, cell): """ Selects if a series data (DATA/LOOKUPS), should be read by columns, rows or cellrange name. Based on the input format of x_row_or_col and cell. The format of the 2 variables must be consistent. Parameters ---------- x_row_or_col: str String of a number if series is given in a row, letter if series is given in a column or name if the series is given by cellrange name. cell: str Cell identificator, such as "A1", or name if the data is given by cellrange name. Returns ------- series_across: str "row" if series is given in a row "column" if series is given in a column "name" if series and data are given by range name """ try: # if x_row_or_col is numeric the series must be a row int(x_row_or_col) return "row" except ValueError: if self._split_excel_cell(cell): # if the cell can be splitted means that the format is # "A1" like then the series must be a column return "column" else: return "name" class ExtData(External, Data): """ Class for Vensim GET XLS DATA/GET DIRECT DATA """ def __init__(self, file_name, tab, time_row_or_col, cell, interp, coords, root, final_coords, py_name): super().__init__(py_name) self.files = [file_name] self.tabs = [tab] self.time_row_or_cols = [time_row_or_col] self.cells = [cell] self.coordss = [coords] self.root = root self.final_coords = final_coords self.interp = interp or "interpolate" self.is_float = not bool(coords) # check if the interpolation method is valid if self.interp not in ["interpolate", "raw", "look_forward", "hold_backward"]: raise ValueError(self.py_name + "\n" + " The interpolation method (interp) must be " + "'raw', 'interpolate', " + "'look_forward' or 'hold_backward'") def add(self, file_name, tab, time_row_or_col, cell, interp, coords): """ Add information to retrieve new dimension in an already declared object """ self.files.append(file_name) self.tabs.append(tab) self.time_row_or_cols.append(time_row_or_col) self.cells.append(cell) self.coordss.append(coords) interp = interp or "interpolate" if interp.replace(" ", "_") != self.interp: raise ValueError(self.py_name + "\n" + "Error matching interpolation method with " + "previously defined one") if list(coords) != list(self.coordss[0]): raise ValueError(self.py_name + "\n" + "Error matching dimensions with previous data") def initialize(self): """ Initialize all elements and create the self.data xarray.DataArray """ if not self.coordss[0]: # Just load one value (no add) for self.file, self.tab, self.x_row_or_col, \ self.cell, self.coords\ in zip(self.files, self.tabs, self.time_row_or_cols, self.cells, self.coordss): self.data = self._initialize_data("data") else: # Load in several lines (add) self.data = xr.DataArray( np.nan, self.final_coords, list(self.final_coords)) for self.file, self.tab, self.x_row_or_col, \ self.cell, self.coords\ in zip(self.files, self.tabs, self.time_row_or_cols, self.cells, self.coordss): values = self._initialize_data("data") coords = {"time": values.coords["time"].values, **self.coords} if "time" not in self.data.dims: self.data = self.data.expand_dims( {"time": coords["time"]}, axis=0).copy() self.data.loc[coords] = values.values # set what to return when raw if self.final_coords: self.nan = xr.DataArray( np.nan, self.final_coords, list(self.final_coords)) else: self.nan = np.nan class ExtLookup(External, Lookups): """ Class for Vensim GET XLS LOOKUPS/GET DIRECT LOOKUPS """ def __init__(self, file_name, tab, x_row_or_col, cell, coords, root, final_coords, py_name): super().__init__(py_name) self.files = [file_name] self.tabs = [tab] self.x_row_or_cols = [x_row_or_col] self.cells = [cell] self.coordss = [coords] self.root = root self.final_coords = final_coords self.interp = "interpolate" self.is_float = not bool(coords) def add(self, file_name, tab, x_row_or_col, cell, coords): """ Add information to retrieve new dimension in an already declared object """ self.files.append(file_name) self.tabs.append(tab) self.x_row_or_cols.append(x_row_or_col) self.cells.append(cell) self.coordss.append(coords) if list(coords) != list(self.coordss[0]): raise ValueError(self.py_name + "\n" + "Error matching dimensions with previous data") def initialize(self): """ Initialize all elements and create the self.data xarray.DataArray """ if not self.coordss[0]: # Just loag one value (no add) for self.file, self.tab, self.x_row_or_col, \ self.cell, self.coords\ in zip(self.files, self.tabs, self.x_row_or_cols, self.cells, self.coordss): self.data = self._initialize_data("lookup") else: # Load in several lines (add) self.data = xr.DataArray( np.nan, self.final_coords, list(self.final_coords)) for self.file, self.tab, self.x_row_or_col, \ self.cell, self.coords\ in zip(self.files, self.tabs, self.x_row_or_cols, self.cells, self.coordss): values = self._initialize_data("lookup") coords = { "lookup_dim": values.coords["lookup_dim"].values, **self.coords } if "lookup_dim" not in self.data.dims: self.data = self.data.expand_dims( {"lookup_dim": coords["lookup_dim"]}, axis=0).copy() self.data.loc[coords] = values.values class ExtConstant(External): """ Class for Vensim GET XLS CONSTANTS/GET DIRECT CONSTANTS """ def __init__(self, file_name, tab, cell, coords, root, final_coords, py_name): super().__init__(py_name) self.files = [file_name] self.tabs = [tab] self.transposes = [ cell[-1] == '*' and np.prod(utils.compute_shape(coords)) > 1] self.cells = [cell.strip('*')] self.coordss = [coords] self.root = root self.final_coords = final_coords def add(self, file_name, tab, cell, coords): """ Add information to retrieve new dimension in an already declared object """ self.files.append(file_name) self.tabs.append(tab) self.transposes.append( cell[-1] == '*' and np.prod(utils.compute_shape(coords)) > 1) self.cells.append(cell.strip('*')) self.coordss.append(coords) if list(coords) != list(self.coordss[0]): raise ValueError(self.py_name + "\n" + "Error matching dimensions with previous data") def initialize(self): """ Initialize all elements and create the self.data xarray.DataArray """ if not self.coordss[0]: # Just loag one value (no add) for self.file, self.tab, self.transpose, self.cell, self.coords\ in zip(self.files, self.tabs, self.transposes, self.cells, self.coordss): self.data = self._initialize() else: # Load in several lines (add) self.data = xr.DataArray( np.nan, self.final_coords, list(self.final_coords)) for self.file, self.tab, self.transpose, self.cell, self.coords\ in zip(self.files, self.tabs, self.transposes, self.cells, self.coordss): self.data.loc[self.coords] = self._initialize().values def _initialize(self): """ Initialize one element """ self._resolve_file(root=self.root) split = self._split_excel_cell(self.cell) if split: data_across = "cell" cell = split else: data_across = "name" cell = self.cell shape = utils.compute_shape(self.coords, reshape_len=2, py_name=self.py_name) if self.transpose: shape.reverse() data = self._get_constant_data(data_across, cell, shape) if self.transpose: data = data.transpose() if np.any(np.isnan(data)): # nan values in data if data_across == "name": cell_type = "Cellrange" else: cell_type = "Reference cell" if self.missing == "warning": warnings.warn( self.py_name + "\n" + "Constant value missing or non-valid in:\n" + self._file_sheet + "\t{}:\t'{}'\n".format(cell_type, self.cell) ) elif self.missing == "raise": raise ValueError( self.py_name + "\n" + "Constant value missing or non-valid in:\n" + self._file_sheet + "\t{}:\t'{}'\n".format(cell_type, self.cell) ) # Create only an xarray if the data is not 0 dimensional if len(self.coords) > 0: reshape_dims = tuple(utils.compute_shape(self.coords)) data = self._reshape(data, reshape_dims) return xr.DataArray( data=data, coords=self.coords, dims=list(self.coords) ) else: # need to ensure float is returned and not numpy.float return float(data) def _get_constant_data(self, data_across, cell, shape): """ Function thar reads data from excel file for CONSTANT Parameters ---------- data_across: "cell" or "name" The way to read data file. cell: int or str If data_across is "cell" the lefttop split cell value where the data is. If data_across is "name" the cell range name where the data is. shape: list The shape of the data in 2D. Returns ------- data: float/ndarray(1D/2D) The values of the data. """ if data_across == "cell": # read data from topleft cell name using pandas start_row, start_col = cell return self._get_data_from_file( rows=[start_row, start_row + shape[0]], cols=[start_col, start_col + shape[1]]) else: # read data from cell range name using OpenPyXL data, xl_shape = self._get_data_from_file_opyxl(cell) if shape != xl_shape: raise ValueError(self.py_name + "\n" + "Data given in:\n" + self._file_sheet + "\tData name:\t{}\n".format(cell) + " has not the same shape as the" + " given coordinates") return data def __call__(self): return self.data class ExtSubscript(External): """ Class for Vensim GET XLS SUBSCRIPT/GET DIRECT SUBSCRIPT """ def __init__(self, file_name, tab, firstcell, lastcell, prefix, root): super().__init__("Hardcoded external subscript") self.file = file_name self.tab = tab self.prefix = prefix self._resolve_file(root=root) split = self._split_excel_cell(firstcell) if split: subs = self.get_subscripts_cell(*split, lastcell) else: subs = self.get_subscripts_name(firstcell) self.subscript = [ self.prefix + str(d) for d in subs.flatten() if self._not_nan(d) ] def get_subscripts_cell(self, row_first, col_first, lastcell): """Get subscripts from common cell definition""" if not lastcell: row_last, col_last = None, None else: split = self._split_excel_cell(lastcell) if split: # last cell is col and row row_last, col_last = split elif lastcell.isdigit(): # last cell is row number only row_last = int(lastcell)-1 col_last = None else: # last cell is a col value only row_last = None col_last = self._col_to_num(lastcell) # update read keywargs for rows and columns to read read_kwargs = {} if row_last is not None: read_kwargs['nrows'] = row_last-row_first+1 if col_last is not None: read_kwargs['usecols'] = np.arange(col_first, col_last+1) # get the function to read the data based on its extension ext = self.file.suffix.lower() if ext in _SPREADSHEET_EXTS: read_func = pd.read_excel read_kwargs['sheet_name'] = self.tab elif ext == '.csv': read_func = pd.read_csv if self.tab and not self.tab[0].isalnum(): read_kwargs['sep'] = self.tab else: read_func = pd.read_table if self.tab and not self.tab[0].isalnum(): read_kwargs['sep'] = self.tab # read the data data = read_func( self.file, skiprows=row_first, dtype=object, header=None, **read_kwargs ).values # skip columns if usecols couldn't be used if col_last is None: data = data[:, col_first:] return data def get_subscripts_name(self, cellname): """Get subscripts from cell range name definition""" try: excel = load_workbook(self.file, read_only=True, data_only=True) except InvalidFileException: raise ValueError( self.py_name + "\n" f"Cannot read the file '{self.file}'...\n" f"It could happen that firstcell='{cellname}' was " "read as a cell range name due to a wrong definition " "of cell value" ) global_cellranges = excel.defined_names local_cellranges = None # need to lower the sheetnames as Vensim has no case sensitivity for sheet in excel.sheetnames: if sheet.lower() == self.tab.lower(): local_cellranges = excel[sheet].defined_names break if local_cellranges is None: # Error if it is not able to get the localSheetId raise ValueError( self.py_name + "\n" "The sheet doesn't exist...\n" + self._file_sheet ) try: # Search for local and global names cellrange = local_cellranges.get(cellname)\ or global_cellranges.get(cellname) sheet, cells = next(cellrange.destinations) assert sheet.lower() == self.tab.lower() self.tab = sheet # case insensitivity in sheet name # Get the cells where the cellrange is defined first_cell, last_cell = cells.replace("$", '').split(":") except (AttributeError, AssertionError): # key error if the cellrange doesn't exist in the file or sheet raise AttributeError( self.py_name + "\n" f"The cellrange name '{cellname}'\n" "Doesn't exist in:\n" + self._file_sheet ) else: return self.get_subscripts_cell( *self._split_excel_cell(first_cell), last_cell) @staticmethod def _not_nan(value): """Check if a value is not nan""" if isinstance(value, str): return True return not np.isnan(value)