import itertools
import logging
from collections import OrderedDict, Counter

import numpy
import pandas

from ..analysis.elements import extract_cell_counts


class ElementsParseError(Exception):
    """
    Exception raised when an Elements spreadsheet cannot be parsed.
    """


def read_cell_counts(filepath, sheetdict, manual_columns):
    """
    Reads the cell counts for every column described by `sheetdict`.

    Columns selected by `manual_columns` are read as hand-counted sheets
    with `read_manual_cell_counts`; all remaining columns are parsed as
    Elements output with `read_elements` and counted with
    `extract_cell_counts`.

    :param filepath: Path to Excel file to parse
    :type filepath: str.
    :param sheetdict: Mapping from col number to sheet names
    :type sheetdict: dict
    :param manual_columns: `True` if every column was counted by hand,
        otherwise a container of the col numbers that were. `None` or
        `False` means that no column was counted by hand.
    :returns: dict -- the manual counts, updated with the automated ones.
    :raises: ElementsParseError
    """
    all_manual = manual_columns is True
    # `None`/`False` do not support the `in` test below, so treat them
    # explicitly as "nothing was counted by hand".
    none_manual = manual_columns is None or manual_columns is False

    # Those columns that were counted by hand
    manual_col_dict = {}
    # Those columns that were counted automatically
    automated_col_dict = {}

    for (k, v) in sheetdict.items():
        if all_manual or (not none_manual and k in manual_columns):
            manual_col_dict[k] = v
        else:
            automated_col_dict[k] = v

    # Handle the case in which cell counting was
    # done manually
    #
    manual_counts = read_manual_cell_counts(filepath, manual_col_dict)

    # Handle the case in which the cell counting was
    # done using the Elements software.
    #
    if automated_col_dict:
        elem = read_elements(
            filepath,
            needed_sheets=itertools.chain.from_iterable(
                automated_col_dict.values()
            ),
        )

        # Calculate the Elements cell counts
        #
        automated_counts = extract_cell_counts(
            automated_col_dict,
            elem,
        )
    else:
        automated_counts = {}

    # Combine the two counts
    #
    total_counts = manual_counts
    total_counts.update(automated_counts)

    return total_counts


def read_manual_cell_counts(filepath, sheetdict):
    """
    Parses an Excel spreadsheet containing cell counts.

    Returns a dict of Counter objects, the same as the
    `extract_cell_counts` function in `analysis.elements`.

    Each sheet is read without a header row; the row index is used as
    the well number and the first column as the count for that well.
    When a column lists several sheets, a later sheet overwrites the
    count of any well it shares with an earlier one.

    :param filepath: Path to Excel file to parse
    :type filepath: str.
    :param sheetdict: Mapping from col number to sheet names
    :type sheetdict: dict
    :returns: dict of Counter objects.
    :raises: ElementsParseError
    """
    # Nothing to read: avoid opening the file at all.
    if not sheetdict:
        return {}

    xls = pandas.ExcelFile(filepath)

    cell_counts = {}
    for col_no, sheet_names in sheetdict.items():
        col_cell_count = Counter()
        for sheet_name in sheet_names:
            raw_data = xls.parse(sheet_name, header=None)
            for well_no, cell_count in raw_data.iterrows():
                col_cell_count[well_no] = cell_count[0]
        cell_counts[col_no] = col_cell_count

    return cell_counts


def parse_elements_sheet(xls, sheet_name):
    """
    Parses a single sheet from an Excel spreadsheet containing Elements
    data. This sheet should contain the data for one column of a device
    used in a LINCS experiment.

    The sheet is scanned from the bottom up: first past the row whose
    first cell is "Feature" (the metadata at the bottom of the sheet),
    then up to the first row whose first cell converts to an integer.
    Everything below that row is dropped and "N/A" cells are replaced
    with NaN. If no such row is found, only the first row is kept.

    :param xls: Excel file object containing sheets
    :type xls: pandas.io.parsers.ExcelFile.
    :param sheet_name: The name of the sheet to parse.
    :type sheet_name: str.
    :returns: data - a pandas.DataFrame object.
    :raises: ElementsParseError
    """
    raw_data = xls.parse(sheet_name)

    if raw_data.shape[-1] not in (22, 23):
        raise ElementsParseError("Sheet does not have 22 or 23 columns.")

    n_rows = raw_data.shape[0]

    # Value used when the scan below never finds the end of the data.
    # For sheets with two or more rows this is what the loop variable
    # ended on before (1, i.e. keep only row 0); defining it up front
    # also covers sheets with fewer than two rows, where the loop body
    # never runs.
    stop_row_idx = min(1, n_rows)

    passed_feature = False
    for row_idx in range(n_rows - 1, 0, -1):
        first_cell = raw_data.iloc[row_idx, 0]

        if not passed_feature:
            # Go back until we've passed the metadata at the bottom
            # of the spreadsheet.
            if first_cell == "Feature":
                passed_feature = True
            continue

        # Now go back until we find a row that begins with an
        # integer.
        try:
            int(first_cell)
        except (TypeError, ValueError):
            # Blank (NaN) and text cells raise ValueError, other
            # non-numeric objects raise TypeError; neither is data.
            continue

        stop_row_idx = row_idx + 1
        break

    data = raw_data.drop(range(stop_row_idx, n_rows))
    data = data.replace("N/A", numpy.nan)

    return data


def read_elements(filepath, max_sheets=None, needed_sheets=None):
    """
    Parses an Excel spreadsheet containing Elements data. The
    spreadsheet should have one sheet for each column on the device.

    :param filepath: Path to Excel file to parse
    :type filepath: str.
    :param max_sheets: Max number of sheets to parse
    :type max_sheets: int.
    :param needed_sheets: List of names of sheets to be extracted. If
        falsy, every sheet in the file is parsed.
    :type needed_sheets: iterable.
    :returns: OrderedDict mapping each parsed sheet name to its
        pandas.DataFrame, in the order the sheets were parsed.
    :raises: ElementsParseError
    """
    xls = pandas.ExcelFile(filepath)

    parsed_sheets = OrderedDict()

    if not needed_sheets:
        needed_sheets = xls.sheet_names

    for i, sheet_name in enumerate(needed_sheets):
        # `i` sheets have been parsed so far, so stop once that
        # reaches the requested maximum.
        if max_sheets is not None and i >= max_sheets:
            break

        logging.debug("Elements sheet: {0}".format(sheet_name))

        data = parse_elements_sheet(xls, sheet_name)
        parsed_sheets[sheet_name] = data

    return parsed_sheets