def __init__(self, filename, fieldnames=None, encoding='utf-8', *args, **kwargs):
    r"""
    Creates a new loader that processes CSV files.

    :param filename: Path of the CSV file to load.
    :param fieldnames: Controls how column names are determined.

        * ``None`` (the default) or ``True``: the first line of the CSV is
          used for column names.
        * ``False``: sequential column names are generated automatically,
          like ``['c0', 'c1', ...]``, one per column found in the first row
          of the CSV. An empty file yields an empty list of names.
        * a list: it is used as column names.
    :param encoding: Text encoding used to open the file.

    Any other optional or keyword arguments are passed to the underlying
    `csv.DictReader`.

    >>> loader = CSVLoader('dataset.tsv', fieldnames=False, encoding='cp932', delimiter='\t')
    """
    # `fieldnames` is a tri-state flag (True / False / None) or a sequence of
    # names, so compare by identity: equality would also match 1 / 0 and can
    # raise for sequence types that implement elementwise `==`.
    if fieldnames is True:
        # Let DictReader take the field names from the header row later.
        fieldnames = None
    elif fieldnames is False:
        # Generate field names by peeking at the number of columns in the
        # first row of the CSV.
        with io.open(filename, encoding=encoding, newline='') as f:
            # With fieldnames=None, DictReader reads the first row as its
            # field names; only the number of entries is of interest here.
            r = _UnicodeDictReader(f, encoding, fieldnames=None, *args, **kwargs)
            # DictReader.fieldnames is None when the file has no rows at all.
            first_row = r.fieldnames or []
        fieldnames = ['c{0}'.format(i) for i in range(len(first_row))]

    self._filename = filename
    self._fieldnames = fieldnames
    self._encoding = encoding
    self._args = args
    self._kwargs = kwargs

class _UnicodeDictReader(csv.DictReader):
    """
    A `csv.DictReader` that takes a text (Unicode) stream and an encoding,
    and absorbs the str/bytes differences between Python 2 and Python 3.
    """

    def __init__(self, f, encoding, *args, **kwargs):
        """
        :param f: Text stream (yielding Unicode lines) to read the CSV from.
        :param encoding: Encoding used to convert between bytes and Unicode.

        Any other arguments are passed to `csv.DictReader`.
        """
        self._encoding = encoding

        # DictReader in Python 2.x uses str (bytes) for parameters, whereas
        # Python 3.x uses str (unicode) for them. The code below is intended
        # to absorb the difference.
        for k in ('delimiter', 'escapechar', 'quotechar', 'lineterminator',
                  'restkey', 'restval', 'fieldnames'):
            if k in kwargs:
                kwargs[k] = self._native_all(kwargs[k], encoding)

        # DictReader in Python 2.x cannot handle Unicode input.
        # We transcode each line of the CSV into bytes for Py2.
        f = f if PYTHON3 else self._encode_file(f, encoding)

        # Explicit base-class call (rather than super()) is kept for
        # Python 2 compatibility.
        csv.DictReader.__init__(self, f, *args, **kwargs)

    def __next__(self):
        """
        Returns the next row as a dict with Unicode keys and values.
        """
        if PYTHON3:
            # Rows are already Unicode on Python 3.
            return csv.DictReader.__next__(self)
        # DictReader in Python 2.x returns rows in bytes. We transcode
        # keys/values of the row dict into Unicode like in Python 3.x.
        return self._decode_all(csv.DictReader.next(self), self._encoding)

    # Python 2 iterator protocol; also keeps explicit `reader.next()` calls
    # working on Python 3, where the base class has no `next` method.
    next = __next__

    def _native_all(self, v, enc):
        """
        Converts the input to native `str` (i.e., Unicode on Python 3, bytes
        on Python 2), which is supported by `csv.DictReader`, recursively.

        Lists are converted element by element; any other value is returned
        unchanged.
        """
        if PYTHON3 and isinstance(v, bytes):
            return v.decode(enc)
        elif not PYTHON3 and isinstance(v, unicode_t):
            return v.encode(enc)
        elif isinstance(v, list):
            return [self._native_all(elem, enc) for elem in v]
        return v

    def _decode_all(self, v, enc):
        """
        Decodes the input recursively.

        `bytes` are decoded with `enc`; lists and dicts (both keys and
        values) are processed element by element; any other value
        (e.g., None) is returned unchanged.
        """
        if isinstance(v, bytes):
            return v.decode(enc)
        elif isinstance(v, list):
            return [self._decode_all(elem, enc) for elem in v]
        elif isinstance(v, dict):
            return {
                self._decode_all(key, enc): self._decode_all(value, enc)
                for key, value in v.items()
            }
        return v

    def _encode_file(self, f, enc):
        """
        Reads lines from the text stream (Unicode) and yields them as bytes.
        """
        for line in f:
            yield line.encode(enc)