Skip to content

Passing a nested list of column names as parse_dates (e.g. parse_dates=[['Datum', 'Zeit']]) to dask.dataframe.read_csv fails with ValueError #924

@user32000

Description

@user32000
import pandas as pd
import dask.dataframe as df

# this works...
delme1 = pd.read_csv('some_file.txt', delimiter='\t', parse_dates=[['Datum', 'Zeit']])

# this does not: it raises ValueError: 'Datum' is not in list
delme2 = df.read_csv('some_file.txt', delimiter='\t', parse_dates=[['Datum', 'Zeit']])

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-2-41c1ed8fde3e> in <module>()
----> 1 delme = df.read_csv('some_file.txt', delimiter='\t', parse_dates=[['Datum', 'Zeit']])
      2 delme

/Applications/anaconda/lib/python2.7/site-packages/dask/dataframe/io.pyc in read_csv(fn, *args, **kwargs)
    182     kwargs = kwargs.copy()
    183 
--> 184     kwargs = fill_kwargs(fn, args, kwargs)
    185 
    186     # Handle glob strings

/Applications/anaconda/lib/python2.7/site-packages/dask/dataframe/io.pyc in fill_kwargs(fn, args, kwargs)
    147     kwargs = clean_kwargs(kwargs)
    148     try:
--> 149         head = pd.read_csv(fn, *args, **assoc(kwargs, 'nrows', sample_nrows))
    150     except StopIteration:
    151         head = pd.read_csv(fn, *args, **kwargs)

/Applications/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
    496                     skip_blank_lines=skip_blank_lines)
    497 
--> 498         return _read(filepath_or_buffer, kwds)
    499 
    500     parser_f.__name__ = name

/Applications/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    273 
    274     # Create the parser.
--> 275     parser = TextFileReader(filepath_or_buffer, **kwds)
    276 
    277     if (nrows is not None) and (chunksize is not None):

/Applications/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in __init__(self, f, engine, **kwds)
    588             self.options['has_index_names'] = kwds['has_index_names']
    589 
--> 590         self._make_engine(self.engine)
    591 
    592     def _get_options_with_defaults(self, engine):

/Applications/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _make_engine(self, engine)
    729     def _make_engine(self, engine='c'):
    730         if engine == 'c':
--> 731             self._engine = CParserWrapper(self.f, **self.options)
    732         else:
    733             if engine == 'python':

/Applications/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in __init__(self, src, **kwds)
   1144                 raise ValueError("Usecols do not match names.")
   1145 
-> 1146         self._set_noconvert_columns()
   1147 
   1148         self.orig_names = self.names

/Applications/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _set_noconvert_columns(self)
   1178                 if isinstance(val, list):
   1179                     for k in val:
-> 1180                         _set(k)
   1181                 else:
   1182                     _set(val)

/Applications/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _set(x)
   1172                 self._reader.set_noconvert(x)
   1173             else:
-> 1174                 self._reader.set_noconvert(names.index(x))
   1175 
   1176         if isinstance(self.parse_dates, list):

ValueError: 'Datum' is not in list

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions