I have used groupby on 2 columns (the df has about 70 columns, all float except the date column, which is datetime) to get a dask dataframe:
result_ddf = base_ddf.groupby(["firts_integer_column","second_integer_column"])
I cannot use the result because it is not a dataframe but an object of an unexpected type:
dask.dataframe.groupby.DataFrameGroupBy
How can I use the result as a dask dataframe? When I just try to call .head()
or .compute()
on it, I get errors.
CODE 1
result_ddf.get_partition(1)
ERROR 1
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/anaconda3/envs/rapids/lib/python3.7/site-packages/dask/dataframe/groupby.py in __getattr__(self, key)
1779 try:
-> 1780 return self[key]
1781 except KeyError as e:
~/anaconda3/envs/rapids/lib/python3.7/site-packages/dask/dataframe/groupby.py in __getitem__(self, key)
1765 # error is raised from pandas
-> 1766 g._meta = g._meta[key]
1767 return g
~/anaconda3/envs/rapids/lib/python3.7/site-packages/pandas/core/groupby/generic.py in __getitem__(self, key)
1609 )
-> 1610 return super().__getitem__(key)
1611
~/anaconda3/envs/rapids/lib/python3.7/site-packages/pandas/core/base.py in __getitem__(self, key)
227 if key not in self.obj:
--> 228 raise KeyError(f"Column not found: {key}")
229 return self._gotitem(key, ndim=1)
KeyError: 'Column not found: get_partition'
The above exception was the direct cause of the following exception:
AttributeError Traceback (most recent call last)
<ipython-input-279-2c7697a2a4f8> in <module>
----> 1 result_ddf.get_partition(1)
~/anaconda3/envs/rapids/lib/python3.7/site-packages/dask/dataframe/groupby.py in __getattr__(self, key)
1780 return self[key]
1781 except KeyError as e:
-> 1782 raise AttributeError(e) from e
1783
1784 @derived_from(pd.core.groupby.DataFrameGroupBy)
AttributeError: 'Column not found: get_partition'
CODE 2
result_ddf.head()
ERROR 2
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/anaconda3/envs/rapids/lib/python3.7/site-packages/dask/dataframe/groupby.py in __getattr__(self, key)
1779 try:
-> 1780 return self[key]
1781 except KeyError as e:
~/anaconda3/envs/rapids/lib/python3.7/site-packages/dask/dataframe/groupby.py in __getitem__(self, key)
1765 # error is raised from pandas
-> 1766 g._meta = g._meta[key]
1767 return g
~/anaconda3/envs/rapids/lib/python3.7/site-packages/pandas/core/groupby/generic.py in __getitem__(self, key)
1609 )
-> 1610 return super().__getitem__(key)
1611
~/anaconda3/envs/rapids/lib/python3.7/site-packages/pandas/core/base.py in __getitem__(self, key)
227 if key not in self.obj:
--> 228 raise KeyError(f"Column not found: {key}")
229 return self._gotitem(key, ndim=1)
KeyError: 'Column not found: head'
The above exception was the direct cause of the following exception:
AttributeError Traceback (most recent call last)
<ipython-input-277-bf3c0aecfa21> in <module>
----> 1 result_ddf.head()
~/anaconda3/envs/rapids/lib/python3.7/site-packages/dask/dataframe/groupby.py in __getattr__(self, key)
1780 return self[key]
1781 except KeyError as e:
-> 1782 raise AttributeError(e) from e
1783
1784 @derived_from(pd.core.groupby.DataFrameGroupBy)
AttributeError: 'Column not found: head'
Things I have tried
question from:
https://stackoverflow.com/questions/65906581/dask-dataframe-groupby-dataframegroupby-error 与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…