from hulearn.datasets import *
load_fish(return_X_y=False, as_frame=False)
Show source code in hulearn/datasets.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def load_fish(return_X_y: bool = False, as_frame: bool = False):
    """
    Load a subset of the Fish market dataset that ships with the package.
    The full dataset is available [here](https://www.kaggle.com/aungpyaeap/fish-market).

    When neither flag is set, a dictionary with the keys `"data"` (feature
    values) and `"target"` (the `Weight` column) is returned. `as_frame` is
    checked first, so it wins when both flags are set.

    Arguments:
        return_X_y: return a tuple of (`X`, `y`) for convenience
        as_frame: return all the data as a pandas dataframe

    Usage:

    ```python
    from hulearn.datasets import load_fish

    df = load_fish(as_frame=True)
    X, y = load_fish(return_X_y=True)
    ```
    """
    archive_path = resource_filename("hulearn", os.path.join("data", "fish.zip"))
    frame = pd.read_csv(archive_path)
    if as_frame:
        return frame

    # Every column except the target; "Weight" is what callers predict.
    feature_columns = ["Species", "Length1", "Length2", "Length3", "Height", "Width"]
    features = frame[feature_columns].values
    target = frame["Weight"].values
    return (features, target) if return_X_y else {"data": features, "target": target}
|
Loads in a subset of the Fish market dataset. You can find the full dataset [here](https://www.kaggle.com/aungpyaeap/fish-market).
Parameters
| Name | Type | Description | Default |
|------|------|-------------|---------|
| `return_X_y` | `bool` | return a tuple of (`X`, `y`) for convenience | `False` |
| `as_frame` | `bool` | return all the data as a pandas dataframe | `False` |
Usage:

```python
from hulearn.datasets import load_fish
df = load_fish(as_frame=True)
X, y = load_fish(return_X_y=True)
```
load_titanic(return_X_y=False, as_frame=False)
Show source code in hulearn/datasets.py
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def load_titanic(return_X_y: bool = False, as_frame: bool = False):
    """
    Load a subset of the titanic dataset that ships with the package.
    The full dataset is available [here](https://www.kaggle.com/c/titanic/data).

    When neither flag is set, a dictionary with the keys `"data"` (feature
    values) and `"target"` (the `survived` column) is returned. `as_frame` is
    checked first, so it wins when both flags are set.

    Arguments:
        return_X_y: return a tuple of (`X`, `y`) for convenience
        as_frame: return all the data as a pandas dataframe

    Usage:

    ```python
    from hulearn.datasets import load_titanic

    df = load_titanic(as_frame=True)
    X, y = load_titanic(return_X_y=True)
    ```
    """
    archive_path = resource_filename("hulearn", os.path.join("data", "titanic.zip"))
    frame = pd.read_csv(archive_path)
    if as_frame:
        return frame

    # Every column except the target; "survived" is what callers predict.
    feature_columns = ["pclass", "name", "sex", "age", "fare", "sibsp", "parch"]
    features = frame[feature_columns].values
    target = frame["survived"].values
    return (features, target) if return_X_y else {"data": features, "target": target}
|
Loads in a subset of the titanic dataset. You can find the full dataset [here](https://www.kaggle.com/c/titanic/data).
Parameters
| Name | Type | Description | Default |
|------|------|-------------|---------|
| `return_X_y` | `bool` | return a tuple of (`X`, `y`) for convenience | `False` |
| `as_frame` | `bool` | return all the data as a pandas dataframe | `False` |
Usage:

```python
from hulearn.datasets import load_titanic
df = load_titanic(as_frame=True)
X, y = load_titanic(return_X_y=True)
```