MajorProject - Ipynb - Colaboratory
MajorProject - Ipynb - Colaboratory
ipynb - Colaboratory
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
import ydata_profiling as pp
from sklearn.metrics import confusion_matrix,accuracy_score,roc_curve,classification_report
from ydata_profiling import ProfileReport
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px
from sklearn import preprocessing
from collections import Counter
from sklearn import metrics
# Notebook-wide plotting/display configuration, followed by mounting Google Drive.
plt.style.use('ggplot')  # use matplotlib's 'ggplot' style sheet for all figures
plt.rcParams['figure.figsize'] = [12, 8]  # default figure size as [width, height]
pd.set_option('display.max_columns', None)  # None lifts the column limit when printing DataFrames
import warnings
warnings.filterwarnings('ignore')  # suppress all warnings from this point on
# NOTE(review): `drive` is not imported anywhere in the visible source; presumably
# `from google.colab import drive` ran in a cell that is not shown here - confirm.
drive.mount('/content/drive')
Mounted at /content/drive
# Read the training CSV from the mounted Drive folder into `df`, then preview it.
df=pd.read_csv("/content/drive/MyDrive/Major project/train.csv")
df.head()  # first rows of the frame (pandas shows 5 by default)
0 1 1 0 1 0 1 0 1 0
1 2 0 0 0 0 0 0 0 0
2 3 1 1 1 1 1 1 1 1
3 4 0 0 0 0 0 0 0 0
4 5 0 0 0 0 0 0 0 0
# Summary statistics of df, then its (rows, columns) shape.
df.describe()
df.shape
(800, 22)
# Total number of cells in df (rows x columns).
df.size
17600
https://colab.research.google.com/drive/1RUQOzdtCsrC7GDSnMi1R0qacKcno1TK2#scrollTo=7BIqOTJ3rW06&printMode=true 1/11
3/14/24, 12:39 AM MajorProject.ipynb - Colaboratory
# Print the column names, non-null counts, dtypes and memory usage of df.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 22 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 ID 800 non-null int64
1 A1_Score 800 non-null int64
2 A2_Score 800 non-null int64
3 A3_Score 800 non-null int64
4 A4_Score 800 non-null int64
5 A5_Score 800 non-null int64
6 A6_Score 800 non-null int64
7 A7_Score 800 non-null int64
8 A8_Score 800 non-null int64
9 A9_Score 800 non-null int64
10 A10_Score 800 non-null int64
11 age 800 non-null float64
12 gender 800 non-null object
13 ethnicity 800 non-null object
14 jaundice 800 non-null object
15 austim 800 non-null object
16 contry_of_res 800 non-null object
17 used_app_before 800 non-null object
18 result 800 non-null float64
19 age_desc 800 non-null object
20 relation 800 non-null object
21 Class/ASD 800 non-null int64
dtypes: float64(2), int64(12), object(8)
memory usage: 137.6+ KB
# Draw one histogram per numeric column on a 15x15 figure, without grid lines.
df.hist(figsize=(15, 15),grid=False)
https://colab.research.google.com/drive/1RUQOzdtCsrC7GDSnMi1R0qacKcno1TK2#scrollTo=7BIqOTJ3rW06&printMode=true 2/11
3/14/24, 12:39 AM MajorProject.ipynb - Colaboratory
# Count the null values in each column of df.
df.isnull().sum()
https://colab.research.google.com/drive/1RUQOzdtCsrC7GDSnMi1R0qacKcno1TK2#scrollTo=7BIqOTJ3rW06&printMode=true 3/11
3/14/24, 12:39 AM MajorProject.ipynb - Colaboratory
ID 0
A1_Score 0
A2_Score 0
A3_Score 0
A4_Score 0
A5_Score 0
A6_Score 0
A7_Score 0
A8_Score 0
A9_Score 0
A10_Score 0
age 0
gender 0
ethnicity 0
jaundice 0
austim 0
contry_of_res 0
used_app_before 0
result 0
age_desc 0
relation 0
Class/ASD 0
dtype: int64
# Show the dtype of each column of df.
df.dtypes
ID int64
A1_Score int64
A2_Score int64
A3_Score int64
A4_Score int64
A5_Score int64
A6_Score int64
A7_Score int64
A8_Score int64
A9_Score int64
A10_Score int64
age float64
gender object
ethnicity object
jaundice object
austim object
contry_of_res object
used_app_before object
result float64
age_desc object
relation object
Class/ASD int64
dtype: object
# Number of fully duplicated rows in df.
df.duplicated().sum()
# Frequency of each ethnicity value.
# NOTE(review): the printed counts include a '?' placeholder and both 'Others'
# and 'others' as separate categories - consider normalising before modelling.
df["ethnicity"].value_counts()
White-European 257
? 203
Middle Eastern 97
Asian 67
Black 47
South Asian 34
Pasifika 32
Others 29
Latino 17
Hispanic 9
Turkish 5
others 3
Name: ethnicity, dtype: int64
# Frequency of each value in the 'austim' column (the name is spelled this way in the dataset).
df["austim"].value_counts()
no 669
yes 131
Name: austim, dtype: int64
# Build the ydata_profiling report for df; as the last expression of a cell it is rendered inline.
ProfileReport(df)
https://colab.research.google.com/drive/1RUQOzdtCsrC7GDSnMi1R0qacKcno1TK2#scrollTo=7BIqOTJ3rW06&printMode=true 4/11
3/14/24, 12:39 AM MajorProject.ipynb - Colaboratory
Overview
Dataset statistics
Number of variables 22
Missing cells 0
Duplicate rows 0
Variable types
Numeric 3
Categorical 15
Boolean 3
Text 1
Alerts
# Bar chart of the mean of every column in score_features, split by 'Class/ASD':
# group df by class, average the score columns, then transpose so the score
# columns run along the x-axis with one bar per class value.
class_means = df.groupby('Class/ASD')[score_features].mean()
class_means.T.plot.bar()
plt.title('Mean score - Autism Spectrum Quotient (AQ) 10 item screening tool')
# Label each tick with the part of the column name before the first underscore.
tick_labels = [column.split('_')[0] for column in score_features]
plt.xticks(ticks=range(len(score_features)), labels=tick_labels, rotation=0);
https://colab.research.google.com/drive/1RUQOzdtCsrC7GDSnMi1R0qacKcno1TK2#scrollTo=7BIqOTJ3rW06&printMode=true 5/11
3/14/24, 12:39 AM MajorProject.ipynb - Colaboratory
# Mean of 'Class/ASD' for each ethnicity (a positive-class rate if the column
# is 0/1), drawn as a bar chart in ascending order.
asd_mean_by_ethnicity = df.groupby('ethnicity')['Class/ASD'].mean()
asd_mean_by_ethnicity.sort_values().plot.bar()
plt.title('Ethnicity of the patient')
plt.xticks();
https://colab.research.google.com/drive/1RUQOzdtCsrC7GDSnMi1R0qacKcno1TK2#scrollTo=7BIqOTJ3rW06&printMode=true 6/11
3/14/24, 12:39 AM MajorProject.ipynb - Colaboratory
# Distinct values of the 'contry_of_res' column (the name is spelled this way in the dataset).
df['contry_of_res'].unique()
https://colab.research.google.com/drive/1RUQOzdtCsrC7GDSnMi1R0qacKcno1TK2#scrollTo=7BIqOTJ3rW06&printMode=true 7/11
3/14/24, 12:39 AM MajorProject.ipynb - Colaboratory
<Axes: >
import plotly.express as px
# NOTE(review): `fig` is not defined anywhere in the visible source; presumably it
# was created with `px` in a line that is not shown here - confirm.
fig.show()  # render the figure inline
0
1
20.1%
79.9%
https://colab.research.google.com/drive/1RUQOzdtCsrC7GDSnMi1R0qacKcno1TK2#scrollTo=7BIqOTJ3rW06&printMode=true 8/11
3/14/24, 12:39 AM MajorProject.ipynb - Colaboratory
Class/ASD
United States New Zealand Jordan Canada Afghanistan Netherlands 1
1 1
1 0 0 0 0 1 0 0
1
0.8
Austria France Kazakhstan
Spain Ireland
1
0 0 1
0 1
0 0
1
0.6
United Arab Emirates
Iran Malaysia South Africa Viet Nam
AmericanSamoa
0
1
1
1
0
0 0 0 1 0
Australia Bahamas
0 1
0
1
0
0.2
Cyprus Oman Sweden
1
Azerbaijan Iceland
0 1 Brazil
0
1 Bolivia
Angola Belgium Egypt Hong Kong
0 1
0
Italy
1
0 China Niger
Argentina Burundi Japan
Sierra Leone
Ethiopia
1
0
Bangladesh Czech Republic
Romania Tonga
0
0 0 1
https://colab.research.google.com/drive/1RUQOzdtCsrC7GDSnMi1R0qacKcno1TK2#scrollTo=7BIqOTJ3rW06&printMode=true 9/11
3/14/24, 12:39 AM MajorProject.ipynb - Colaboratory
output
# Column names collected in `features`.
features = ['age', 'jaundice', 'austim', 'result']
Collecting lazypredict
Downloading lazypredict-0.2.12-py2.py3-none-any.whl (12 kB)
Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from lazypredict) (8.1.7)
Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from lazypredict) (1.2.2)
Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from lazypredict) (1.5.3)
Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from lazypredict) (4.66.2)
Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from lazypredict) (1.3.2)
Requirement already satisfied: lightgbm in /usr/local/lib/python3.10/dist-packages (from lazypredict) (4.1.0)
Requirement already satisfied: xgboost in /usr/local/lib/python3.10/dist-packages (from lazypredict) (2.0.3)
Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from lightgbm->lazypredict) (1.25.2)
Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from lightgbm->lazypredict) (1.11.4)
Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->lazypredict) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->lazypredict) (2023.4)
Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->lazypredict) (3.3
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->lazypredict
Installing collected packages: lazypredict
Successfully installed lazypredict-0.2.12
import lazypredict
https://colab.research.google.com/drive/1RUQOzdtCsrC7GDSnMi1R0qacKcno1TK2#scrollTo=7BIqOTJ3rW06&printMode=true 10/11
3/14/24, 12:39 AM MajorProject.ipynb - Colaboratory
100%|██████████| 29/29 [00:01<00:00, 19.50it/s][LightGBM] [Info] Number of positive: 76, number of negative: 324
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000236 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 275
[LightGBM] [Info] Number of data points in the train set: 400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.190000 -> initscore=-1.450010
[LightGBM] [Info] Start training from score -1.450010
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
https://colab.research.google.com/drive/1RUQOzdtCsrC7GDSnMi1R0qacKcno1TK2#scrollTo=7BIqOTJ3rW06&printMode=true 11/11