In [ ]: import pandas as pd
In [ ]: df=pd.read_csv(r"C:\Users\Dell\Downloads\diabetes_unclean (1).csv")
In [ ]: df
In [ ]: # Combined LDL + VLDL per row; a NaN in either column propagates to the sum.
df['total_values']=(df['LDL']+df['VLDL'])
df
In [ ]: df['medical condition']='good/bad'
In [ ]: df.loc[df['total_values']>=4,df['medical condition']]='bad'
In [ ]: df.loc[df['total_values']>=4,df['medical_condition']]='good'
In [ ]: df
In [1]: import pandas as pd
In [6]: india_weather= pd.DataFrame({'city':['mumbai','delhi','bangalore'],'temperature':[3
In [3]: india_weather
Out[3]: city temperature humidity
0 mumbai 36 68
1 delhi 35 65
2 bangalore 38 75
In [7]: us_weather= pd.DataFrame({'city':['new york','Houston','portland'],'temperature':[3
In [8]: us_weather
Out[8]: city temperature humidity
0 new york 36 23
1 Houston 25 65
2 portland 27 66
In [14]: df= pd.concat([india_weather,us_weather])
In [10]: df
Out[10]: city temperature humidity
0 mumbai 36 68
1 delhi 35 65
2 bangalore 38 75
0 new york 36 23
1 Houston 25 65
2 portland 27 66
In [12]: india_temp=pd.DataFrame({'city':['delhi','mumbai','bangalore'],
'temp':[23,36,46]})
india_temp
Out[12]: city temp
0 delhi 23
1 mumbai 36
2 bangalore 46
In [13]: india_humidity=pd.DataFrame({'city':['delhi','mumbai','bangalore'],
'humidity':[25,37,56]})
india_humidity
Out[13]: city humidity
0 delhi 25
1 mumbai 37
2 bangalore 56
In [16]: gg= pd.merge(india_temp,india_humidity,on='city')
In [17]: gg
Out[17]: city temp humidity
0 delhi 23 25
1 mumbai 36 37
2 bangalore 46 56
In [63]: df1=df.drop(columns=['temp','humidity'])
df1
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[63], line 1
----> 1 df1=df.drop(columns=['temp','humidity'])
2 df1
File ~\anaconda3\Lib\site-packages\pandas\core\frame.py:5581, in DataFrame.drop(sel
f, labels, axis, index, columns, level, inplace, errors)
5433 def drop(
5434 self,
5435 labels: IndexLabel | None = None,
(...)
5442 errors: IgnoreRaise = "raise",
5443 ) -> DataFrame | None:
5444 """
5445 Drop specified labels from rows or columns.
5446
(...)
5579 weight 1.0 0.8
5580 """
-> 5581 return super().drop(
5582 labels=labels,
5583 axis=axis,
5584 index=index,
5585 columns=columns,
5586 level=level,
5587 inplace=inplace,
5588 errors=errors,
5589 )
File ~\anaconda3\Lib\site-packages\pandas\core\generic.py:4788, in NDFrame.drop(sel
f, labels, axis, index, columns, level, inplace, errors)
4786 for axis, labels in axes.items():
4787 if labels is not None:
-> 4788 obj = obj._drop_axis(labels, axis, level=level, errors=errors)
4790 if inplace:
4791 self._update_inplace(obj)
File ~\anaconda3\Lib\site-packages\pandas\core\generic.py:4830, in NDFrame._drop_axi
s(self, labels, axis, level, errors, only_slice)
4828 new_axis = axis.drop(labels, level=level, errors=errors)
4829 else:
-> 4830 new_axis = axis.drop(labels, errors=errors)
4831 indexer = axis.get_indexer(new_axis)
4833 # Case for non-unique axis
4834 else:
File ~\anaconda3\Lib\site-packages\pandas\core\indexes\base.py:7070, in Index.drop(s
elf, labels, errors)
7068 if mask.any():
7069 if errors != "ignore":
-> 7070 raise KeyError(f"{labels[mask].tolist()} not found in axis")
7071 indexer = indexer[~mask]
7072 return self.delete(indexer)
KeyError: "['temp', 'humidity'] not found in axis"
In [ ]: df1
In [23]: india_temp=pd.DataFrame({'temp':[23,36,46]})
In [25]: india_temp
Out[25]: temp
0 23
1 36
2 46
In [26]: df2=df1.append('india_temp')
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_29584\2877941717.py in ?()
----> 1 df2=df1.append('india_temp')
~\anaconda3\Lib\site-packages\pandas\core\generic.py in ?(self, name)
6295 and name not in self._accessors
6296 and self._info_axis._can_hold_identifiers_and_holds_name(name)
6297 ):
6298 return self[name]
-> 6299 return object.__getattribute__(self, name)
AttributeError: 'DataFrame' object has no attribute 'append'
In [2]: import pandas as pd
In [3]: df=pd.read_csv(r"C:\Users\Dell\Downloads\diabetes_unclean (1).csv")
In [4]: df
Out[4]: ID No_Pation Gender AGE Urea Cr HbA1c Chol TG HDL LDL VLDL BM
0 502 17975 F 50.0 4.7 46.0 4.9 4.2 0.9 2.4 1.4 0.5 24.0
1 735 34221 M 26.0 4.5 62.0 4.9 3.7 1.4 1.1 2.1 0.6 23.0
2 420 47975 F 50.0 4.7 46.0 4.9 4.2 0.9 2.4 1.4 0.5 24.0
3 680 87656 F 50.0 4.7 46.0 4.9 4.2 0.9 2.4 1.4 0.5 24.0
4 504 34223 M 33.0 7.1 46.0 4.9 4.9 1.0 0.8 2.0 0.4 21.0
... ... ... ... ... ... ... ... ... ... ... ... ... ..
1004 191 454316 M 55.0 NaN 62.0 6.8 5.3 2.0 1.0 3.5 0.9 30.1
1005 192 454316 M 55.0 4.8 88.0 NaN 5.7 4.0 0.9 3.3 1.8 30.0
1006 193 454316 M 62.0 6.3 82.0 6.7 5.3 2.0 1.0 3.5 NaN 30.1
1007 194 454316 F 57.0 4.1 70.0 9.3 5.3 3.3 1.0 1.4 1.3 29.0
1008 195 4543 f 55.0 4.1 34.0 13.9 5.4 1.6 1.6 3.1 0.7 33.0
1009 rows × 14 columns
In [5]: h=df.sort_values(by=['HDL'])
h
Out[5]: ID No_Pation Gender AGE Urea Cr HbA1c Chol TG HDL LDL VLDL BMI
691 214 27458 F 55.0 4.5 34.0 10.4 4.7 2.5 0.2 3.4 1.1 28.36
759 193 48088 F 55.0 6.0 43.0 11.2 2.4 1.3 0.4 1.4 0.6 32.00
525 557 34417 M 55.0 4.2 68.0 8.3 4.8 3.1 0.4 2.4 1.4 33.00
764 619 34528 F 60.0 6.6 43.0 11.2 2.4 1.3 0.4 1.4 0.6 26.00
762 455 34527 M 60.0 6.2 35.0 8.5 5.8 2.9 0.4 3.0 1.6 32.00
... ... ... ... ... ... ... ... ... ... ... ... ... ..
415 787 34361 F 52.0 6.4 88.0 8.5 8.5 0.8 6.6 1.3 1.3 33.00
412 443 24119 F 52.0 6.4 88.0 8.5 8.5 0.8 6.6 1.3 1.3 33.00
816 181 45511 F 55.0 5.5 50.0 12.2 6.3 2.3 9.0 4.4 1.0 31.00
752 154 34522 F 56.0 4.5 57.0 11.8 3.9 3.1 9.9 1.8 1.4 33.00
999 248 24054 M 54.0 5.0 67.0 6.9 3.8 1.7 NaN 3.0 0.7 33.00
1009 rows × 14 columns
In [6]: h=df.sort_values(by=['HDL'],ascending=False)
h
Out[6]: ID No_Pation Gender AGE Urea Cr HbA1c Chol TG HDL LDL VLDL BMI
752 154 34522 F 56.0 4.5 57.0 11.8 3.9 3.1 9.9 1.8 1.4 33.00
816 181 45511 F 55.0 5.5 50.0 12.2 6.3 2.3 9.0 4.4 1.0 31.00
412 443 24119 F 52.0 6.4 88.0 8.5 8.5 0.8 6.6 1.3 1.3 33.00
415 787 34361 F 52.0 6.4 88.0 8.5 8.5 0.8 6.6 1.3 1.3 33.00
447 220 34375 M 60.0 5.1 59.0 9.0 4.5 1.6 6.3 2.0 1.0 37.00
... ... ... ... ... ... ... ... ... ... ... ... ... ..
762 455 34527 M 60.0 6.2 35.0 8.5 5.8 2.9 0.4 3.0 1.6 32.00
764 619 34528 F 60.0 6.6 43.0 11.2 2.4 1.3 0.4 1.4 0.6 26.00
525 557 34417 M 55.0 4.2 68.0 8.3 4.8 3.1 0.4 2.4 1.4 33.00
691 214 27458 F 55.0 4.5 34.0 10.4 4.7 2.5 0.2 3.4 1.1 28.36
999 248 24054 M 54.0 5.0 67.0 6.9 3.8 1.7 NaN 3.0 0.7 33.00
1009 rows × 14 columns
In [16]: g=pd.DataFrame([[8,5,6],[2,8,9]], columns=['pen','pencil','sharpener'])
g
Out[16]: pen pencil sharpener
0 8 5 6
1 2 8 9
In [8]: h=pd.DataFrame([[9,8,3],[5,8,9]], columns=['pen','pencil','sharpener'])
h
Out[8]: pen pencil sharpener
0 9 8 3
1 5 8 9
In [2]: pip install numpy
Requirement already satisfied: numpy in c:\users\dell\anaconda3\lib\site-packages
(1.26.4)
Note: you may need to restart the kernel to use updated packages.
In [14]: h2=pd.concat([h,g],ignore_index=True)
h2
Out[14]: pen pencil sharpener
0 9 8 3
1 5 8 9
2 8 5 6
3 2 8 9
In [15]: h2=pd.concat([h,g])
h2
Out[15]: pen pencil sharpener
0 9 8 3
1 5 8 9
0 8 5 6
1 2 8 9
In [3]: import numpy as np
In [18]: z= [4,5,8]
z
Out[18]: [4, 5, 8]
In [19]: type(z)
Out[19]: list
In [21]: npa=np.array(z)
npa
Out[21]: array([4, 5, 8])
In [24]: print(type(npa))
<class 'numpy.ndarray'>
In [27]: a=np.array([0,1,2,3])
a
Out[27]: array([0, 1, 2, 3])
In [30]: a.ndim
Out[30]: 1
In [31]: d=np.array([[0,1,2,3],[0,4,5,3]])
d
Out[31]: array([[0, 1, 2, 3],
[0, 4, 5, 3]])
In [32]: d.ndim
Out[32]: 2
In [34]: mymatrix=[[1,2,3],[4,5,6],[8,9,7]]
mymatrix
Out[34]: [[1, 2, 3], [4, 5, 6], [8, 9, 7]]
In [35]: np.array(mymatrix).shape
Out[35]: (3, 3)
In [41]: c=np.array([[[0,1],[2,3]],[[4,5],[7,8]]])
c
Out[41]: array([[[0, 1],
[2, 3]],
[[4, 5],
[7, 8]]])
In [42]: c.ndim
Out[42]: 3
In [46]: a=np.linspace(2,9,9)
a
Out[46]: array([2. , 2.875, 3.75 , 4.625, 5.5 , 6.375, 7.25 , 8.125, 9. ])
In [51]: b=np.arange(10)
b
Out[51]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [52]: b=np.arange(1,10,2)
b
Out[52]: array([1, 3, 5, 7, 9])
In [54]: np.zeros(3)
Out[54]: array([0., 0., 0.])
In [55]: np.zeros((2,2))
Out[55]: array([[0., 0.],
[0., 0.]])
In [56]: np.ones((4,3))
Out[56]: array([[1., 1., 1.],
[1., 1., 1.],
[1., 1., 1.],
[1., 1., 1.]])
In [59]: c=np.eye(3,20)
c
Out[59]: array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0.],
[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0.],
[0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0.]])
In [60]: a=np.diag([7,8,9,10])
a
Out[60]: array([[ 7, 0, 0, 0],
[ 0, 8, 0, 0],
[ 0, 0, 9, 0],
[ 0, 0, 0, 10]])
In [61]: a=np.random.randint(1,121,5)
a
Out[61]: array([117, 13, 2, 11, 120])
In [6]: a=np.array([1,2,3,4])
b=np.array([5,2,2,4])
a==b
Out[6]: array([False, True, False, True])
In [7]: a=np.array([1,2,3,4])
b=np.array([5,2,2,4])
c=np.array([1,2,3,4])
In [8]: np.array_equal(a,b)
Out[8]: False
In [9]: np.array_equal(a,c)
Out[9]: True
In [11]: a=np.array([1,1,0,0],dtype=bool)
b=np.array([1,0,1,0],dtype=bool)
np.logical_or(a,b)
Out[11]: array([ True, True, True, False])
In [12]: a=np.array([1,1,0,0],dtype=bool)
b=np.array([1,0,1,0],dtype=bool)
np.logical_and(a,b)
Out[12]: array([ True, False, False, False])
In [13]: a=np.arange(5)
np.sin(a)
Out[13]: array([ 0. , 0.84147098, 0.90929743, 0.14112001, -0.7568025 ])
In [14]: np.log(a)
C:\Users\Dell\AppData\Local\Temp\ipykernel_20252\176755284.py:1: RuntimeWarning: div
ide by zero encountered in log
np.log(a)
Out[14]: array([ -inf, 0. , 0.69314718, 1.09861229, 1.38629436])
In [15]: np.exp(a)
Out[15]: array([ 1. , 2.71828183, 7.3890561 , 20.08553692, 54.59815003])
In [19]: x=np.array([1,2,3,4])
np.sum(x)
Out[19]: 10
In [20]: x=np.array([[1,1],[2,2]])
x
Out[20]: array([[1, 1],
[2, 2]])
In [21]: x.sum(axis=0)
Out[21]: array([3, 3])
In [22]: x.sum(axis=1)
Out[22]: array([2, 4])
In [23]: x=np.array([1,3,2])
x.min()
Out[23]: 1
In [24]: x.max()
Out[24]: 3
In [26]: x.argmin()
Out[26]: 0
In [27]: x.argmax()
Out[27]: 1
In [29]: arr=np.arange(0,10,1)
arr
Out[29]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [30]: np.mean(arr)
Out[30]: 4.5
In [31]: np.median(arr)
Out[31]: 4.5
In [32]: np.std(arr)
Out[32]: 2.8722813232690143
In [33]: np.min(arr)
Out[33]: 0
In [34]: np.max(arr)
Out[34]: 9
In [33]: import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [3]: df = pd.DataFrame({'Length':np.random.randn(500)})
df.head()
Out[3]: Length
0 -0.389981
1 0.207945
2 0.550208
3 1.229969
4 0.065271
In [4]: df['Length'].hist() #pandas
plt.show()
In [8]: plt.hist(df['Length'])
plt.show()
In [10]: plt.hist(df['Length'],bins=15)
plt.xlabel('Length')
plt.ylabel('Count')
plt.title('Histogram of Length')
plt.show()
In [11]: plt.hist(df['Length'],bins=15,orientation='horizontal')
# With orientation='horizontal' the bars grow along x, so the counts are on
# the x-axis and the Length values are on the y-axis.
plt.xlabel('Count')
plt.ylabel('Length')
plt.title('Histogram of Length')
plt.show()
In [50]: import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [14]: df = pd.DataFrame({'Length':np.random.randn(500)})
df.head()
Out[14]: Length
0 -1.141807
1 1.965798
2 -1.360123
3 -0.313278
4 1.165918
In [16]: plt.boxplot(df['Length'])
plt.show()
In [17]: plt.boxplot(df['Length'],notch=True)
plt.show()
In [19]: x=np.arange(1,11)
y=np.arange(11,21)
plt.scatter(x,y)
plt.show()
In [24]: x=np.arange(1,11)
y=np.arange(11,21)
plt.scatter(x,y)
plt.xlabel('x values')
plt.ylabel('y values')
plt.title('2D scatter plot')
plt.show()
In [29]: plt.plot([16,12,10,15,14,17,19,11,15,13],color='pink',linewidth=5.0)
plt.xlabel('x values')
plt.ylabel('y values')
plt.title('2D lines plot')
plt.grid()
plt.text(4,12,'Sales')
plt.show()
In [31]: x= [1,2.1,0.4,8.9,3.5,6.5,8.5,3.4]
y=[2,3.4,8.4,0.5,2.5,3.5,3.5,8.5]
plt.plot(x,y,color='turquoise',marker='+',linestyle='--',markersize=10)
plt.show()
In [34]: x=[1,2,3,4,5,6,7]
y=[45,45,42,48,46,42,41]
plt.subplot(2,2,1)
plt.plot(x,y,'r--')
plt.subplot(2,2,2)
plt.plot(x,y,'g*--')
plt.subplot(2,2,3)
plt.plot(x,y,'bo',linewidth=2, linestyle='dashed')
plt.subplot(2,2,4)
plt.plot(x,y,'go',linestyle='dashed')
Out[34]: [<matplotlib.lines.Line2D at 0x17174f96c00>]
In [35]: list=[20,80,15,10,20,5]
lbl=['facecream','facewash','toothpaste','soap','shampoo','moisturizer']
In [41]: plt.pie(list,labels=lbl,autopct='%1.1f%%')
plt.show()
In [48]: la= ['python','c++','java','ruby']
sizes=[215,130,205,210]
co=['gold','yellowgreen','red','lightskyblue']
explode=(0,0.1,0,0)
plt.pie(sizes,explode=explode, labels=la, colors=co, autopct='%1.1f%%')
plt.axis('equal')
plt.savefig('plot.jpg')
plt.show()
In [49]: marks=[81,90,95,85,72]
roll_no=[11,12,13,14,15]
plt.bar(roll_no, marks)
plt.xlabel('roll_no')
plt.ylabel('marks')
plt.show()
In [54]: import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [59]: companies=['Amazon','FB','Google','GE']
profit=[50,60,120,52]
revenue=[60,80,150,82]
plt.bar(companies,profit,label='Profit')
plt.title('Top US Firms Stocks')
plt.legend()
plt.show()
In [60]: companies=['Amazon','FB','Google','GE']
profit=[50,60,120,52]
revenue=[60,80,150,82]
# Place the two series side by side. Drawing both at the same x positions
# lets the taller revenue bars completely cover the profit bars.
positions=np.arange(len(companies))
width=0.4
plt.bar(positions-width/2,profit,width=width,label='Profit')
plt.bar(positions+width/2,revenue,width=width,label='Revenue')
plt.xticks(positions,companies)
plt.title('Top US Firms Stocks')
plt.legend()
plt.show()
In [61]: df=pd.read_csv(r"C:\Users\Dell\Downloads\tip.csv")
In [65]: df
Out[65]: total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
... ... ... ... ... ... ... ...
239 29.03 5.92 Male No Sat Dinner 3
240 27.18 2.00 Female Yes Sat Dinner 2
241 22.67 2.00 Male Yes Sat Dinner 2
242 17.82 1.75 Male No Sat Dinner 2
243 18.78 3.00 Female No Thur Dinner 2
244 rows × 7 columns
In [64]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 total_bill 244 non-null float64
1 tip 244 non-null float64
2 sex 244 non-null object
3 smoker 244 non-null object
4 day 244 non-null object
5 time 244 non-null object
6 size 244 non-null int64
dtypes: float64(2), int64(1), object(4)
memory usage: 13.5+ KB
In [67]: df.head()
Out[67]: total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
In [68]: plt.bar(df['size'],df['total_bill'])
plt.xlabel('Size')
plt.ylabel('Total Bill')
plt.title('Total bill Vs Size')
plt.show()
In [73]: plt.plot(df['size'])
plt.plot(df['tip'])
plt.show()
In [ ]: