0% found this document useful (0 votes)
5 views28 pages

Pythonclass File

Uploaded by

Nitin Sahsani
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
5 views28 pages

Pythonclass File

Uploaded by

Nitin Sahsani
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd

In [ ]: import pandas as pd

In [ ]: df=pd.read_csv(r"C:\Users\Dell\Downloads\diabetes_unclean (1).csv")

In [ ]: df

In [ ]: df['total_values']=(df['LDL']+df['VLDL'])
df

In [ ]: df['medical condition']='good/bad'

In [ ]: df.loc[df['total_values']>=4,'medical condition']='bad'

In [ ]: df.loc[df['total_values']<4,'medical condition']='good'

In [ ]: df

In [1]: import pandas as pd

In [6]: india_weather= pd.DataFrame({'city':['mumbai','delhi','bangalore'],'temperature':[36,35,38],'humidity':[68,65,75]})

In [3]: india_weather

Out[3]: city temperature humidity

0 mumbai 36 68

1 delhi 35 65

2 bangalore 38 75

In [7]: us_weather= pd.DataFrame({'city':['new york','Houston','portland'],'temperature':[36,25,27],'humidity':[23,65,66]})

In [8]: us_weather

Out[8]: city temperature humidity

0 new york 36 23

1 Houston 25 65

2 portland 27 66

In [14]: df= pd.concat([india_weather,us_weather])

In [10]: df
Out[10]: city temperature humidity

0 mumbai 36 68

1 delhi 35 65

2 bangalore 38 75

0 new york 36 23

1 Houston 25 65

2 portland 27 66

In [12]: india_temp=pd.DataFrame({'city':['delhi','mumbai','bangalore'],
'temp':[23,36,46]})
india_temp

Out[12]: city temp

0 delhi 23

1 mumbai 36

2 bangalore 46

In [13]: india_humidity=pd.DataFrame({'city':['delhi','mumbai','bangalore'],
'humidity':[25,37,56]})
india_humidity

Out[13]: city humidity

0 delhi 25

1 mumbai 37

2 bangalore 56

In [16]: gg= pd.merge(india_temp,india_humidity,on='city')

In [17]: gg

Out[17]: city temp humidity

0 delhi 23 25

1 mumbai 36 37

2 bangalore 46 56

In [63]: df1=df.drop(columns=['temp','humidity'])
df1
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[63], line 1
----> 1 df1=df.drop(columns=['temp','humidity'])
2 df1

File ~\anaconda3\Lib\site-packages\pandas\core\frame.py:5581, in DataFrame.drop(self, labels, axis, index, columns, level, inplace, errors)
5433 def drop(
5434 self,
5435 labels: IndexLabel | None = None,
(...)
5442 errors: IgnoreRaise = "raise",
5443 ) -> DataFrame | None:
5444 """
5445 Drop specified labels from rows or columns.
5446
(...)
5579 weight 1.0 0.8
5580 """
-> 5581 return super().drop(
5582 labels=labels,
5583 axis=axis,
5584 index=index,
5585 columns=columns,
5586 level=level,
5587 inplace=inplace,
5588 errors=errors,
5589 )

File ~\anaconda3\Lib\site-packages\pandas\core\generic.py:4788, in NDFrame.drop(self, labels, axis, index, columns, level, inplace, errors)
4786 for axis, labels in axes.items():
4787 if labels is not None:
-> 4788 obj = obj._drop_axis(labels, axis, level=level, errors=errors)
4790 if inplace:
4791 self._update_inplace(obj)

File ~\anaconda3\Lib\site-packages\pandas\core\generic.py:4830, in NDFrame._drop_axis(self, labels, axis, level, errors, only_slice)
4828 new_axis = axis.drop(labels, level=level, errors=errors)
4829 else:
-> 4830 new_axis = axis.drop(labels, errors=errors)
4831 indexer = axis.get_indexer(new_axis)
4833 # Case for non-unique axis
4834 else:

File ~\anaconda3\Lib\site-packages\pandas\core\indexes\base.py:7070, in Index.drop(self, labels, errors)
7068 if mask.any():
7069 if errors != "ignore":
-> 7070 raise KeyError(f"{labels[mask].tolist()} not found in axis")
7071 indexer = indexer[~mask]
7072 return self.delete(indexer)

KeyError: "['temp', 'humidity'] not found in axis"


In [ ]: df1

In [23]: india_temp=pd.DataFrame({'temp':[23,36,46]})

In [25]: india_temp

Out[25]: temp

0 23

1 36

2 46

In [26]: df2=df1.append('india_temp')

---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_29584\2877941717.py in ?()
----> 1 df2=df1.append('india_temp')

~\anaconda3\Lib\site-packages\pandas\core\generic.py in ?(self, name)


6295 and name not in self._accessors
6296 and self._info_axis._can_hold_identifiers_and_holds_name(name)
6297 ):
6298 return self[name]
-> 6299 return object.__getattribute__(self, name)

AttributeError: 'DataFrame' object has no attribute 'append'

In [2]: import pandas as pd

In [3]: df=pd.read_csv(r"C:\Users\Dell\Downloads\diabetes_unclean (1).csv")

In [4]: df
Out[4]: ID No_Pation Gender AGE Urea Cr HbA1c Chol TG HDL LDL VLDL BM

0 502 17975 F 50.0 4.7 46.0 4.9 4.2 0.9 2.4 1.4 0.5 24.0

1 735 34221 M 26.0 4.5 62.0 4.9 3.7 1.4 1.1 2.1 0.6 23.0

2 420 47975 F 50.0 4.7 46.0 4.9 4.2 0.9 2.4 1.4 0.5 24.0

3 680 87656 F 50.0 4.7 46.0 4.9 4.2 0.9 2.4 1.4 0.5 24.0

4 504 34223 M 33.0 7.1 46.0 4.9 4.9 1.0 0.8 2.0 0.4 21.0

... ... ... ... ... ... ... ... ... ... ... ... ... ..

1004 191 454316 M 55.0 NaN 62.0 6.8 5.3 2.0 1.0 3.5 0.9 30.1

1005 192 454316 M 55.0 4.8 88.0 NaN 5.7 4.0 0.9 3.3 1.8 30.0

1006 193 454316 M 62.0 6.3 82.0 6.7 5.3 2.0 1.0 3.5 NaN 30.1

1007 194 454316 F 57.0 4.1 70.0 9.3 5.3 3.3 1.0 1.4 1.3 29.0

1008 195 4543 f 55.0 4.1 34.0 13.9 5.4 1.6 1.6 3.1 0.7 33.0

1009 rows × 14 columns

 

In [5]: h=df.sort_values(by=['HDL'])
h

Out[5]: ID No_Pation Gender AGE Urea Cr HbA1c Chol TG HDL LDL VLDL BMI

691 214 27458 F 55.0 4.5 34.0 10.4 4.7 2.5 0.2 3.4 1.1 28.36

759 193 48088 F 55.0 6.0 43.0 11.2 2.4 1.3 0.4 1.4 0.6 32.00

525 557 34417 M 55.0 4.2 68.0 8.3 4.8 3.1 0.4 2.4 1.4 33.00

764 619 34528 F 60.0 6.6 43.0 11.2 2.4 1.3 0.4 1.4 0.6 26.00

762 455 34527 M 60.0 6.2 35.0 8.5 5.8 2.9 0.4 3.0 1.6 32.00

... ... ... ... ... ... ... ... ... ... ... ... ... ..

415 787 34361 F 52.0 6.4 88.0 8.5 8.5 0.8 6.6 1.3 1.3 33.00

412 443 24119 F 52.0 6.4 88.0 8.5 8.5 0.8 6.6 1.3 1.3 33.00

816 181 45511 F 55.0 5.5 50.0 12.2 6.3 2.3 9.0 4.4 1.0 31.00

752 154 34522 F 56.0 4.5 57.0 11.8 3.9 3.1 9.9 1.8 1.4 33.00

999 248 24054 M 54.0 5.0 67.0 6.9 3.8 1.7 NaN 3.0 0.7 33.00

1009 rows × 14 columns

 
In [6]: h=df.sort_values(by=['HDL'],ascending=False)
h

Out[6]: ID No_Pation Gender AGE Urea Cr HbA1c Chol TG HDL LDL VLDL BMI

752 154 34522 F 56.0 4.5 57.0 11.8 3.9 3.1 9.9 1.8 1.4 33.00

816 181 45511 F 55.0 5.5 50.0 12.2 6.3 2.3 9.0 4.4 1.0 31.00

412 443 24119 F 52.0 6.4 88.0 8.5 8.5 0.8 6.6 1.3 1.3 33.00

415 787 34361 F 52.0 6.4 88.0 8.5 8.5 0.8 6.6 1.3 1.3 33.00

447 220 34375 M 60.0 5.1 59.0 9.0 4.5 1.6 6.3 2.0 1.0 37.00

... ... ... ... ... ... ... ... ... ... ... ... ... ..

762 455 34527 M 60.0 6.2 35.0 8.5 5.8 2.9 0.4 3.0 1.6 32.00

764 619 34528 F 60.0 6.6 43.0 11.2 2.4 1.3 0.4 1.4 0.6 26.00

525 557 34417 M 55.0 4.2 68.0 8.3 4.8 3.1 0.4 2.4 1.4 33.00

691 214 27458 F 55.0 4.5 34.0 10.4 4.7 2.5 0.2 3.4 1.1 28.36

999 248 24054 M 54.0 5.0 67.0 6.9 3.8 1.7 NaN 3.0 0.7 33.00

1009 rows × 14 columns

 

In [16]: g=pd.DataFrame([[8,5,6],[2,8,9]], columns=['pen','pencil','sharpener'])


g

Out[16]: pen pencil sharpener

0 8 5 6

1 2 8 9

In [8]: h=pd.DataFrame([[9,8,3],[5,8,9]], columns=['pen','pencil','sharpener'])


h

Out[8]: pen pencil sharpener

0 9 8 3

1 5 8 9

In [2]: pip install numpy

Requirement already satisfied: numpy in c:\users\dell\anaconda3\lib\site-packages (1.26.4)
Note: you may need to restart the kernel to use updated packages.
In [14]: h2=pd.concat([h,g],ignore_index=True)
h2

Out[14]: pen pencil sharpener

0 9 8 3

1 5 8 9

2 8 5 6

3 2 8 9

In [15]: h2=pd.concat([h,g])
h2

Out[15]: pen pencil sharpener

0 9 8 3

1 5 8 9

0 8 5 6

1 2 8 9

In [3]: import numpy as np

In [18]: z= [4,5,8]
z

Out[18]: [4, 5, 8]

In [19]: type(z)

Out[19]: list

In [21]: npa=np.array(z)
npa

Out[21]: array([4, 5, 8])

In [24]: print(type(npa))

<class 'numpy.ndarray'>

In [27]: a=np.array([0,1,2,3])
a

Out[27]: array([0, 1, 2, 3])

In [30]: a.ndim
Out[30]: 1

In [31]: d=np.array([[0,1,2,3],[0,4,5,3]])
d

Out[31]: array([[0, 1, 2, 3],
                [0, 4, 5, 3]])

In [32]: d.ndim

Out[32]: 2

In [34]: mymatrix=[[1,2,3],[4,5,6],[8,9,7]]
mymatrix

Out[34]: [[1, 2, 3], [4, 5, 6], [8, 9, 7]]

In [35]: np.array(mymatrix).shape

Out[35]: (3, 3)

In [41]: c=np.array([[[0,1],[2,3]],[[4,5],[7,8]]])
c

Out[41]: array([[[0, 1],
                 [2, 3]],

                [[4, 5],
                 [7, 8]]])

In [42]: c.ndim

Out[42]: 3

In [46]: a=np.linspace(2,9,9)
a

Out[46]: array([2. , 2.875, 3.75 , 4.625, 5.5 , 6.375, 7.25 , 8.125, 9. ])

In [51]: b=np.arange(10)
b

Out[51]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [52]: b=np.arange(1,10,2)
b

Out[52]: array([1, 3, 5, 7, 9])

In [54]: np.zeros(3)

Out[54]: array([0., 0., 0.])


In [55]: np.zeros((2,2))

Out[55]: array([[0., 0.],
                [0., 0.]])

In [56]: np.ones((4,3))

Out[56]: array([[1., 1., 1.],
                [1., 1., 1.],
                [1., 1., 1.],
                [1., 1., 1.]])

In [59]: c=np.eye(3,20)
c

Out[59]: array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0.],
[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0.],
[0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0.]])

In [60]: a=np.diag([7,8,9,10])
a

Out[60]: array([[ 7,  0,  0,  0],
                [ 0,  8,  0,  0],
                [ 0,  0,  9,  0],
                [ 0,  0,  0, 10]])

In [61]: a=np.random.randint(1,121,5)
a

Out[61]: array([117, 13, 2, 11, 120])

In [6]: a=np.array([1,2,3,4])
b=np.array([5,2,2,4])
a==b

Out[6]: array([False, True, False, True])

In [7]: a=np.array([1,2,3,4])
b=np.array([5,2,2,4])
c=np.array([1,2,3,4])

In [8]: np.array_equal(a,b)

Out[8]: False

In [9]: np.array_equal(a,c)

Out[9]: True
In [11]: a=np.array([1,1,0,0],dtype=bool)
b=np.array([1,0,1,0],dtype=bool)
np.logical_or(a,b)

Out[11]: array([ True, True, True, False])

In [12]: a=np.array([1,1,0,0],dtype=bool)
b=np.array([1,0,1,0],dtype=bool)
np.logical_and(a,b)

Out[12]: array([ True, False, False, False])

In [13]: a=np.arange(5)
np.sin(a)

Out[13]: array([ 0. , 0.84147098, 0.90929743, 0.14112001, -0.7568025 ])

In [14]: np.log(a)

C:\Users\Dell\AppData\Local\Temp\ipykernel_20252\176755284.py:1: RuntimeWarning: divide by zero encountered in log
np.log(a)
Out[14]: array([ -inf, 0. , 0.69314718, 1.09861229, 1.38629436])

In [15]: np.exp(a)

Out[15]: array([ 1. , 2.71828183, 7.3890561 , 20.08553692, 54.59815003])

In [19]: x=np.array([1,2,3,4])
np.sum(x)

Out[19]: 10

In [20]: x=np.array([[1,1],[2,2]])
x

Out[20]: array([[1, 1],
                [2, 2]])

In [21]: x.sum(axis=0)

Out[21]: array([3, 3])

In [22]: x.sum(axis=1)

Out[22]: array([2, 4])

In [23]: x=np.array([1,3,2])
x.min()

Out[23]: 1
In [24]: x.max()

Out[24]: 3

In [26]: x.argmin()

Out[26]: 0

In [27]: x.argmax()

Out[27]: 1

In [29]: arr=np.arange(0,10,1)
arr

Out[29]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [30]: np.mean(arr)

Out[30]: 4.5

In [31]: np.median(arr)

Out[31]: 4.5

In [32]: np.std(arr)

Out[32]: 2.8722813232690143

In [33]: np.min(arr)

Out[33]: 0

In [34]: np.max(arr)

Out[34]: 9

In [33]: import pandas as pd


import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]: df = pd.DataFrame({'Length':np.random.randn(500)})
df.head()
Out[3]: Length

0 -0.389981

1 0.207945

2 0.550208

3 1.229969

4 0.065271

In [4]: df['Length'].hist() #pandas


plt.show()

In [8]: plt.hist(df['Length'])
plt.show()
In [10]: plt.hist(df['Length'],bins=15)
plt.xlabel('Length')
plt.ylabel('Count')
plt.title('Histogram of Length')
plt.show()
In [11]: plt.hist(df['Length'],bins=15,orientation='horizontal')
plt.xlabel('Length')
plt.ylabel('Count')
plt.title('Histogram of Length')
plt.show()
In [50]: import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [14]: df = pd.DataFrame({'Length':np.random.randn(500)})
df.head()

Out[14]: Length

0 -1.141807

1 1.965798

2 -1.360123

3 -0.313278

4 1.165918

In [16]: plt.boxplot(df['Length'])
plt.show()
In [17]: plt.boxplot(df['Length'],notch=True)
plt.show()

In [19]: x=np.arange(1,11)
y=np.arange(11,21)
plt.scatter(x,y)
plt.show()

In [24]: x=np.arange(1,11)
y=np.arange(11,21)
plt.scatter(x,y)
plt.xlabel('x values')
plt.ylabel('y values')
plt.title('2D scatter plot')
plt.show()
In [29]: plt.plot([16,12,10,15,14,17,19,11,15,13],color='pink',linewidth=5.0)
plt.xlabel('x values')
plt.ylabel('y values')
plt.title('2D lines plot')
plt.grid()
plt.text(4,12,'Sales')
plt.show()
In [31]: x= [1,2.1,0.4,8.9,3.5,6.5,8.5,3.4]
y=[2,3.4,8.4,0.5,2.5,3.5,3.5,8.5]
plt.plot(x,y,color='turquoise',marker='+',linestyle='--',markersize=10)
plt.show()
In [34]: x=[1,2,3,4,5,6,7]
y=[45,45,42,48,46,42,41]

plt.subplot(2,2,1)
plt.plot(x,y,'r--')

plt.subplot(2,2,2)
plt.plot(x,y,'g*--')

plt.subplot(2,2,3)
plt.plot(x,y,'bo',linewidth=2, linestyle='dashed')

plt.subplot(2,2,4)
plt.plot(x,y,'go',linestyle='dashed')

Out[34]: [<matplotlib.lines.Line2D at 0x17174f96c00>]


In [35]: list=[20,80,15,10,20,5]
lbl=['facecream','facewash','toothpaste','soap','shampoo','moisturizer']

In [41]: plt.pie(list,labels=lbl,autopct='%1.1f%%')
plt.show()
In [48]: la= ['python','c++','java','ruby']
sizes=[215,130,205,210]
co=['gold','yellowgreen','red','lightskyblue']
explode=(0,0.1,0,0)

plt.pie(sizes,explode=explode, labels=la, colors=co, autopct='%1.1f%%')

plt.axis('equal')
plt.savefig('plot.jpg')
plt.show()

In [49]: marks=[81,90,95,85,72]
roll_no=[11,12,13,14,15]
plt.bar(roll_no, marks)
plt.xlabel('roll_no')
plt.ylabel('marks')
plt.show()
In [54]: import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [59]: companies=['Amazon','FB','Google','GE']
profit=[50,60,120,52]
revenue=[60,80,150,82]
plt.bar(companies,profit,label='Profit')

plt.title('Top US Firms Stocks')


plt.legend()
plt.show()
In [60]: companies=['Amazon','FB','Google','GE']
profit=[50,60,120,52]
revenue=[60,80,150,82]
plt.bar(companies,profit,label='Profit')
plt.bar(companies,revenue,label='Revenue')
plt.title('Top US Firms Stocks')
plt.legend()
plt.show()
In [61]: df=pd.read_csv(r"C:\Users\Dell\Downloads\tip.csv")

In [65]: df

Out[65]: total_bill tip sex smoker day time size

0 16.99 1.01 Female No Sun Dinner 2

1 10.34 1.66 Male No Sun Dinner 3

2 21.01 3.50 Male No Sun Dinner 3

3 23.68 3.31 Male No Sun Dinner 2

4 24.59 3.61 Female No Sun Dinner 4

... ... ... ... ... ... ... ...

239 29.03 5.92 Male No Sat Dinner 3

240 27.18 2.00 Female Yes Sat Dinner 2

241 22.67 2.00 Male Yes Sat Dinner 2

242 17.82 1.75 Male No Sat Dinner 2

243 18.78 3.00 Female No Thur Dinner 2

244 rows × 7 columns


In [64]: df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 total_bill 244 non-null float64
1 tip 244 non-null float64
2 sex 244 non-null object
3 smoker 244 non-null object
4 day 244 non-null object
5 time 244 non-null object
6 size 244 non-null int64
dtypes: float64(2), int64(1), object(4)
memory usage: 13.5+ KB

In [67]: df.head()

Out[67]: total_bill tip sex smoker day time size

0 16.99 1.01 Female No Sun Dinner 2

1 10.34 1.66 Male No Sun Dinner 3

2 21.01 3.50 Male No Sun Dinner 3

3 23.68 3.31 Male No Sun Dinner 2

4 24.59 3.61 Female No Sun Dinner 4

In [68]: plt.bar(df['size'],df['total_bill'])
plt.xlabel('Size')
plt.ylabel('Total Bill')
plt.title('Total bill Vs Size')
plt.show()
In [73]: plt.plot(df['size'])

plt.plot(df['tip'])
plt.show()
In [ ]:

You might also like