PANDAS
4. Explore Pandas Data Structures.
[8]: import pandas as pd
from pandas import Series, DataFrame
[3]: obj = pd.Series([4, 7, -5, 3])
obj
[3]: 0 4
1 7
2 -5
3 3
dtype: int64
[4]: obj2 = pd.Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])
obj2
[4]: d 4
b 7
a -5
c 3
dtype: int64
[5]: obj2.index
[5]: Index(['d', 'b', 'a', 'c'], dtype='object')
[6]: obj2[obj2 > 0]
[6]: d 4
b 7
c 3
dtype: int64
[9]: sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
obj3 = pd.Series(sdata)
obj3
1
Manav Tarsariya ET22BTIT132
[9]: Ohio 35000
Texas 71000
Oregon 16000
Utah 5000
dtype: int64
[10]: states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = pd.Series(sdata, index=states)
obj4
[10]: California NaN
Ohio 35000.0
Oregon 16000.0
Texas 71000.0
dtype: float64
[11]: obj3 + obj4
[11]: California NaN
Ohio 70000.0
Oregon 32000.0
Texas 142000.0
Utah NaN
dtype: float64
[12]: data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
'year': [2000, 2001, 2002, 2001, 2002, 2003],
'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}
frame = pd.DataFrame(data)
frame
[12]: state year pop
0 Ohio 2000 1.5
1 Ohio 2001 1.7
2 Ohio 2002 3.6
3 Nevada 2001 2.4
4 Nevada 2002 2.9
5 Nevada 2003 3.2
[13]: pd.DataFrame(data, columns=['year', 'state', 'pop'])
[13]: year state pop
0 2000 Ohio 1.5
1 2001 Ohio 1.7
2 2002 Ohio 3.6
3 2001 Nevada 2.4
4 2002 Nevada 2.9
Manav Tarsariya ET22BTIT132
5 2003 Nevada 3.2
[15]: frame2 = pd.DataFrame(data, columns=['year', 'state', 'pop', 'debt'],
index=['one', 'two', 'three', 'four','five', 'six'])
frame2
[15]: year state pop debt
one 2000 Ohio 1.5 NaN
two 2001 Ohio 1.7 NaN
three 2002 Ohio 3.6 NaN
four 2001 Nevada 2.4 NaN
five 2002 Nevada 2.9 NaN
six 2003 Nevada 3.2 NaN
[16]: frame2.columns
[16]: Index(['year', 'state', 'pop', 'debt'], dtype='object')
[17]: frame2['state']
[17]: one Ohio
two Ohio
three Ohio
four Nevada
five Nevada
six Nevada
Name: state, dtype: object
[18]: frame.year
[18]: 0 2000
1 2001
2 2002
3 2001
4 2002
5 2003
Name: year, dtype: int64
[19]: frame2.loc['three']
[19]: year 2002
state Ohio
pop 3.6
debt NaN
Name: three, dtype: object
[20]: frame2['debt'] = 16.5
3
Manav Tarsariya
ET22BTIT132
[21]: frame2
[21]: year state pop debt
one 2000 Ohio 1.5 16.5
two 2001 Ohio 1.7 16.5
three 2002 Ohio 3.6 16.5
four 2001 Nevada 2.4 16.5
five 2002 Nevada 2.9 16.5
six 2003 Nevada 3.2 16.5
[22]: import numpy as np
frame2['debt'] = np.arange(6.)
[23]: frame2
[23]: year state pop debt
one 2000 Ohio 1.5 0.0
two 2001 Ohio 1.7 1.0
three 2002 Ohio 3.6 2.0
four 2001 Nevada 2.4 3.0
five 2002 Nevada 2.9 4.0
six 2003 Nevada 3.2 5.0
[24]: val = pd.Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])
frame2['debt'] = val
frame2
[24]: year state pop debt
one 2000 Ohio 1.5 NaN
two 2001 Ohio 1.7 -1.2
three 2002 Ohio 3.6 NaN
four 2001 Nevada 2.4 -1.5
five 2002 Nevada 2.9 -1.7
six 2003 Nevada 3.2 NaN
[25]: frame2['eastern'] = frame2.state == 'Ohio'
frame2
[25]: year state pop debt eastern
one 2000 Ohio 1.5 NaN True
two 2001 Ohio 1.7 -1.2 True
three 2002 Ohio 3.6 NaN True
four 2001 Nevada 2.4 -1.5 False
five 2002 Nevada 2.9 -1.7 False
six 2003 Nevada 3.2 NaN False
4
Manav Tarsariya ET22BTIT132
[26]: del frame2['eastern']
frame2.columns
[26]: Index(['year', 'state', 'pop', 'debt'], dtype='object')
# In [27]:
# Nested dict of populations: the outer keys ('Nevada', 'Ohio') become the
# DataFrame columns and the inner keys (years) become the row index. Years
# missing for a state show up as NaN.
pop = {'Nevada': {2001: 2.4, 2002: 2.9},
       'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}
frame4 = pd.DataFrame(pop)
frame4
[27]: Nevada Ohio
2001 2.4 1.7
2002 2.9 3.6
2000 NaN 1.5
[28]: frame3 = frame4
frame3
[28]: Nevada Ohio
2001 2.4 1.7
2002 2.9 3.6
2000 NaN 1.5
[29]: frame3.T
[29]: 2001 2002 2000
Nevada 2.4 2.9 NaN
Ohio 1.7 3.6 1.5
[30]: pd.DataFrame(pop, index=[2001, 2002, 2003])
[30]: Nevada Ohio
2001 2.4 1.7
2002 2.9 3.6
2003 NaN NaN
[31]: frame3.index.name = 'year'
frame3.columns.name = 'state'
frame3
[31]: state Nevada Ohio
year
2001 2.4 1.7
2002 2.9 3.6
2000 NaN 1.5
[35]: frame3.values
5
Manav Tarsariya ET22BTIT132
[35]: array([[2.4, 1.7],
[2.9, 3.6],
[nan, 1.5]])
[36]: frame2.values
[36]: array([[2000, 'Ohio', 1.5, nan],
[2001, 'Ohio', 1.7, -1.2],
[2002, 'Ohio', 3.6, nan],
[2001, 'Nevada', 2.4, -1.5],
[2002, 'Nevada', 2.9, -1.7],
[2003, 'Nevada', 3.2, nan]], dtype=object)
[37]: labels = pd.Index(np.arange(3))
labels
[37]: Int64Index([0, 1, 2], dtype='int64')
[38]: obj2 = pd.Series([1.5, -2.5, 0], index=labels)
obj2
[38]: 0 1.5
1 -2.5
2 0.0
dtype: float64
[41]: 'Ohio' in frame3.columns
[41]: True
[42]: 2003 in frame3.index
[42]: False
[43]: obj = pd.Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])
obj
[43]: d 4.5
b 7.2
a -5.3
c 3.6
dtype: float64
[44]: obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])
obj2
6
Manav Tarsariya ET22BTIT132
[44]: a -5.3
b 7.2
c 3.6
d 4.5
e NaN
dtype: float64
[45]: obj3 = pd.Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
obj3
[45]: 0 blue
2 purple
4 yellow
dtype: object
[46]: obj3.reindex(range(6), method='ffill')
[46]: 0 blue
1 blue
2 purple
3 purple
4 yellow
5 yellow
dtype: object
[47]: frame = pd.DataFrame(np.arange(9).reshape((3, 3)),
index=['a', 'c', 'd'],
columns=['Ohio', 'Texas', 'California'])
frame
[47]: Ohio Texas California
a 0 1 2
c 3 4 5
d 6 7 8
[49]: frame2 = frame.reindex(['a', 'b', 'c', 'd'])
frame2
[49]: Ohio Texas California
a 0.0 1.0 2.0
b NaN NaN NaN
c 3.0 4.0 5.0
d 6.0 7.0 8.0
[50]: states = ['Texas', 'Utah', 'California']
frame.reindex(columns=states)
7
Manav Tarsariya ET22BTIT132
[50]: Texas Utah California
a 1 NaN 2
c 4 NaN 5
d 7 NaN 8
[51]: obj = pd.Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])
obj
[51]: a 0.0
b 1.0
c 2.0
d 3.0
e 4.0
dtype: float64
[52]: new_obj = obj.drop('c')
new_obj
[52]: a 0.0
b 1.0
d 3.0
e 4.0
dtype: float64
[53]: data = pd.DataFrame(np.arange(16).reshape((4, 4)),
index=['Ohio', 'Colorado', 'Utah', 'New York'],
columns=['one', 'two', 'three', 'four'])
data
[53]: one two three four
Ohio 0 1 2 3
Colorado 4 5 6 7
Utah 8 9 10 11
New York 12 13 14 15
[54]: data.drop(['Colorado', 'Ohio'])
[54]: one two three four
Utah 8 9 10 11
New York 12 13 14 15
[55]: data.drop('two', axis=1)
[55]: one three four
Ohio 0 2 3
Colorado 4 6 7
Utah 8 10 11
8
Manav Tarsariya ET22BTIT132
New York 12 14 15
[56]: data.drop(['two', 'four'], axis='columns')
[56]: one three
Ohio 0 2
Colorado 4 6
Utah 8 10
New York 12 14
[57]: data['two']
[57]: Ohio 1
Colorado 5
Utah 9
New York 13
Name: two, dtype: int32
[58]: data = pd.DataFrame(np.arange(16).reshape((4, 4)),
index=['Ohio', 'Colorado', 'Utah', 'New York'],
columns=['one', 'two', 'three', 'four'])
data
[58]: one two three four
Ohio 0 1 2 3
Colorado 4 5 6 7
Utah 8 9 10 11
New York 12 13 14 15
[59]: data[['three', 'one']]
[59]: three one
Ohio 2 0
Colorado 6 4
Utah 10 8
New York 14 12
[60]: data[:2]
[60]: one two three four
Ohio 0 1 2 3
Colorado 4 5 6 7
[61]: data[data['three'] > 5]
[61]: one two three four
Colorado 4 5 6 7
9
Manav Tarsariya
ET22BTIT132
Utah 8 9 10 11
New York 12 13 14 15
[62]: data.loc['Colorado', ['two', 'three']]
[62]: two 5
three 6
Name: Colorado, dtype: int32
[63]: data.iloc[2, [3, 0, 1]]
[63]: four 11
one 8
two 9
Name: Utah, dtype: int32
[64]: data.iloc[[1, 2], [3, 0, 1]]
[64]: four one two
Colorado 7 4 5
Utah 11 8 9
[65]: data.loc[:'Utah', 'two']
[65]: Ohio 1
Colorado 5
Utah 9
Name: two, dtype: int32
[66]: data.iloc[:, :3][data.three > 5]
[66]: one two three
Colorado 4 5 6
Utah 8 9 10
New York 12 13 14
[67]: s1 = pd.Series([7.3, -2.5, 3.4, 1.5], index=['a', 'c', 'd', 'e'])
s2 = pd.Series([-2.1, 3.6, -1.5, 4, 3.1],index=['a', 'c', 'e', 'f', 'g'])
s1+s2
[67]: a 5.2
c 1.1
d NaN
e 0.0
f NaN
g NaN
dtype: float64
10
Manav Tarsariya ET22BTIT132
[68]: s2+s1
[68]: a 5.2
c 1.1
d NaN
e 0.0
f NaN
g NaN
dtype: float64
# In [69]:
# Two frames whose row and column labels only partially overlap, used below
# to show how arithmetic aligns on both axes.
df1 = pd.DataFrame(np.arange(9.).reshape((3, 3)),
                   columns=list('bcd'),
                   index=['Ohio', 'Texas', 'Colorado'])
df2 = pd.DataFrame(np.arange(12.).reshape((4, 3)),
                   columns=list('bde'),
                   index=['Utah', 'Ohio', 'Texas', 'Oregon'])
df1
[69]: b c d
Ohio 0.0 1.0 2.0
Texas 3.0 4.0 5.0
Colorado 6.0 7.0 8.0
[70]: df2
[70]: b d e
Utah 0.0 1.0 2.0
Ohio 3.0 4.0 5.0
Texas 6.0 7.0 8.0
Oregon 9.0 10.0 11.0
[71]: df1+df2
[71]: b c d e
Colorado NaN NaN NaN NaN
Ohio 3.0 NaN 6.0 NaN
Oregon NaN NaN NaN NaN
Texas 9.0 NaN 12.0 NaN
Utah NaN NaN NaN NaN
[72]: df2+df1
[72]: b c d e
Colorado NaN NaN NaN NaN
Ohio 3.0 NaN 6.0 NaN
Oregon NaN NaN NaN NaN
Texas 9.0 NaN 12.0 NaN
Utah NaN NaN NaN NaN
11
Manav Tarsariya ET22BTIT132
[73]: df1 = pd.DataFrame(np.arange(12.).reshape((3, 4)),columns=list('abcd'))
df2 = pd.DataFrame(np.arange(20.).reshape((4, 5)), columns=list('abcde'))
df2.loc[1, 'b'] = np.nan
df1
[73]: a b c d
0 0.0 1.0 2.0 3.0
1 4.0 5.0 6.0 7.0
2 8.0 9.0 10.0 11.0
[74]: df2
[74]: a b c d e
0 0.0 1.0 2.0 3.0 4.0
1 5.0 NaN 7.0 8.0 9.0
2 10.0 11.0 12.0 13.0 14.0
3 15.0 16.0 17.0 18.0 19.0
[75]: df1+df2
[75]: a b c d e
0 0.0 2.0 4.0 6.0 NaN
1 9.0 NaN 13.0 15.0 NaN
2 18.0 20.0 22.0 24.0 NaN
3 NaN NaN NaN NaN NaN
[76]: df2+df1
[76]: a b c d e
0 0.0 2.0 4.0 6.0 NaN
1 9.0 NaN 13.0 15.0 NaN
2 18.0 20.0 22.0 24.0 NaN
3 NaN NaN NaN NaN NaN
[77]: df1
[77]: a b c d
0 0.0 1.0 2.0 3.0
1 4.0 5.0 6.0 7.0
2 8.0 9.0 10.0 11.0
[78]: df2
[78]: a b c d e
0 0.0 1.0 2.0 3.0 4.0
1 5.0 NaN 7.0 8.0 9.0
2 10.0 11.0 12.0 13.0 14.0
12
Manav Tarsariya ET22BTIT132
3 15.0 16.0 17.0 18.0 19.0
[79]: df2.loc[1, 'b'] = 6.0
[80]: df2
[80]: a b c d e
0 0.0 1.0 2.0 3.0 4.0
1 5.0 6.0 7.0 8.0 9.0
2 10.0 11.0 12.0 13.0 14.0
3 15.0 16.0 17.0 18.0 19.0
[81]: df1.add(df2, fill_value=0)
[81]: a b c d e
0 0.0 2.0 4.0 6.0 4.0
1 9.0 11.0 13.0 15.0 9.0
2 18.0 20.0 22.0 24.0 14.0
3 15.0 16.0 17.0 18.0 19.0
# In [83]:
# Same addition with the operands swapped; labels missing from either frame
# are treated as 0 via fill_value. Python comments start with '#': the
# earlier '//special case' suffix was parsed as floor division and raised a
# SyntaxError.
df2.add(df1, fill_value=0)  # special case
[84]: 1 / df1
[84]: a b c d
0 inf 1.000000 0.500000 0.333333
1 0.250 0.200000 0.166667 0.142857
2 0.125 0.111111 0.100000 0.090909
[85]: df1.rdiv(1)
[85]: a b c d
0 inf 1.000000 0.500000 0.333333
1 0.250 0.200000 0.166667 0.142857
2 0.125 0.111111 0.100000 0.090909
[86]: df1.reindex(columns=df2.columns, fill_value=0)
[86]: a b c d e
0 0.0 1.0 2.0 3.0 0
13
Manav Tarsariya ET22BTIT132
1 4.0 5.0 6.0 7.0 0
2 8.0 9.0 10.0 11.0 0
[87]: arr = np.arange(12.).reshape((3, 4))
arr
[87]: array([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.]])
[88]: arr[0]
[88]: array([0., 1., 2., 3.])
[89]: arr-arr[0]
[89]: array([[0., 0., 0., 0.],
[4., 4., 4., 4.],
[8., 8., 8., 8.]])
[90]: frame = pd.DataFrame(np.arange(12.).reshape((4, 3)),␣
↪columns=list('bde'),index=['Utah', 'Ohio', 'Texas', 'Oregon'])
series = frame.iloc[0]
series
[90]: b 0.0
d 1.0
e 2.0
Name: Utah, dtype: float64
[91]: frame
[91]: b d e
Utah 0.0 1.0 2.0
Ohio 3.0 4.0 5.0
Texas 6.0 7.0 8.0
Oregon 9.0 10.0 11.0
[92]: series3 = frame['d']
series3
[92]: Utah 1.0
Ohio 4.0
Texas 7.0
Oregon 10.0
Name: d, dtype: float64
14
Manav Tarsariya ET22BTIT132
[93]: frame.sub(series3, axis='index')
[93]: b d e
Utah -1.0 0.0 1.0
Ohio -1.0 0.0 1.0
Texas -1.0 0.0 1.0
Oregon -1.0 0.0 1.0
[94]: frame.sub(series3, axis='columns')
[94]: Ohio Oregon Texas Utah b d e
Utah NaN NaN NaN NaN NaN NaN NaN
Ohio NaN NaN NaN NaN NaN NaN NaN
Texas NaN NaN NaN NaN NaN NaN NaN
Oregon NaN NaN NaN NaN NaN NaN NaN
[95]: frame = pd.DataFrame(np.random.randn(4, 3), columns=list('bde'),index=['Utah',␣
↪'Ohio', 'Texas', 'Oregon'])
frame
[95]: b d e
Utah -1.878290 -0.008359 -0.423879
Ohio -1.838317 -0.319728 -1.481255
Texas 0.265776 -0.403625 0.374745
Oregon 0.671574 -0.775854 1.068877
[96]: np.abs(frame)
[96]: b d e
Utah 1.878290 0.008359 0.423879
Ohio 1.838317 0.319728 1.481255
Texas 0.265776 0.403625 0.374745
Oregon 0.671574 0.775854 1.068877
[97]: f = lambda x: x.max() - x.min()
frame.apply(f)
[97]: b 2.549863
d 0.767494
e 2.550132
dtype: float64
[98]: frame.apply(f, axis='columns')
[98]: Utah 1.869930
Ohio 1.518589
Texas 0.778369
15
Manav Tarsariya ET22BTIT132
Oregon 1.844731
dtype: float64
[99]: frame
[99]: b d e
Utah -1.878290 -0.008359 -0.423879
Ohio -1.838317 -0.319728 -1.481255
Texas 0.265776 -0.403625 0.374745
Oregon 0.671574 -0.775854 1.068877
[100]: def f(x): return pd.Series([x.min(), x.max()], index=['min', 'max'])
frame.apply(f)
[100]: b d e
min -1.878290 -0.775854 -1.481255
max 0.671574 -0.008359 1.068877
[101]: f = lambda x: x*x
frame.apply(f)
[101]: b d e
Utah 3.527972 0.000070 0.179674
Ohio 3.379409 0.102226 2.194117
Texas 0.070637 0.162913 0.140434
Oregon 0.451011 0.601949 1.142498
# In [105]:
# A lambda body must be a single expression, so a ``for`` statement cannot
# appear in it. The running product the loop was building is obtained
# directly from the column's prod() method instead.
f = lambda x: x.prod()
frame.apply(f)
# In [108]:
def f(x):
    """Return the product of all values in ``x``.

    Parameters
    ----------
    x : iterable of numbers
        Anything that can be iterated, e.g. a list or one column passed in
        by ``DataFrame.apply``.

    Returns
    -------
    number
        The product of the elements; ``1`` for an empty input.

    Notes
    -----
    The earlier version accumulated the product but never returned it, so
    every caller received ``None``. It also rebound a local copy of the
    input (``d = d - 1``) inside the loop, which had no effect on the values
    being multiplied and has been removed.
    """
    fact = 1
    for i in x:
        fact = fact * i
    return fact
16
Manav Tarsariya ET22BTIT132
[109]: frame.apply(f)
[109]: b None
d None
e None
dtype: object
# In [114]:
# Assignments and loops are statements and cannot be chained inside a lambda
# with ';'. The intended running product is written as a single expression
# using NumPy's prod.
f = lambda x: np.prod(x)
[115]: format = lambda x: '%.2f' % x
frame.applymap(format)
[115]: b d e
Utah -1.88 -0.01 -0.42
Ohio -1.84 -0.32 -1.48
Texas 0.27 -0.40 0.37
Oregon 0.67 -0.78 1.07
[116]: frame['e'].map(format)
[116]: Utah -0.42
Ohio -1.48
Texas 0.37
Oregon 1.07
Name: e, dtype: object
[117]: obj = pd.Series(range(4), index=['d', 'a', 'b', 'c'])
obj.sort_index()
[117]: a 1
b 2
c 3
d 0
dtype: int64
[118]: frame = pd.DataFrame(np.arange(8).reshape((2, 4)),index=['three',␣
↪'one'],columns=['d', 'a', 'b', 'c'])
17
Manav Tarsariya ET22BTIT132
frame.sort_index()
[118]: d a b c
one 4 5 6 7
three 0 1 2 3
[119]: frame.sort_index(axis=1)
[119]: a b c d
three 1 2 3 0
one 5 6 7 4
[120]: frame.sort_index(axis=1, ascending=False)
[120]: d c b a
three 0 3 2 1
one 4 7 6 5
[121]: frame = pd.DataFrame({'b': [4, 7, -3, 2], 'a': [0, 1, 0, 1]})
frame
[121]: b a
0 4 0
1 7 1
2 -3 0
3 2 1
[122]: frame.sort_values(by='b')
[122]: b a
2 -3 0
3 2 1
0 4 0
1 7 1
[123]: frame.sort_values(by=['a', 'b'])
[123]: b a
2 -3 0
0 4 0
3 2 1
1 7 1
[124]: obj = pd.Series([7, -5, 7, 4, 2, 0, 4])
obj.rank()
18
Manav Tarsariya ET22BTIT132
[124]: 0 6.5
1 1.0
2 6.5
3 4.5
4 3.0
5 2.0
6 4.5
dtype: float64
[125]: obj.rank(method='first')
[125]: 0 6.0
1 1.0
2 7.0
3 4.0
4 3.0
5 2.0
6 5.0
dtype: float64
[126]: obj.rank(ascending=False, method='max')
[126]: 0 2.0
1 7.0
2 2.0
3 4.0
4 5.0
5 6.0
6 4.0
dtype: float64
# In [128]:
# Mixed int/float frame used to demonstrate ranking across columns.
frame = pd.DataFrame({'b': [4.3, 7, -3, 2],
                      'a': [0, 1, 0, 1],
                      'c': [-2, 5, 8, -2.5]})
# Only the last expression of a cell is displayed, so this ranking result is
# discarded here.
frame.rank(axis='columns')
frame
[128]: b a c
0 4.3 0 -2.0
1 7.0 1 5.0
2 -3.0 0 8.0
3 2.0 1 -2.5
[129]: frame.rank(axis='columns')
[129]: b a c
0 3.0 2.0 1.0
1 3.0 1.0 2.0
2 1.0 2.0 3.0
19
Manav Tarsariya ET22BTIT132
3 3.0 2.0 1.0
[130]: obj = pd.Series(range(5), index=['a', 'a', 'b', 'b', 'c'])
obj
[130]: a 0
a 1
b 2
b 3
c 4
dtype: int64
[131]: obj['a']
[131]: a 0
a 1
dtype: int64
[132]: df = pd.DataFrame(np.random.randn(4, 3), index=['a', 'a', 'b', 'b'])
df
[132]: 0 1 2
a -0.120697 -1.900689 0.659151
a -0.161534 -0.120115 -0.697666
b 1.762015 -0.733370 -1.154350
b -0.476266 -1.405778 1.035751
[133]: df.loc['b']
[133]: 0 1 2
b 1.762015 -0.733370 -1.154350
b -0.476266 -1.405778 1.035751
[9]: import numpy as np
df = pd.DataFrame([[1.4, np.nan], [7.1, -4.5],
[np.nan, np.nan], [0.75, -1.3]],
index=['a', 'b', 'c', 'd'],
columns=['one', 'two'])
df
[9]: one two
a 1.40 NaN
b 7.10 -4.5
c NaN NaN
d 0.75 -1.3
20
Manav Tarsariya ET22BTIT132
[10]: df.sum()
[10]: one 9.25
two -5.80
dtype: float64
[11]: df.sum(axis='columns')
[11]: a 1.40
b 2.60
c 0.00
d -0.55
dtype: float64
[12]: df.mean(axis='columns', skipna=False)
[12]: a NaN
b 1.300
c NaN
d -0.275
dtype: float64
[13]: df.idxmax()
[13]: one b
two d
dtype: object
[14]: df.cumsum()
[14]: one two
a 1.40 NaN
b 8.50 -4.5
c NaN NaN
d 9.25 -5.8
[15]: df.tail()
[15]: one two
a 1.40 NaN
b 7.10 -4.5
c NaN NaN
d 0.75 -1.3
[16]: obj = pd.Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'])
[17]: obj
21
Manav Tarsariya ET22BTIT132
[17]: 0 c
1 a
2 d
3 a
4 a
5 b
6 b
7 c
8 c
dtype: object
[18]: unique = obj.unique()
[19]: unique
[19]: array(['c', 'a', 'd', 'b'], dtype=object)
[20]: obj.value_counts()
[20]: c 3
a 3
b 2
d 1
dtype: int64
# In [21]:
# Count occurrences without sorting by frequency. The Series method is used
# because the top-level pd.value_counts function is deprecated in newer
# pandas releases.
pd.Series(obj.values).value_counts(sort=False)
[21]: d 1
a 3
b 2
c 3
dtype: int64
# In [24]:
# Boolean mask that is True where an element of obj is 'b' or 'c'.
mask = obj.isin(['b', 'c'])
mask
[24]: 0 True
1 False
22
Manav Tarsariya ET22BTIT132
2 False
3 False
4 False
5 True
6 True
7 True
8 True
dtype: bool
[25]: obj[mask]
[25]: 0 c
5 b
6 b
7 c
8 c
dtype: object
[ ]:
23
Manav Tarsariya ET22BTIT132