6.pandas | Notion

Series-map

import numpy as np
import pandas as pd
# Series: a one-dimensional array whose values each carry an index label.
labels = ['a', 'b', 'c']
my_data = [1, 2, 3]
arr = np.array(my_data)
# A real dictionary to convert; passing the builtin `dict` type itself would
# not build a Series from key/value pairs.
my_dict = {'a': 1, 'b': 2, 'c': 3}

pd.Series(my_data, labels)  # positional order is (data, index); data may be a list or a numpy array
pd.Series(my_dict)  # dictionary keys become the index, dictionary values become the data
pd.Series(arr)  # no index given -> default labels 0, 1, 2, ... so it reads like a list

# Arithmetic between two Series is aligned on index labels, not on position.
# Labels present in only one operand ('a' and 'd' here) produce NaN.
ser1 = pd.Series([1, 2, 3], ['a', 'b', 'c'])
ser2 = pd.Series([10, 20, 30], ['b', 'c', 'd'])
ser1 + ser2

DataFrames

import pandas as pd
import numpy as np
# Build a 5x4 frame of random normals: row labels come first, column labels second.
df = pd.DataFrame(np.random.randn(5, 4), ['A', 'b', 'c', 'd', 'e'], ['w', 'x', 'y', 'z'])
# Columns
df['w']  # one label -> that column as a Series; plain [] with a label selects columns, not rows
df[['w', 'z']]  # list of labels -> DataFrame holding just those columns
df['new'] = df['w'] + df['z']  # element-wise sum stored as a new column
# axis=0 targets rows, axis=1 targets columns. drop() returns a modified copy
# unless inplace=True is passed, so data is never lost by accident.
df.drop('new', axis=1, inplace=True)
# Rows
df.loc['A']  # row selected by its label
df.iloc[0]  # row selected by its integer position
# Subsets: (row label(s), column label(s)), just like numpy indexing.
# Labels are case-sensitive and must exist in the frame, otherwise KeyError.
df.loc['b', 'y']
df.loc[['A', 'b'], ['x', 'y']]

DataFrames-2

import pandas as pd
import numpy as np
# 5x4 frame of random normals used for the conditional-selection examples.
df = pd.DataFrame(np.random.randn(5, 4), ['A', 'b', 'c', 'd', 'e'], ['w', 'x', 'y', 'z'])

df[df > 0]  # conditional selection, like numpy: cells failing the test become NaN
df[df['w'] > 0]  # boolean Series as a row filter: only rows where the condition is True
df[df['w'] > 0]['x']  # the filter returns a DataFrame, so it can be indexed again
# Combining conditions: Python's `and` / `or` do not work on Series.
#   and -> &
#   or  -> |
# Each comparison needs its own parentheses because & and | bind tighter.
df[(df['w'] > 0) & (df['x'] < 0)]
# reset_index() returns a new frame in which the old index is moved into a
# column named 'index' and replaced by 0..n-1; df itself is not modified.
df.reset_index()

# Adding a new column with custom data (one value per row).
df['state'] = 'ca ba da as df'.split()
# set_index() returns a new frame indexed by the named column; the name must
# match the column exactly ('state'). df itself is not modified.
df.set_index('state')

DataFrames-3

import pandas as pd
import numpy as np

# Two-level (hierarchical) index: an outer group label and an inner number.
outside = ['G1', 'G1', 'G1', 'G2', 'G2', 'G2']
inside = [1, 2, 3, 1, 2, 3]
hier_index = list(zip(outside, inside))  # list of tuple pairs: ('G1', 1), ('G1', 2), ..., ('G2', 3)
hier_index = pd.MultiIndex.from_tuples(hier_index)  # turn the pairs into a MultiIndex
df = pd.DataFrame(np.random.randn(6, 2), hier_index, ['A', 'B'])  # a group of groups
df.loc['G1']  # every row of the outer group 'G1'
# The index levels start out unnamed; assign one name per level.
df.index.names = ['Groups', 'Nums']
# Single cell: outer label 'G1', inner label 2, column 'A'. One .loc call
# avoids the chained lookup df.loc['G1']['A'][2].
print(df.loc[('G1', 2), 'A'])
df.xs('G1')  # cross-section on the outer level, same rows as df.loc['G1']
# Cross-section on the inner level: rows with Nums == 1 from both groups.
# The level name must match the one assigned above ('Nums').
df.xs(1, level='Nums')

Missing Data

import pandas as pd
import numpy as np
# Three columns with a decreasing number of gaps: A has one NaN, B has two, C has none.
d = dict(A=[1, 2, np.nan], B=[5, np.nan, np.nan], C=[1, 2, 3])
df = pd.DataFrame(data=d)
# Default axis=0 drops every ROW that contains a NaN (axis=1 drops columns).
# thresh=N keeps only the rows/columns that have at least N non-NaN values.
df.dropna()
# Replace every NaN with one fixed value; returns a new frame.
df.fillna("fil value")
# Replace the gaps in a single column with that column's mean.
df['A'].fillna(df['A'].mean())

Groupby

import pandas as pd
import numpy as np

# groupby collects rows that share a value in the given column and then
# applies an aggregate (sum, mean, std, count, ...) to each group.
data = {
    'Company': ['GOOG', 'GOOG', 'MSFT', 'MSFT', 'FB', 'FB'],
    'Person': ['Sam', 'Charlie', 'Amy', 'Vanessa', 'Carl', 'Sarah'],
    'Sales': [200, 120, 340, 124, 243, 350],
}

df = pd.DataFrame(data)
bycomp = df.groupby('Company')  # column names are case-sensitive
# mean/std are undefined for the text column 'Person'; restrict them to
# numeric columns so they do not raise on recent pandas versions.
bycomp.mean(numeric_only=True)
bycomp.sum()
bycomp.std(numeric_only=True)
df.groupby('Company').sum().loc['FB']  # aggregate first, then pick one group's row
df.groupby('Company').count()  # number of non-null entries per column in each group
df.groupby('Company').max()
# describe() reports count/mean/std/min/quartiles/max per group in one call;
# append .transpose() to flip the layout. Very handy for a first look at the data.
df.groupby('Company').describe()

Merging, Joining, and Concatenating

import numpy as np
import pandas as pd

def _labelled_frame(first):
    """Return a 4-row frame with columns A-D whose cells read '<column><row label>'.

    Row labels are the four consecutive integers starting at ``first``.
    """
    rows = list(range(first, first + 4))
    cells = {col: [col + str(r) for r in rows] for col in 'ABCD'}
    return pd.DataFrame(cells, index=rows)


# Three frames with identical columns and consecutive, non-overlapping row labels.
df1 = _labelled_frame(0)
df2 = _labelled_frame(4)
df3 = _labelled_frame(8)
# concat stacks along rows by default (axis=0); pass axis=1 to place frames side by side.
pd.concat([df1, df2, df3])
# Two frames that share a single 'key' column.
left = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                     'A': ['A0', 'A1', 'A2', 'A3'],
                     'B': ['B0', 'B1', 'B2', 'B3']})

right = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                      'C': ['C0', 'C1', 'C2', 'C3'],
                      'D': ['D0', 'D1', 'D2', 'D3']})
# Instead of stacking (concatenating), merge glues rows together where the
# key values are equal.
pd.merge(left, right, how='inner', on='key')

# Merging on a list of keys requires frames that actually contain every
# listed column; rows match only when ALL keys are equal.
left_multi = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],
                           'key2': ['K0', 'K1', 'K0', 'K1'],
                           'A': ['A0', 'A1', 'A2', 'A3'],
                           'B': ['B0', 'B1', 'B2', 'B3']})

right_multi = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],
                            'key2': ['K0', 'K0', 'K0', 'K0'],
                            'C': ['C0', 'C1', 'C2', 'C3'],
                            'D': ['D0', 'D1', 'D2', 'D3']})
pd.merge(left_multi, right_multi, on=['key1', 'key2'])

# how= also accepts 'outer' (union of keys), 'right' (keep every right key)
# and 'left' (keep every left key); the default is 'inner'.
# Frames keyed by their index rather than by a column; the row labels
# overlap only partly (K1 is left-only, K3 is right-only).
left = pd.DataFrame(data=dict(A=['A0', 'A1', 'A2'], B=['B0', 'B1', 'B2']),
                    index='K0 K1 K2'.split())

right = pd.DataFrame(data=dict(C=['C0', 'C2', 'C3'], D=['D0', 'D2', 'D3']),
                     index='K0 K2 K3'.split())
# join lines frames up on their index; by default every row of the caller
# (left) is kept and missing matches become NaN.
left.join(other=right)
# how='outer' keeps the union of both indexes.
left.join(other=right, how='outer')