# 8) Feature engineering

def _finalise_rate(rate):
    """Map +/-inf to -9 and NaN to 0, then return a single-column 2-D array."""
    cleaned = rate.replace([np.inf, -np.inf], -9).fillna(0)
    return cleaned.to_numpy().reshape(-1, 1)


class Xfrmer_rateofinterest(BaseEstimator, TransformerMixin):
    """Derive an approximate interest-rate feature from annuity and term.

    Reads the ``CNT_PAYMENT`` (n) and ``AMT_ANNUITY`` (a) columns of the
    input and evaluates::

        rate = 6 * (a - n) / (n**2 + (5 + 2 * a) * n - 2 * a)

    Infinite results are replaced by -9 and NaN results by 0.
    """

    def __init__(self):
        # Placeholder attribute; nothing in this class reads it.
        self.rteofintrst = None

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the transformer keeps no state."""
        return self

    def transform(self, X, y=None):
        """Return the rate feature as an array of shape ``(len(X), 1)``."""
        n = X["CNT_PAYMENT"]
        anuity = X["AMT_ANNUITY"]
        numerator = 6 * (anuity - n)
        denominator = np.square(n) + (5 + 2 * anuity) * n - 2 * anuity
        X = _finalise_rate(numerator / denominator)
        print('rteint shape', X.shape)
        return X


class Xfrmer_rteofintrst(BaseEstimator, TransformerMixin):
    """Derive an interest-rate feature from credit amount, annuity and term.

    Formula taken from the Kaggle discussion
    https://www.kaggle.com/c/home-credit-default-risk/discussion/64598 ::

        interest = CNT_PAYMENT * AMT_ANNUITY - AMT_CREDIT
        rate     = 24 * interest / (AMT_CREDIT * (CNT_PAYMENT + 1))

    Infinite results are replaced by -9 and NaN results by 0.
    """

    def __init__(self):
        # Placeholder attribute; nothing in this class reads it.
        self.rteofintrst = None

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the transformer keeps no state."""
        return self

    def transform(self, X, y=None):
        """Return the rate feature as an array of shape ``(len(X), 1)``."""
        n = X["CNT_PAYMENT"]
        anuity = X["AMT_ANNUITY"]
        credit = X["AMT_CREDIT"]
        interst = (n * anuity) - credit
        numerator = 24 * interst
        denominator = credit * (n + 1)
        return _finalise_rate(numerator / denominator)
def display_mising_value(self):
    """Summarise missing-value percentage and skewness per column.

    Reads ``self.df_source`` and stores on ``self``:

    * ``df_mis_val``      - one row per column with ``PCT`` (percentage of
      null entries) and ``SKEWNES`` (skew rounded to 2 decimals; NaN for
      columns that are not numeric).
    * ``df_misval_lvl1``  - columns with ``PCT < 5``
    * ``df_misval_lvl2``  - columns with ``5 <= PCT < 20``
    * ``df_misval_lvl3``  - columns with ``20 <= PCT < 75``
    * ``df_misval_lvl4``  - columns with ``PCT >= 75``

    Each level frame is sorted by ``PCT`` in ascending order.
    """
    source = self.df_source
    n_rows = source.shape[0]

    summary = {
        "PCT": source.isnull().sum() * 100 / n_rows,
        # numeric_only=True: recent pandas raises on object columns otherwise.
        "SKEWNES": source.skew(numeric_only=True).round(2),
    }
    self.df_mis_val = pd.DataFrame(summary, columns=["PCT", "SKEWNES"])

    pct = self.df_mis_val["PCT"]
    # sort_values returns a new frame, so no in-place write on a .loc slice.
    self.df_misval_lvl1 = self.df_mis_val.loc[pct < 5].sort_values(by="PCT")
    self.df_misval_lvl2 = self.df_mis_val.loc[(pct >= 5) & (pct < 20)].sort_values(by="PCT")
    self.df_misval_lvl3 = self.df_mis_val.loc[(pct >= 20) & (pct < 75)].sort_values(by="PCT")
    self.df_misval_lvl4 = self.df_mis_val.loc[pct >= 75].sort_values(by="PCT")
# Impute OWN_CAR_AGE: car owners with a missing age get the median age of
# all car owners; applicants without a car get 0.
car_age_missing = X['OWN_CAR_AGE'].isna()
owns_car = X['FLAG_OWN_CAR'] == 'Y'
mdn = X.loc[owns_car, 'OWN_CAR_AGE'].median()
X.loc[owns_car & car_age_missing, 'OWN_CAR_AGE'] = mdn
X.loc[(X['FLAG_OWN_CAR'] == 'N') & car_age_missing, 'OWN_CAR_AGE'] = 0

# name_income_type='Commercial associate' and occupation_type:
# fill missing OCCUPATION_TYPE for that income type with its most frequent
# occupation. The original filtered the assignment on 'Civil marriage',
# which is not the group the mode was computed for.
is_commercial = X['NAME_INCOME_TYPE'] == 'Commercial associate'
occupation_counts = X.loc[is_commercial, 'OCCUPATION_TYPE'].value_counts()
# Skip the fill when the group has no non-null occupation to take a mode from.
cat_mode = occupation_counts.index[0] if not occupation_counts.empty else None
if cat_mode is not None:
    X.loc[is_commercial & X['OCCUPATION_TYPE'].isna(), 'OCCUPATION_TYPE'] = cat_mode

# 9) Model Explanation

class Xfrmer_replacenp(BaseEstimator, TransformerMixin):
    """Replace placeholder values across the whole input with 0.

    The values replaced are the number ``365243.0`` (a sentinel specific to
    this case study) and the strings ``"XAP"``, ``"XNA"`` and ``"nan"``.

    NOTE(review): the original description also mentioned replacing +/-inf
    and NaN, but the code only handles the four values listed above.
    """

    # Placeholders are applied in this order, one masked assignment each.
    _PLACEHOLDERS = (365243.0, "XAP", "XNA", "nan")

    def __init__(self):
        # Placeholder attribute; nothing in this class reads it.
        self._features = None

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the transformer keeps no state."""
        return self

    def transform(self, X, y=None):
        """Overwrite placeholders in ``X`` with 0 and return ``X``.

        The replacement is done in place: the object passed in is modified
        and the same object is returned.
        """
        for placeholder in self._PLACEHOLDERS:
            X[X == placeholder] = 0
        print('all replace', X.shape)
        return X
# Object-dtype intermediate results are wrapped in a DataFrame so that
# missing entries can be filled with 0 before being handed on as X.
# NOTE(review): the original snippet was collapsed onto one line; all three
# statements are assumed to belong to the `if` branch - confirm.
if tmpval.dtype == 'object':
    tmpval = pd.DataFrame(tmpval).fillna(0)
    X = tmpval
# Rebuild column names for the transformed credit-card features and attach
# the transformed columns to the train/test frames.
col_lvl_ft_names = []
ft_union = crdcrd_ft_gen_piplin['engineer_data']
tpl_xfrmr_lst = ft_union.transformer_list

# The categorical pipeline is the last entry of the union: the loop below
# names every entry except the last, so the last one is indexed with -1
# rather than a hard-coded position.
catpipe = tpl_xfrmr_lst[-1][1]
ccohe = catpipe.named_steps['CC_OHE']
print(len(ccohe.categories_))
lst_ohe_ft_name = ccohe.categories_[0]

print('Generating column names for numerical features...')
# Each numerical transformer yields one column, named after the transformer.
col_lvl_ft_names.extend(xfrmr_name for xfrmr_name, _ in tpl_xfrmr_lst[:-1])

# Add OHE categories as column names.
print('Generating column names for categorical features...')
col_lvl_ft_names.extend("CC_NMECTRCTSTAT_" + itm.upper() for itm in lst_ohe_ft_name)

# Recreate the dataframes with imputed column values.
df_trn_colvl = pd.DataFrame(X_trn_xfrm, columns=col_lvl_ft_names)
df_tst_colvl = pd.DataFrame(x_tst_xfrm, columns=col_lvl_ft_names)

# Concatenate the new columns onto the original dataframes.
# NOTE(review): pd.concat(axis=1) aligns on index and the new frames carry a
# default RangeIndex - confirm dtprcs.X_train / dtprcs.x_test do as well.
dtprcs.X_train = pd.concat([dtprcs.X_train, df_trn_colvl], axis=1)
dtprcs.x_test = pd.concat([dtprcs.x_test, df_tst_colvl], axis=1)

# 11) Conclusion and Future Work

`Read Mathematics formulae as poetry                 -- Srikanth Varma`

# 13) References

Machine learner in the making, programmer, music lover.

## More from Janardhanan a r

Machine learner in the making, programmer, music lover.