# Wokwi - Online ESP32, STM32, Arduino Simulator

# -*- coding: utf-8 -*-
"""
@author: GITAA
"""

######################## MODULE: BINARY CLASSIFICATION ##############################


#%%

#Importing the required library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#%%
# Load the training samples and the samples that will be classified later.
# The paths are relative, so both CSV files are looked up in the current
# working directory.
train_data = pd.read_csv('engineTest.csv')
test_data = pd.read_csv('engineTestCheck.csv')

#%%
# Show which columns each file provides.
print(train_data.columns)
print(test_data.columns)

#%%
# Show how many training samples fall into each 'Result' class.  The counts
# are printed explicitly because a bare expression produces no output when
# the file is run as a script.
print(train_data['Result'].value_counts())


#%%
"""
In this example the first two variables X1 and X2 are used for the 
optimization in order to demonstrate the result graphically
"""

# Build the frame used for training: a constant column 'X0' (always 1, so
# the intercept is handled like any other weight) followed by the 'X1',
# 'X2' and 'Result' columns taken from train_data.

train_data1 = pd.DataFrame(
    {'X0': np.ones(len(train_data), dtype=int)}
).assign(
    X1=train_data['X1'],
    X2=train_data['X2'],
    Result=train_data['Result'],
)


#%%

# n_k holds the three weights of the separating line (applied to X0, X1 and
# X2 in that order); every weight starts at 10 and is refined during training.
n_k = np.full(3, 10)


# n_kprev keeps the weights as they were before the current update step.
n_kprev = np.zeros(3, dtype=int)

# Step length applied to every correction of the weights.
cLearn = 0.1

# Flag telling whether the latest sweep over the samples changed the weights;
# starts as True so that the training loop is entered.
updateCounter = True

# Stopping criteria: upper bound for the sweep counter, and the counter itself.
maxIteration = 2000
iteration = 1

#%%

# Plot the training samples, one scatter series per 'Result' class, together
# with the line described by the initial weights n_k.
grouped_data = train_data1.groupby(by='Result')

for key, data in grouped_data:
    plt.scatter(data['X1'], data['X2'], label=key)

# The line is where n_k[0] + n_k[1]*X1 + n_k[2]*X2 equals 0, rewritten as
# X2 = slope*X1 + intercept.  It does not depend on the class being drawn,
# so it is computed and plotted once instead of once per class.
slope = -1*n_k[1]/n_k[2]
intercept = -1*n_k[0]/n_k[2]
X = np.arange(80)
Y = slope*X + intercept
plt.plot(X, Y, c='red')

# Fixed axes so successive figures are directly comparable.
plt.xlim(0, 80)
plt.ylim(-5, 80)
plt.legend()
plt.show()


#%%


# Train the linear classifier with the perceptron update rule: sweep over the
# training samples repeatedly and nudge the weights n_k whenever a sample lies
# on the wrong side of the line.  Training stops when a whole sweep needs no
# correction or when the sweep counter reaches maxIteration.

# Extract the inputs once.  Fetching single rows through .iloc and label
# lookups on every step is slow, and plain arrays are indexed by position, so
# they do not depend on how the frame happens to be indexed.
features = train_data1[['X0', 'X1', 'X2']].to_numpy(dtype=float)
labels = train_data1['Result'].to_numpy()

# The grouping is only used for the progress plots and never changes.
grouped_data = train_data1.groupby(by='Result')

while updateCounter and iteration < maxIteration:
    # Stays False only if no sample triggers a correction during this sweep.
    updateCounter = False
    for i, (sample, label) in enumerate(zip(features, labels)):

        # Remember the weights as they were before this sample is examined.
        n_kprev = n_k

        # Signed score of the sample: > 0 is read as "Pass", otherwise "Fail".
        pred = sample.dot(n_k)

        if label == "Pass" and pred <= 0:
            # A "Pass" sample scored as "Fail".  A score of exactly 0 counts
            # as wrong too, because the `> 0` decision rule used for
            # prediction labels it "Fail".  Move the line towards the sample.
            n_k = n_kprev + cLearn * sample
            updateCounter = True
        elif label == "Fail" and pred > 0:
            # A "Fail" sample scored as "Pass": move the line away from it.
            n_k = n_kprev - cLearn * sample
            updateCounter = True

        # Show the current line on every 15th sample of the first two sweeps
        # only, to illustrate how it moves without opening too many figures.
        if i % 15 == 0 and iteration <= 2:
            for key, data in grouped_data:
                plt.scatter(data['X1'], data['X2'], label=key)

            # The line is the same for every class, so draw it once.
            slope = -1*n_k[1]/n_k[2]
            intercept = -1*n_k[0]/n_k[2]
            X = np.arange(80)
            Y = slope*X + intercept
            plt.plot(X, Y, c='red')

            plt.xlim(0, 80)
            plt.ylim(-5, 80)
            plt.legend()
            plt.show()
    iteration = iteration + 1

#%%



# Classify the samples in test_data with the trained weights n_k and plot the
# predicted classes together with the separating line.

# Constant 'X0' column so the intercept weight n_k[0] is applied to every row.
test_data1 = pd.DataFrame({'X0': np.repeat(1, len(test_data))})

# 'Unnamed: 0' is the name pandas gives to a CSV column whose header is
# blank; it is kept here under the name 'Engine'.
test_data1[['Engine','X1','X2']] = test_data[['Unnamed: 0','X1','X2']]

# Order the columns alphabetically: Engine, X0, X1, X2.
test_data1.sort_index(axis =1, inplace =True)

# Select the inputs by name, in the order the weights expect, rather than by
# position, so the score stays correct if the frame's columns change.
test_result = test_data1[['X0', 'X1', 'X2']].to_numpy().dot(n_k)
test_data1["Test_Result"] = test_result

# Decision rule: a strictly positive score means "Pass", anything else "Fail".
test_data1["Predicted_Result"] = np.where(test_data1['Test_Result'] > 0 ,"Pass","Fail")

grouped_data = test_data1.groupby(by='Predicted_Result')
for key, data in grouped_data:
    plt.scatter(data['X1'], data['X2'], label=key)

# The line is where n_k[0] + n_k[1]*X1 + n_k[2]*X2 equals 0; it is the same
# for every class, so it is drawn once.
slope = -1*n_k[1]/n_k[2]
intercept = -1*n_k[0]/n_k[2]
X = np.arange(80)
Y = slope*X + intercept
plt.plot(X, Y, c='red')

plt.xlim(0, 80)
plt.ylim(-5, 80)
plt.legend()
plt.show()