ml4201
🧩 Syntax:
1. DATA_PREPROCESSING
Importing the libraries
import numpy as np
import pandas as pd
Importing the dataset
# Load the dataset; X = every column except the last (features), y = last column (target).
dataset=pd.read_csv('/Data - Data (1).csv')
X=dataset.iloc[:, :-1].values
y=dataset.iloc[:,-1].values
dataset.iloc[0] #first row as a Series
dataset.iloc[[0]] #first row as a one-row DataFrame (list selector keeps 2-D shape)
dataset.iloc[0,1] #single scalar value at row 0, column 1
dataset.iloc[[0,1]] #first two rows as a DataFrame (rows only, all columns)
dataset.iloc[0:3,0:4] #rows 0-2, columns 0-3
dataset.iloc[0:3,0:2] #rows 0-2, first two columns
dataset.iloc[0:3,:-2] #rows 0-2, all columns except the last two
print(X) #independent attributes
print(y) #dependent attributes
Taking care of missing data
# Replace missing numeric values (NaN) in columns 1-2 with the per-column mean.
from sklearn.impute import SimpleImputer
imputer=SimpleImputer(missing_values=np.nan, strategy='mean')
imputer.fit(X[:, 1:3]) #learn the mean of columns 1 and 2
X[:, 1:3]=imputer.transform(X[:, 1:3]) #fill missing entries with the learned means
print(X)
Encoding the independent categorical column (i.e. the Country column) with one-hot encoding
# One-hot encode the categorical column at index 0; pass the remaining columns through unchanged.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct=ColumnTransformer(transformers=[('encoder',OneHotEncoder(),[0])], remainder='passthrough')
X=np.array(ct.fit_transform(X)) #encoded columns are moved to the front of X
print(X)
Encoding the dependent variable (i.e. the output column) using label encoding
# Label-encode the dependent variable into consecutive integers (e.g. No/Yes -> 0/1).
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
y=le.fit_transform(y)
print(y)
Splitting the Dataset into Training and Testing Set
# 80/20 train/test split; fixed random_state for reproducibility.
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)
print(X_train)
Feature Scaling - Standardization
# Standardize the numeric columns (index 3 onward, skipping the one-hot columns):
# fit the scaler on the training set only, then apply the same transform to the test set.
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X_train[:,3:]=sc.fit_transform(X_train[:,3:])
X_test[:,3:]=sc.transform(X_test[:,3:]) #transform only — no fitting on test data
print(X_train)
<------------------------------------------------------------------------------------------------------>
2. Binary classifier 5 detector
# Download MNIST (70k handwritten-digit images, each flattened to 784 features) from OpenML.
from sklearn.datasets import fetch_openml
import numpy as np
mnist= fetch_openml('mnist_784',cache=True,version=1)
mnist.keys()
np.sqrt(784) #28.0 — each image is 28x28 pixels
mnist['target']
mnist
mnist.target=mnist.target.astype(np.int8) #labels arrive as strings; cast to small ints
print(mnist['DESCR'])
np.unique(mnist['target'])
X,y =mnist["data"],mnist["target"]
np.unique(mnist["data"])
y.value_counts()
X,y =X.values,y.values #pandas DataFrame/Series -> plain numpy arrays
# Visualize individual digits (notebook-only: uses the %matplotlib inline magic).
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
some_digit =X[2]
some_digit_image =some_digit.reshape(28,28) #flat 784-vector back to a 28x28 image
plt.imshow(some_digit_image)
plt.axis("off")
X.shape
y=y.astype(np.uint8)
digit=X[0]
digit
y[5]
digit=digit.reshape(28,28)
digit
import matplotlib
import matplotlib.pyplot as plt
plt.imshow(digit,cmap=matplotlib.cm.binary) #binary colormap: white background, black strokes
plt.axis("off")
plt.show()
# Train a binary "is it a 5?" detector with an SGD linear classifier.
# MNIST comes pre-split: first 60k images are the training set, last 10k the test set.
X_train,X_test,y_train,y_test =X[:60000],X[60000:],y[:60000],y[60000:]
# Shuffle the training set so gradient descent / CV folds are not order-biased.
shuffling_index=np.random.permutation(60000)
shuffling_index
# BUG FIX: the original assigned the shuffled labels to `ytrain` (typo), so
# `y_train` stayed in the ORIGINAL order while X_train was shuffled — every
# image was paired with the wrong label. Shuffle both with the same index.
X_train,y_train=X_train[shuffling_index],y_train[shuffling_index]
X_train
# Binary targets: True for images of the digit 5, False otherwise.
y_train_5=y_train==5
# BUG FIX: the original computed y_test_5 from y_train; it must come from y_test.
y_test_5=y_test==5
from sklearn.linear_model import SGDClassifier
# np.inf replaces np.infty, an alias removed in NumPy 2.0.
# tol=inf disables the tolerance check so training runs the full max_iter epochs.
sgd_clf=SGDClassifier(max_iter=20,tol=np.inf)
sgd_clf.fit(X_train,y_train_5)
sgd_clf.predict([X[0]])
y_train_pred = sgd_clf.predict(X_train) #predictions on the training set (used by the metrics cell)
y[130]
# Evaluate the 5-detector on its own training-set predictions.
from sklearn.metrics import classification_report,confusion_matrix
confusion_matrix(y_train_5,y_train_pred) #rows: actual (non-5, 5); columns: predicted
from sklearn import metrics
print(
f"Classification report for classifier {sgd_clf}:\n"
f"{metrics.classification_report(y_train_5,y_train_pred)}\n"
)
from sklearn.metrics import precision_score,recall_score
precision_score(y_train_5,y_train_pred) #TP / (TP + FP)
recall_score(y_train_5,y_train_pred) #TP / (TP + FN)
<------------------------------------------------------------------------------------------------------>
3. Linear Regression - predicts petrol consumption from tax, income, and driver-licence share using multiple linear regression.
# Multiple linear regression on the petrol-consumption dataset.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
dataset = pd.read_csv('petrolconsumption.csv')
dataset.shape
dataset.head()
dataset.describe()
# Three predictor columns and one numeric target column.
X=dataset[['Petrol_tax','Average_income','Population_Driverlicence']]
y=dataset['Petrol_Consumption']
from sklearn.model_selection import train_test_split
X_train , X_test , y_train , y_test = train_test_split(X ,y,test_size=0.2 , random_state=0)
from sklearn.linear_model import LinearRegression
reg=LinearRegression()
reg.fit(X_train,y_train)
# One learned coefficient per feature, labelled by column name.
coefficent = pd.DataFrame(reg.coef_, X.columns, columns=['Coefficient'])
print(coefficent)
y_pred=reg.predict(X_test)
# Side-by-side comparison of true vs predicted consumption on the test set.
df=pd.DataFrame({'Actual':y_test , 'Predicted':y_pred})
print(df)
from sklearn import metrics
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
************************************************************************************************
4. Polynomial_Regression
# Polynomial regression: fit a plain linear model, then a degree-3 polynomial
# model, and compare them visually and with error metrics.
import numpy as np
import pandas as pd
dataset=pd.read_csv('/content/poly_dataset - Sheet1.csv')
X=dataset.iloc[:,:-1].values
y=dataset.iloc[:,-1].values
print(y)
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=1/3,random_state=0)
# Baseline: ordinary linear regression on the raw feature.
from sklearn.linear_model import LinearRegression
lin_reg=LinearRegression()
lin_reg.fit(X_train,y_train)
import matplotlib.pyplot as plt
plt.scatter(X,y,color='red')
plt.plot(X,lin_reg.predict(X),color='blue')
plt.title('Truth or bluff(linear regression)')
plt.xlabel('X')
plt.ylabel('Y')
plt.show()
# Degree-3 polynomial features; include_bias=False because LinearRegression
# fits its own intercept.
from sklearn.preprocessing import PolynomialFeatures
poly_reg=PolynomialFeatures(degree=3,include_bias=False)
X_train_trans=poly_reg.fit_transform(X_train)
# BUG FIX: the original called fit_transform on the test set (and again on
# every later prediction). A transformer must be fitted on training data only;
# use transform() everywhere after the single fit above.
X_test_trans=poly_reg.transform(X_test)
lin_reg_2=LinearRegression()
lin_reg_2.fit(X_train_trans,y_train)
print(lin_reg_2.coef_)
print(lin_reg_2.intercept_)
import matplotlib.pyplot as plt
plt.scatter(X,y,color='red')
plt.plot(X,lin_reg_2.predict(poly_reg.transform(X)),color='blue')
plt.title('polynomial regression')
plt.xlabel('X')
plt.ylabel('Y')
plt.show()
lin_reg_2.predict(poly_reg.transform([[6.5]]))
# Reuse the already-transformed test matrix instead of recomputing it per metric.
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
print("MAE",mean_absolute_error(y_test,lin_reg_2.predict(X_test_trans)))
print("MSE",mean_squared_error(y_test,lin_reg_2.predict(X_test_trans)))
print("RMSE",np.sqrt(mean_squared_error(y_test,lin_reg_2.predict(X_test_trans))))
r2_score(y_test,lin_reg_2.predict(X_test_trans))
*******************************************************************************************
5. Logistic Regression
# Logistic regression on the Social Network Ads dataset: predict the binary
# last column from the other columns (apparently age and salary, given the
# sample prediction [[30, 87000]] below — confirm against the CSV).
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
dataset = pd.read_csv('Social_Network_Ads.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)
print(X_train)
print(y_train)
print(X_test)
print(y_test)
# Standardize the features; fit on the training set only, reuse for the test set.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
print(X_train)
print(X_test)
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0) #fixed seed for reproducible fitting
classifier.fit(X_train, y_train)
# Single prediction: scale the raw input with the SAME fitted scaler first.
print(classifier.predict(sc.transform([[30,87000]])))
y_pred = classifier.predict(X_test)
# Predicted vs actual labels, side by side as two columns.
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)
**********************************************************************************************************
6. Decision Tree
# Decision tree on the iris dataset, trained on petal length and width only,
# then rendered as a PNG via Graphviz (notebook-only: uses the !dot shell magic).
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
iris=load_iris()
X=iris.data[:,2:] #columns 2-3: petal length and petal width
y=iris.target
tree_clf=DecisionTreeClassifier(max_depth=2) #shallow tree so the rendered plot stays readable
tree_clf.fit(X,y)
# Export the fitted tree to Graphviz .dot format.
from sklearn.tree import export_graphviz
export_graphviz(
tree_clf,
out_file="iris_tree.dot",
feature_names=iris.feature_names[2:],
class_names=iris.target_names,
# rounded=True,
filled=True
)
!dot -Tpng iris_tree.dot -o iris_tree.png
from IPython import display
display.Image(filename="iris_tree.png")
******************************************************************************************
7. PCA mnist
# PCA on MNIST: find how many components cover 95% of the variance, project
# onto them, then train an SVM classifier on the reduced features.
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_openml
mnist=fetch_openml('mnist_784')
X,y=mnist["data"],mnist["target"]
X_train,X_test,y_train,y_test=train_test_split(X,y)
X=X_train
#applying PCA with all components to inspect the variance spectrum
from sklearn.decomposition import PCA
import numpy as np
pca=PCA()
pca.fit(X)
cumsum=np.cumsum(pca.explained_variance_ratio_)
d=np.argmax(cumsum>=0.95)+1 #smallest component count reaching 95% cumulative variance
d
#projecting onto the principal components (n_components=0.95 keeps 95% of the variance)
pca=PCA(n_components=0.95)
X_reduced=pca.fit_transform(X)
pca.n_components_
#Checking for the variance explained
#did we hit the min 95% ?
np.sum(pca.explained_variance_ratio_)
some_digit=X_reduced[0]
# train an SVM classifier on the dimensionality-reduced training data
from sklearn.svm import SVC
svm_clf=SVC()
svm_clf.fit(X_reduced,y_train)
svm_clf.predict([some_digit])
**************************************************************************************
8. Kmeans
# K-means customer segmentation on annual income and spending score.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df=pd.read_csv('/content/Mall_Customers.csv')
df.head()
x=df.iloc[:,[3,4]].values #columns 3-4: annual income (k$) and spending score (1-100)
plt.scatter(df['Annual Income (k$)'],df['Spending Score (1-100)'])
# Elbow method: plot within-cluster sum of squares (inertia) for k = 1..10.
from sklearn.cluster import KMeans
wcss = [] #SSE / inertia for each candidate k
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i)
    kmeans.fit(x)
    wcss.append(kmeans.inertia_)
plt.plot(range(1,11),wcss)
plt.title('The Elbow Method')
plt.xlabel('No. of clusters')
plt.ylabel('WCSS')
plt.show()
# Final model with k=5 (the elbow); label every customer with its cluster.
kmeans = KMeans(n_clusters=5, random_state=42)
y_pred=kmeans.fit_predict(x)
print(y_pred)
df['cluster']=y_pred
df.head()
# BUG FIX: the original wrote df['cluster'].unique without parentheses, which
# stores the bound method object itself, not the distinct cluster labels.
uniqueVal=df['cluster'].unique()
uniqueVal
****************************************************************************yasserkhan14
Member