https://drive.google.com/drive/folders/1IDy4wuQyQhuJqEH4M1znYD_0ZKcmUloE
--------------------------------------------------------------------------------------------------------------
1. A* search

# -*- coding: utf-8 -*-
"""
Created on Sun Dec 18 18:25:08 2022

@author: Divyansh
"""
import sys

inf = 99999  # sentinel meaning "no edge"

# adjacency matrix: g[u][v] is the cost of the edge from u to v
g = [
    [0,   4,   3,   inf, inf, inf, inf],
    [inf, 0,   inf, inf, 12,  5,   inf],
    [inf, inf, 0,   7,   10,  inf, inf],
    [inf, inf, inf, 0,   2,   inf, inf],
    [inf, inf, inf, inf, 0,   inf, 5],
    [inf, inf, inf, inf, inf, 0,   16],
    [inf, inf, inf, inf, inf, inf, 0]
]
h = [14, 12, 11, 6, 4, 11, 0]  # heuristic estimate from each node to the goal
goal = 6
src = 0
arr = []  # frontier, kept sorted by f = g + h

class obj:
    def __init__(self, cost, path):
        self.cost = cost  # f-value of the partial path
        self.path = path

arr.append(obj(h[src], [src]))

while arr:
    cur_item = arr[0]
    cur_node = cur_item.path[-1]
    cur_cost = cur_item.cost
    cur_path = cur_item.path
    for i in range(len(h)):
        if g[cur_node][i] != inf and g[cur_node][i] != 0:
            # f(new) = f(cur) - h(cur) + h(new) + edge cost
            new_cost = cur_cost - h[cur_node] + h[i] + g[cur_node][i]
            new_path = cur_path.copy()
            new_path.append(i)
            if i == goal:
                print(new_cost)
                print(new_path)
                sys.exit()
            arr.append(obj(new_cost, new_path))
    arr.pop(0)
    arr = sorted(arr, key=lambda item: item.cost)
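
The list-sort frontier above re-sorts on every expansion. A minimal sketch of the same search using heapq as a priority queue, reusing the g, h, src, goal and inf defined above; the goal test moves to pop time, which is the textbook A* formulation:

import heapq

def astar(g, h, src, goal, inf=99999):
    # each heap entry is (f, path), where f = cost so far + heuristic
    frontier = [(h[src], [src])]
    while frontier:
        f, path = heapq.heappop(frontier)
        node = path[-1]
        if node == goal:  # goal test at pop time
            return f, path
        for i, w in enumerate(g[node]):
            if w != inf and w != 0:
                # f - h[node] recovers the cost so far along this path
                heapq.heappush(frontier, (f - h[node] + w + h[i], path + [i]))
    return None

print(astar(g, h, src, goal))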
--------------------------------------------------------------------------------------------------------------
2. AO*

class Graph:
    def __init__(self, graph, heuristicNodeList, startNode):
        # instantiate graph object with graph topology, heuristic values and start node
        self.graph = graph
        self.H = heuristicNodeList
        self.start = startNode
        self.parent = {}
        self.status = {}
        self.solutionGraph = {}

    def applyAOStar(self):  # starts the recursive AO* algorithm
        self.aoStar(self.start, False)

    def getNeighbors(self, v):  # gets the neighbors of a given node
        return self.graph.get(v, '')

    def getStatus(self, v):  # return the status of a given node
        return self.status.get(v, 0)

    def setStatus(self, v, val):  # set the status of a given node
        self.status[v] = val

    def getHeuristicNodeValue(self, n):
        return self.H.get(n, 0)  # always return the heuristic value of a given node

    def setHeuristicNodeValue(self, n, value):
        self.H[n] = value  # set the revised heuristic value of a given node

    def printSolution(self):
        print("FOR GRAPH SOLUTION, TRAVERSE THE GRAPH FROM THE START NODE:", self.start)
        print("------------------------------------------------------------")
        print(self.solutionGraph)
        print("------------------------------------------------------------")

    def computeMinimumCostChildNodes(self, v):
        # computes the minimum-cost child node set of a given node v
        minimumCost = 0
        costToChildNodeListDict = {}
        costToChildNodeListDict[minimumCost] = []
        flag = True
        for nodeInfoTupleList in self.getNeighbors(v):  # iterate over each set of child node/s
            cost = 0
            nodeList = []
            for c, weight in nodeInfoTupleList:
                cost = cost + self.getHeuristicNodeValue(c) + weight
                nodeList.append(c)
            if flag == True:  # initialize minimum cost with the cost of the first set of child node/s
                minimumCost = cost
                costToChildNodeListDict[minimumCost] = nodeList  # set the minimum-cost child node/s
                flag = False
            else:  # compare the current set against the running minimum cost
                if minimumCost > cost:
                    minimumCost = cost
                    costToChildNodeListDict[minimumCost] = nodeList  # set the minimum-cost child node/s
        return minimumCost, costToChildNodeListDict[minimumCost]  # return minimum cost and the minimum-cost child node/s

    def aoStar(self, v, backTracking):  # AO* algorithm for a start node, with a backtracking status flag
        print("HEURISTIC VALUES :", self.H)
        print("SOLUTION GRAPH   :", self.solutionGraph)
        print("PROCESSING NODE  :", v)
        print("-----------------------------------------------------------------------------------------")
        if self.getStatus(v) >= 0:  # if status of node v >= 0, compute the minimum-cost child nodes of v
            minimumCost, childNodeList = self.computeMinimumCostChildNodes(v)
            print(minimumCost, childNodeList)
            self.setHeuristicNodeValue(v, minimumCost)
            self.setStatus(v, len(childNodeList))
            solved = True  # check whether the minimum-cost child nodes of v are all solved
            for childNode in childNodeList:
                self.parent[childNode] = v
                if self.getStatus(childNode) != -1:
                    solved = solved & False
            if solved == True:
                # if the minimum-cost child nodes of v are solved, set the current node's status to solved (-1)
                self.setStatus(v, -1)
                self.solutionGraph[v] = childNodeList  # update the solution graph with the solved nodes, which may be part of the solution
            if v != self.start:  # if v is not the start node, backtrack to propagate the revised cost
                self.aoStar(self.parent[v], True)  # backtracking call with backtracking status set to true
            if backTracking == False:  # if the current call is not a backtracking call
                for childNode in childNodeList:  # for each minimum-cost child node
                    self.setStatus(childNode, 0)  # set the child's status to 0 (needs exploration)
                    self.aoStar(childNode, False)  # explore the minimum-cost child node further

# for simplicity we'll consider the heuristic distances as given
print("Graph - 1")
h1 = {'A': 1, 'B': 6, 'C': 2, 'D': 12, 'E': 2, 'F': 1, 'G': 5, 'H': 7, 'I': 7, 'J': 1}
graph1 = {
    'A': [[('B', 1), ('C', 1)], [('D', 1)]],
    'B': [[('G', 1)], [('H', 1)]],
    'C': [[('J', 1)]],
    'D': [[('E', 1), ('F', 1)]],
    'G': [[('I', 1)]]
}
G1 = Graph(graph1, h1, 'A')
G1.applyAOStar()
G1.printSolution()
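
A note on the adjacency format this class expects: each value in the graph dict is a list of arcs, and each arc is itself a list of (child, edge-cost) tuples. An inner list with one tuple is an OR arc; an inner list with several tuples is an AND arc whose children must all be solved together. A second, illustrative graph (not part of the original sheet) showing both arc types:

# illustrative example only: A can be solved via B AND C together, or via D alone
h2 = {'A': 1, 'B': 6, 'C': 12, 'D': 10, 'E': 4, 'F': 4, 'G': 5, 'H': 7}
graph2 = {
    'A': [[('B', 1), ('C', 1)], [('D', 1)]],  # A := (B AND C) OR D
    'B': [[('G', 1)], [('H', 1)]],            # B := G OR H
    'D': [[('E', 1), ('F', 1)]]               # D := E AND F
}
G2 = Graph(graph2, h2, 'A')
G2.applyAOStar()
G2.printSolution()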
--------------------------------------------------------------------------------------------------------------
3. Candidate elimination

import csv

def g_0(n):
    return ("?",) * n

def s_0(n):
    return ('0',) * n

def more_general(h1, h2):
    more_general_parts = []
    for x, y in zip(h1, h2):
        mg = x == "?" or (x != "0" and (x == y or y == "0"))
        more_general_parts.append(mg)
    return all(more_general_parts)

def fulfills(example, hypothesis):
    # the implementation is the same as for hypotheses:
    return more_general(hypothesis, example)

def min_generalizations(h, x):
    h_new = list(h)
    for i in range(len(h)):
        if not fulfills(x[i:i+1], h[i:i+1]):
            h_new[i] = '?' if h[i] != '0' else x[i]
    return [tuple(h_new)]

print(min_generalizations(h=('0', '0', 'sunny'), x=('rainy', 'windy', 'cloudy')))

def min_specializations(h, domains, x):
    results = []
    for i in range(len(h)):
        if h[i] == "?":
            for val in domains[i]:
                if x[i] != val:
                    h_new = h[:i] + (val,) + h[i+1:]
                    results.append(h_new)
        elif h[i] != "0":
            h_new = h[:i] + ('0',) + h[i+1:]
            results.append(h_new)
    return results

print(min_specializations(h=('?', 'x',), domains=[['a', 'b', 'c'], ['x', 'y']], x=('b', 'x')))

with open('data21_shape.csv') as csvFile:
    examples = [tuple(line) for line in csv.reader(csvFile)]
print(examples)

def get_domains(examples):
    d = [set() for i in examples[0]]
    for x in examples:
        for i, xi in enumerate(x):
            d[i].add(xi)
    return [list(sorted(x)) for x in d]

print(get_domains(examples))

def candidate_elimination(examples):
    domains = get_domains(examples)[:-1]
    G = set([g_0(len(domains))])
    S = set([s_0(len(domains))])
    i = 0
    print("\n G[{0}]:".format(i), G)
    print("\n S[{0}]:".format(i), S)
    for xcx in examples:
        i = i + 1
        x, cx = xcx[:-1], xcx[-1]  # split the row into attributes and decision
        if cx == 'Y':  # x is a positive example
            G = {g for g in G if fulfills(x, g)}
            S = generalize_S(x, G, S)
        else:  # x is a negative example
            S = {s for s in S if not fulfills(x, s)}
            G = specialize_G(x, domains, G, S)
        print("\n G[{0}]:".format(i), G)
        print("\n S[{0}]:".format(i), S)

def generalize_S(x, G, S):
    S_prev = list(S)
    for s in S_prev:
        if s not in S:
            continue
        if not fulfills(x, s):
            S.remove(s)
            Splus = min_generalizations(s, x)
            # keep only generalizations that have a counterpart in G
            S.update([h for h in Splus if any([more_general(g, h) for g in G])])
            # remove hypotheses less specific than any other in S
            S.difference_update([h for h in S if any([more_general(h, h1) for h1 in S if h != h1])])
    return S

def specialize_G(x, domains, G, S):
    G_prev = list(G)
    for g in G_prev:
        if g not in G:
            continue
        if fulfills(x, g):
            G.remove(g)
            Gminus = min_specializations(g, domains, x)
            # keep only specializations that have a counterpart in S
            G.update([h for h in Gminus if any([more_general(h, s) for s in S])])
            # remove hypotheses less general than any other in G
            G.difference_update([h for h in G if any([more_general(g1, h) for g1 in G if h != g1])])
    return G

candidate_elimination(examples)
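
The script expects data21_shape.csv with no header and the decision in the last column ('Y' marks a positive example; anything else is negative). The file itself is not included in this sheet; a hypothetical fragment in the expected format:

big,red,circle,N
small,red,triangle,N
small,red,circle,Y
big,blue,circle,N
small,blue,circle,Y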
--------------------------------------------------------------------------------------------------------------
4. ID3

import math
import csv

def load_csv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    headers = dataset.pop(0)
    return dataset, headers

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

def subtables(data, col, delete):
    # partition the rows by the values of column col; optionally delete that column
    dic = {}
    coldata = [row[col] for row in data]
    attr = list(set(coldata))
    counts = [0] * len(attr)
    r = len(data)
    c = len(data[0])
    for x in range(len(attr)):
        for y in range(r):
            if data[y][col] == attr[x]:
                counts[x] += 1
    for x in range(len(attr)):
        dic[attr[x]] = [[0 for i in range(c)] for j in range(counts[x])]
        pos = 0
        for y in range(r):
            if data[y][col] == attr[x]:
                if delete:
                    del data[y][col]
                dic[attr[x]][pos] = data[y]
                pos += 1
    return attr, dic

def entropy(S):
    attr = list(set(S))
    if len(attr) == 1:  # all labels identical: zero entropy
        return 0
    counts = [0, 0]  # assumes a binary class label
    for i in range(2):
        counts[i] = sum([1 for x in S if attr[i] == x]) / (len(S) * 1.0)
    sums = 0
    for cnt in counts:
        sums += -1 * cnt * math.log(cnt, 2)
    return sums

def compute_gain(data, col):
    attr, dic = subtables(data, col, delete=False)
    total_size = len(data)
    entropies = [0] * len(attr)
    ratio = [0] * len(attr)
    total_entropy = entropy([row[-1] for row in data])
    for x in range(len(attr)):
        ratio[x] = len(dic[attr[x]]) / (total_size * 1.0)
        entropies[x] = entropy([row[-1] for row in dic[attr[x]]])
        total_entropy -= ratio[x] * entropies[x]
    return total_entropy

def build_tree(data, features):
    lastcol = [row[-1] for row in data]
    if (len(set(lastcol))) == 1:  # pure node: all rows share one label
        node = Node("")
        node.answer = lastcol[0]
        return node
    n = len(data[0]) - 1
    gains = [0] * n
    for col in range(n):
        gains[col] = compute_gain(data, col)
    split = gains.index(max(gains))  # attribute with the highest information gain
    node = Node(features[split])
    fea = features[:split] + features[split+1:]
    attr, dic = subtables(data, split, delete=True)
    for x in range(len(attr)):
        child = build_tree(dic[attr[x]], fea)
        node.children.append((attr[x], child))
    return node

def print_tree(node, level):
    if node.answer != "":
        print(" " * level, node.answer)
        return
    print(" " * level, node.attribute)
    for value, n in node.children:
        print(" " * (level + 1), value)
        print_tree(n, level + 2)

def classify(node, x_test, features):
    if node.answer != "":
        print(node.answer)
        return
    pos = features.index(node.attribute)
    for value, n in node.children:
        if x_test[pos] == value:
            classify(n, x_test, features)

'''Main program'''
dataset, features = load_csv("id3dataset.csv")
node1 = build_tree(dataset, features)
print("The decision tree for the dataset using ID3 algorithm is")
print_tree(node1, 0)
testdata, features = load_csv("id3dataset.csv")
for xtest in testdata:
    print("The test instance:", xtest)
    print("The label for test instance:", end=" ")
    classify(node1, xtest, features)
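
load_csv expects id3dataset.csv to carry a header row, with the class label in the last column; note that entropy() above assumes that label is binary. The file is not included here; the classic play-tennis data is the usual choice, so a hypothetical fragment might look like:

Outlook,Temperature,Humidity,Wind,PlayTennis
Sunny,Hot,High,Weak,No
Sunny,Hot,High,Strong,No
Overcast,Hot,High,Weak,Yes
Rain,Mild,High,Weak,Yes
Rain,Cool,Normal,Weak,Yes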
--------------------------------------------------------------------------------------------------------------
5. Back propagation

import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
X = X / np.amax(X, axis=0)  # normalize each feature column by its maximum
y = y / 100

# sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# derivative of the sigmoid function, expressed in terms of the activation value
def derivatives_sigmoid(x):
    return x * (1 - x)

# variable initialization
epoch = 7000             # number of training iterations
lr = 0.1                 # learning rate
inputlayer_neurons = 2   # number of features in the data set
hiddenlayer_neurons = 3  # number of neurons in the hidden layer
output_neurons = 1       # number of neurons in the output layer

# weight and bias initialization: uniform random matrices of dimension x*y
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # forward propagation
    hinp = np.dot(X, wh) + bh
    hlayer_act = sigmoid(hinp)
    outinp = np.dot(hlayer_act, wout) + bout
    output = sigmoid(outinp)

    # backpropagation
    EO = y - output                               # output error
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad                       # delta at the output layer
    EH = d_output.dot(wout.T)                     # error propagated back to the hidden layer
    hiddengrad = derivatives_sigmoid(hlayer_act)  # how much the hidden-layer activations contributed to the error
    d_hiddenlayer = EH * hiddengrad

    # weight and bias updates (the bias updates were commented out in the original; a
    # standard backprop step trains them too)
    wout += hlayer_act.T.dot(d_output) * lr  # dot product of next-layer error and current-layer output
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)
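
A note on derivatives_sigmoid: it takes the activation rather than the pre-activation, because for a = sigmoid(z) the derivative sigmoid'(z) equals a * (1 - a), so the stored activations can be reused. The loop above therefore implements the usual chain-rule updates, summarized here in the script's own variable names:

# d_output      = (y - output) * output * (1 - output)
# d_hiddenlayer = (d_output @ wout.T) * hlayer_act * (1 - hlayer_act)
# wout += lr * hlayer_act.T @ d_output ;  bout += lr * column-sums of d_output
# wh   += lr * X.T @ d_hiddenlayer     ;  bh   += lr * column-sums of d_hiddenlayer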
--------------------------------------------------------------------------------------------------------------
6. Naive Bayes

import pandas as pd

msg = pd.read_csv('naivtext.csv', names=['message', 'label'])
print('The dimensions of the dataset', msg.shape)
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
X = msg.message
y = msg.labelnum
print(X)
print(y)

# splitting the dataset into train and test data
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(X, y)
print(xtest.shape)
print(xtrain.shape)
print(ytest.shape)
print(ytrain.shape)

# output of the count vectorizer is a sparse matrix
from sklearn.feature_extraction.text import CountVectorizer
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm = count_vect.transform(xtest)
print(count_vect.get_feature_names_out())  # get_feature_names() was removed in scikit-learn 1.2
df = pd.DataFrame(xtrain_dtm.toarray(), columns=count_vect.get_feature_names_out())
print(df)           # tabular representation
print(xtrain_dtm)   # sparse matrix representation

# training a multinomial Naive Bayes classifier on the training data
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB().fit(xtrain_dtm, ytrain)
predicted = clf.predict(xtest_dtm)

# printing accuracy metrics
from sklearn import metrics
print('Accuracy metrics')
print('Accuracy of the classifier is', metrics.accuracy_score(ytest, predicted))
print('Confusion matrix')
print(metrics.confusion_matrix(ytest, predicted))
print('Recall and Precision')
print(metrics.recall_score(ytest, predicted))
print(metrics.precision_score(ytest, predicted))

'''docs_new = ['I like this place', 'My boss is not my saviour']
X_new_counts = count_vect.transform(docs_new)
predictednew = clf.predict(X_new_counts)
for doc, category in zip(docs_new, predictednew):
    print('%s->%s' % (doc, 'pos' if category == 1 else 'neg'))'''
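
naivtext.csv is read without a header and must hold one text message and a pos/neg label per row. The file is not part of this sheet; any short labelled sentences work, e.g. a hypothetical fragment:

I love this sandwich,pos
This is an amazing place,pos
I feel very good about these beers,pos
This is my worst experience,neg
I do not like this restaurant,neg
He is my sworn enemy,neg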
--------------------------------------------------------------------------------------------------------------
7. EM and KMeans

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans

# importing the dataset
data = pd.read_csv('xclara.csv')
print("Input Data and Shape")
print(data.shape)
print(data.head())

# getting the values and plotting them
f1 = data['V1'].values
f2 = data['V2'].values
X = np.array(list(zip(f1, f2)))
print('Graph for whole dataset')
plt.scatter(f1, f2, c='black', s=7)
plt.show()

##########################################
kmeans = KMeans(n_clusters=3, random_state=0)
labels = kmeans.fit(X).predict(X)
centroids = kmeans.cluster_centers_
plt.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis')
print('Graph using KMeans Algorithm')
plt.scatter(centroids[:, 0], centroids[:, 1], marker='*', s=200, c='#050505')
plt.show()

# GMM demo
gmm = GaussianMixture(n_components=3).fit(X)
labels = gmm.predict(X)
# for plotting: scale the marker size by how confident the model is about each point
probs = gmm.predict_proba(X)
size = 10 * probs.max(1) ** 3
print('Graph using EM Algorithm')
# print(probs[:300].round(4))
plt.scatter(X[:, 0], X[:, 1], c=labels, s=size, cmap='viridis')
plt.show()
----------------------------------------------------------------------------------------------------
8. KNN

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import datasets

iris = datasets.load_iris()
iris_data = iris.data
iris_labels = iris.target
print(iris_data)
print(iris_labels)

x_train, x_test, y_train, y_test = train_test_split(iris_data, iris_labels, test_size=0.30)
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)
print('Confusion matrix is as follows')
print(confusion_matrix(y_test, y_pred))
print('Accuracy metrics')
print(classification_report(y_test, y_pred))
--------------------------------------------------------------------------------------------
9. LWR

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np1

def kernel(point, xmat, k):
    # Gaussian kernel: each training point's weight falls off with distance from the query point
    m, n = np1.shape(xmat)
    weights = np1.mat(np1.eye(m))
    for j in range(m):
        diff = point - xmat[j]  # the original indexed the global X here; use the argument instead
        weights[j, j] = np1.exp(diff * diff.T / (-2.0 * k ** 2))
    return weights

def localWeight(point, xmat, ymat, k):
    # weighted least-squares fit around one query point
    wei = kernel(point, xmat, k)
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W

def localWeightRegression(xmat, ymat, k):
    m, n = np1.shape(xmat)
    ypred = np1.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
    return ypred

# load data points
data = pd.read_csv('10data.csv')
bill = np1.array(data.total_bill)
tip = np1.array(data.tip)

# prepare the design matrix: prepend a column of ones to the bill values
mbill = np1.mat(bill)
mtip = np1.mat(tip)
m = np1.shape(mbill)[1]
one = np1.mat(np1.ones(m))
X = np1.hstack((one.T, mbill.T))

# set the bandwidth k here
ypred = localWeightRegression(X, mtip, 2)

SortIndex = X[:, 1].argsort(0)
xsort = X[SortIndex][:, 0]

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(bill, tip, color='green')
ax.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=5)
plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.show()
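
For reference, localWeight solves the standard locally weighted least-squares problem. For a query point x0, each training row x_j gets a Gaussian weight collected in the diagonal matrix W built by kernel, and the local coefficients and prediction are:

    w_j      = exp( -(x0 - x_j)(x0 - x_j)^T / (2 * k^2) )
    beta(x0) = (X^T W X)^(-1) (X^T W y)
    yhat(x0) = x0 . beta(x0)

A smaller bandwidth k makes the fit more local (a wigglier curve), while a larger k approaches ordinary linear regression.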