import pandas as pd
import numpy as np
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

# Fetch the NLTK resources used below (tokenizer model, WordNet data, stop word list)
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

# Load the corpus: one document per row of the 'Name' column
df = pd.read_csv('data1.csv')
corp_data = df['Name'].tolist()

# Tokenize each document into a list of word tokens
c = []
for data in corp_data:
    c.append(word_tokenize(data))

# Lemmatize every token, flattening the per-document lists into one list
lem = WordNetLemmatizer()
token = []
for data in c:
    for t in data:
        token.append(lem.lemmatize(t))

# Lowercase, drop stop words, and deduplicate while preserving order
stop_words = set(stopwords.words('english'))
token_filter = []
for data in token:
    if data.lower() in stop_words:
        continue
    if data.lower() not in token_filter:
        token_filter.append(data.lower())
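# Optional variant (a sketch, not part of the original): the `not in token_filter`
# membership test above scans a list, so deduplication is quadratic in the number
# of tokens. An auxiliary set gives the same order-preserving output in roughly
# linear time. The `seen` name is illustrative, chosen here for the example.
seen = set()
token_filter = []
for t in token:
    t = t.lower()
    if t in stop_words or t in seen:
        continue
    seen.add(t)
    token_filter.append(t)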