import string  # BUGFIX: original used string.punctuation without importing string

import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

# `final_text` must be an iterable of sentence strings defined earlier
# (e.g. in a previous notebook cell) — TODO confirm it is in scope here.
# NOTE(review): word_tokenize and stopwords require the NLTK 'punkt' and
# 'stopwords' resources to have been downloaded (nltk.download) — verify.
sentences = final_text

# Build the stopword set and stemmer once, outside the per-sentence loop.
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

preprocessed_sentences = []
for sentence in sentences:
    # Tokenize the sentence into words/punctuation tokens.
    tokens = word_tokenize(sentence)

    # Lowercase, drop stopwords, and stem the remaining tokens.
    filtered_tokens = [
        stemmer.stem(word.lower())
        for word in tokens
        if word.lower() not in stop_words
    ]

    # Normalization: drop tokens that are (substrings of) punctuation.
    # Kept as a substring test against string.punctuation to match the
    # original membership semantics exactly.
    normalized_tokens = [
        word for word in filtered_tokens if word not in string.punctuation
    ]

    # Re-join the cleaned tokens into a single preprocessed sentence.
    preprocessed_sentences.append(" ".join(normalized_tokens))

for i in preprocessed_sentences:
    print(i)