from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Demo: score how alike two short sentences are using TF-IDF weights and
# cosine similarity.
sent1 = "The cat sat on the mat"
sent2 = "The dog sat on the rug"
# Author's note: using the same text as sent1 for sent2 printed
# 1.0000000000000002 (a hair above 1 rather than exactly 1).
sentences = [sent1, sent2]

# A single vectorizer (default settings) is fitted on both sentences together,
# so the two rows of the resulting matrix share one vocabulary.
vectorizer = TfidfVectorizer()
tfidf = vectorizer.fit_transform(sentences)

# Slice rather than index so each operand stays a 2-D, single-row matrix.
first_row = tfidf[0:1]
second_row = tfidf[1:2]

# The result is a 2-D array; with one row on each side its only entry is the
# score for this sentence pair.
similarity = cosine_similarity(first_row, second_row)
score = similarity[0, 0]
print("Cosine similarity:", score)
'''
run:
Cosine similarity: 0.6029748160380571
'''