Back to Table of Contents | Back to ML
from sklearn.feature_extraction.text import CountVectorizer

# Toy corpus: one string per document.
text = ['This is a line',
        'This is another line',
        'Completely different line']

# Bag-of-words counter; stop_words='english' applies scikit-learn's built-in
# English stop-word list when the vocabulary is built.
cv = CountVectorizer(stop_words='english')

# Learn the vocabulary from `text` and return the document-term count matrix
# in sparse form (one row per document, one column per vocabulary term).
sparse_matrix = cv.fit_transform(text)

# Dense copy for inspection. toarray() yields a plain numpy.ndarray, whereas
# todense() yields numpy.matrix, a class NumPy no longer recommends.
# NOTE(review): a bare expression only displays its value in an interactive
# session / notebook cell -- confirm that is how this file is run.
sparse_matrix.toarray()

# Mapping from each vocabulary term to its column index in the matrix.
cv.vocabulary_
from sklearn.feature_extraction.text import TfidfTransformer

# Re-weights an existing count matrix into TF-IDF scores.
tfidf = TfidfTransformer()

# `sparse_matrix` is the count matrix produced by the CountVectorizer step;
# fit_transform learns the IDF weights from it and returns the weighted
# matrix, still in sparse form.
results = tfidf.fit_transform(sparse_matrix)

# Dense copy for inspection. toarray() yields a plain numpy.ndarray, whereas
# todense() yields numpy.matrix, a class NumPy no longer recommends.
# NOTE(review): a bare expression only displays its value in an interactive
# session / notebook cell -- confirm that is how this file is run.
results.toarray()
from sklearn.feature_extraction.text import TfidfVectorizer

# TfidfVectorizer goes from raw text straight to TF-IDF features in one
# object; stop_words='english' applies scikit-learn's built-in English
# stop-word list.
tv = TfidfVectorizer(stop_words='english')

# NOTE(review): X_train and X_test are not defined in this excerpt --
# presumably collections of raw text documents from a train/test split;
# confirm against the code that creates them.

# Learn the vocabulary and IDF weights from the training data only, and
# transform it in the same call.
X_train_tfidf = tv.fit_transform(X_train)

# Reuse the already-fitted vectorizer on the test data (transform, not
# fit_transform), so both matrices share the same feature columns.
X_test_tfidf = tv.transform(X_test)
from sklearn.naive_bayes import MultinomialNB

# Multinomial Naive Bayes classifier with default parameters.
nb = MultinomialNB()

# Train on the TF-IDF features of the training documents.
# NOTE(review): y_train is not defined in this excerpt -- presumably the
# class labels aligned row-for-row with X_train_tfidf; confirm.
nb.fit(X_train_tfidf,y_train)