In [1]:
# Setup: load the 20 Newsgroups training corpus and a spaCy pipeline so the
# tokenization benchmarks in the cells below all run over the same texts.
import spacy
from sklearn import datasets

# ~11k raw newsgroup posts; fetch_20newsgroups downloads and caches the
# dataset on first use, so the first run of this cell is slow.
texts = datasets.fetch_20newsgroups(subset='train').data

# The 'en' shortcut link was removed in spaCy v3; use the explicit package
# name (which is what 'en' resolved to under spaCy v2 as well).
nlp = spacy.load('en_core_web_sm')
In [2]:
# Benchmark texture's document-term matrix construction over the full corpus,
# using only spaCy's tokenizer (no tagger/parser) for the token stream.
# NOTE(review): `texture` is a project-local package; `document_matrix` is
# presumed to build a bag-of-words matrix comparable to CountVectorizer's
# fit_transform in the next cell — confirm against its implementation.
import texture
%timeit texture.document_matrix(texts, nlp.tokenizer)
In [3]:
from sklearn.feature_extraction.text import CountVectorizer
vec = CountVectorizer()
%timeit vec.fit_transform(texts)
In [ ]:
%timeit next(nlp.pipe(texts, batch_size=10000, n_threads=5))
In [ ]: