Last active
December 23, 2015 22:39
-
-
Save tdhopper/6705042 to your computer and use it in GitHub Desktop.
Causes an error. It seems to be related to type: CountVectorizer ends up calling .lower() on a numpy.ndarray instead of a string.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Minimal reproduction: fit a DataFrameMapper that routes a single text
# column into a CountVectorizer. With the library versions shown in the
# accompanying traceback, the final call raises
#     AttributeError: 'numpy.ndarray' object has no attribute 'lower'
# i.e. the vectorizer's preprocessor receives a numpy.ndarray for each
# document instead of a string.
import pandas as pd
from sklearn_pandas import DataFrameMapper
from sklearn.feature_extraction.text import CountVectorizer

# Two short documents held in one text column.
df = pd.DataFrame({"content":["tim hopper", "this data tim"]})
# Send the "content" column through a bag-of-words vectorizer.
mapper = DataFrameMapper([("content", CountVectorizer())])
# This is the call that fails in the traceback.
mapper.fit_transform(df)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--------------------------------------------------------------------------- | |
AttributeError Traceback (most recent call last) | |
<ipython-input-7-01896d0e98b9> in <module>() | |
5 df = pd.DataFrame({"content":["tim hopper", "this data tim"]}) | |
6 mapper = DataFrameMapper([("content", CountVectorizer())]) | |
----> 7 mapper.fit_transform(df) | |
C:\Anaconda\lib\site-packages\sklearn\base.pyc in fit_transform(self, X, y, **fit_params) | |
406 if y is None: | |
407 # fit method of arity 1 (unsupervised transformation) | |
--> 408 return self.fit(X, **fit_params).transform(X) | |
409 else: | |
410 # fit method of arity 2 (supervised transformation) | |
C:\Anaconda\lib\site-packages\sklearn_pandas\__init__.pyc in fit(self, X, y) | |
100 for columns, transformer in self.features: | |
101 if transformer is not None: | |
--> 102 transformer.fit(self._get_col_subset(X, columns)) | |
103 return self | |
104 | |
C:\Anaconda\lib\site-packages\sklearn\feature_extraction\text.pyc in fit(self, raw_documents, y) | |
754 self | |
755 """ | |
--> 756 self.fit_transform(raw_documents) | |
757 return self | |
758 | |
C:\Anaconda\lib\site-packages\sklearn\feature_extraction\text.pyc in fit_transform(self, raw_documents, y) | |
778 max_features = self.max_features | |
779 | |
--> 780 vocabulary, X = self._count_vocab(raw_documents, self.fixed_vocabulary) | |
781 X = X.tocsc() | |
782 | |
C:\Anaconda\lib\site-packages\sklearn\feature_extraction\text.pyc in _count_vocab(self, raw_documents, fixed_vocab) | |
713 indptr.append(0) | |
714 for doc in raw_documents: | |
--> 715 for feature in analyze(doc): | |
716 try: | |
717 j_indices.append(vocabulary[feature]) | |
C:\Anaconda\lib\site-packages\sklearn\feature_extraction\text.pyc in <lambda>(doc) | |
227 | |
228 return lambda doc: self._word_ngrams( | |
--> 229 tokenize(preprocess(self.decode(doc))), stop_words) | |
230 | |
231 else: | |
C:\Anaconda\lib\site-packages\sklearn\feature_extraction\text.pyc in <lambda>(x) | |
193 | |
194 if self.lowercase: | |
--> 195 return lambda x: strip_accents(x.lower()) | |
196 else: | |
197 return strip_accents | |
AttributeError: 'numpy.ndarray' object has no attribute 'lower' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment