prrao87 · January 13, 2019 00:03
diff --git a/clean_stance.py b/clean_stance.py
 def _stance(path, topic=None):
    def clean_ascii(text):
        # function to remove non-ASCII chars from data
        return ''.join(i for i in text if ord(i) < 128)
    orig = pd.read_csv(path, delimiter='\t', header=0, encoding = "latin-1")
    orig['Tweet'] = orig['Tweet'].apply(clean_ascii)
    df = orig
    # Get only those tweets that pertain to a single topic in the training data
    if topic is not None:
        df = df.loc[df['Target'] == topic]
    X = df.Tweet.values
    stances = ["AGAINST", "FAVOR", "NONE", "UNKNOWN"]
    class_nums = {s: i for i, s in enumerate(stances)}
    Y = np.array([class_nums[s] for s in df.Stance])
 return X, Y
	def _stance(path, topic=None):
	def clean_ascii(text):
	# function to remove non-ASCII chars from data
	return ''.join(i for i in text if ord(i) < 128)
	orig = pd.read_csv(path, delimiter='\t', header=0, encoding = "latin-1")
	orig['Tweet'] = orig['Tweet'].apply(clean_ascii)
	df = orig
	# Get only those tweets that pertain to a single topic in the training data
	if topic is not None:
	df = df.loc[df['Target'] == topic]
	X = df.Tweet.values
	stances = ["AGAINST", "FAVOR", "NONE", "UNKNOWN"]
	class_nums = {s: i for i, s in enumerate(stances)}
	Y = np.array([class_nums[s] for s in df.Stance])
	return X, Y
No results found