Skip to content

Instantly share code, notes, and snippets.

@KentaroAOKI
Created August 6, 2015 14:00
Show Gist options
  • Save KentaroAOKI/51d4dc92bae918b52cd5 to your computer and use it in GitHub Desktop.
Save KentaroAOKI/51d4dc92bae918b52cd5 to your computer and use it in GitHub Desktop.
Split text into per-character UTF-8 hex tokens for text analysis
# The script MUST contain a function named azureml_main
# which is the entry point for this module.
#
# The entry point function can contain up to two input arguments:
# Param<dataframe1>: a pandas.DataFrame
# Param<dataframe2>: a pandas.DataFrame
# coding: UTF-8
def azureml_main(dataframe1=None, dataframe2=None):
    """Add a ``name_tx`` column of per-character UTF-8 hex tokens.

    Every value of the ``name1`` column is split into characters. Each
    character becomes one token: the ``hex()`` form of each of its UTF-8
    bytes, concatenated, followed by a single space. ``"a"`` therefore
    becomes ``"0x61 "`` and the three-byte character U+3042 becomes
    ``"0xe30x810x82 "``. Missing values produce an empty string.

    Args:
        dataframe1: pandas.DataFrame that has a ``name1`` column. The new
            ``name_tx`` column is added to this frame in place.
        dataframe2: Not used by this script.

    Returns:
        A one-element tuple holding ``dataframe1``.
    """
    import pandas as pd

    # Assigning a plain list is positional, so the result lines up with the
    # rows even when the frame's index is not 0..n-1 (e.g. after filtering).
    dataframe1["name_tx"] = [
        "" if pd.isnull(value) else _utf8_hex_tokens(value)
        for value in dataframe1["name1"]
    ]
    # Return value must be of a sequence of pandas.DataFrame
    return (dataframe1,)


def _utf8_sequence_length(lead_byte):
    """Return how many bytes the UTF-8 sequence starting at *lead_byte* spans."""
    if 0xF0 <= lead_byte < 0xF8:
        return 4
    if 0xE0 <= lead_byte < 0xF0:
        return 3
    if 0xC0 <= lead_byte < 0xE0:
        return 2
    # ASCII, a stray continuation byte, or an invalid lead byte: emit it alone.
    return 1


def _to_utf8_bytes(value):
    """Return *value* as a bytearray of UTF-8 bytes."""
    if isinstance(value, (bytes, bytearray)):
        # Already encoded; taken as UTF-8 without validation.
        return bytearray(value)
    if not hasattr(value, "encode"):
        value = str(value)
    return bytearray(value.encode("utf-8"))


def _utf8_hex_tokens(value):
    """Return one space-terminated hex token per UTF-8 character of *value*."""
    # Iterating or slicing a bytearray yields ints, so hex() applies directly.
    raw = _to_utf8_bytes(value)
    size = len(raw)
    pieces = []
    pos = 0
    while pos < size:
        step = _utf8_sequence_length(raw[pos])
        # Slicing clips at the end of the data, so a truncated trailing
        # sequence yields a shorter token instead of raising IndexError.
        pieces.append("".join(hex(byte) for byte in raw[pos:pos + step]))
        pieces.append(" ")
        pos += step
    return "".join(pieces)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment