Skip to content

Instantly share code, notes, and snippets.

@vallantin
Last active October 29, 2018 14:19
Show Gist options
  • Save vallantin/c59d459587fb1a7bca10a18b2f0a438b to your computer and use it in GitHub Desktop.
Save vallantin/c59d459587fb1a7bca10a18b2f0a438b to your computer and use it in GitHub Desktop.
from rasa_nlu.training_data import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.model import Trainer, Metadata, Interpreter
from rasa_nlu import config
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
import jsonpickle
def remove_html(filename):
    """Return the visible text of the ``centerColAlign`` div of an HTML file.

    The file is parsed with BeautifulSoup's ``lxml`` parser, every
    ``<script>``, ``<noscript>`` and ``<style>`` element is removed, and the
    text of the first ``div`` whose class is ``centerColAlign`` is extracted.
    Runs of spaces and newlines in that text are collapsed into single
    spaces, and leading/trailing spaces and newlines are dropped.

    Args:
        filename: Path of the HTML file to read.

    Returns:
        The cleaned text as one string containing no newline characters.

    Raises:
        AttributeError: If the document contains no matching ``div``
            (``soup.find`` then yields ``None``, which has no ``get_text``).
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(filename) as html_file:
        soup = BeautifulSoup(html_file, 'lxml')

    # Strip non-visible content. A plain loop is used because extract() is
    # called only for its side effect of detaching the tag from the tree.
    for tag in soup.find_all(['script', 'noscript', 'style']):
        tag.extract()

    raw_txt = soup.find('div', {'class': 'centerColAlign'}).get_text()

    # Splitting on ' ' after turning newlines into spaces is equivalent to
    # splitting on both delimiters; empty pieces are dropped by truthiness
    # rather than by identity comparison against a string literal.
    words = [word for word in raw_txt.replace('\n', ' ').split(' ') if word]
    return ' '.join(words)
def train(data, config_file, model_dir):
    """Train an NLU model and persist it under the fixed name ``amazon``.

    Args:
        data: Training-data location handed to ``load_data``.
        config_file: Configuration file handed to ``config.load``.
        model_dir: Target directory handed to ``Trainer.persist``.

    Returns:
        Whatever ``Trainer.persist`` returns. The original code bound this
        value to ``model_directory`` and discarded it; presumably it is the
        path of the stored model -- confirm against the installed rasa_nlu
        version.
    """
    print('Training, wait...')
    training_data = load_data(data)
    trainer = Trainer(config.load(config_file))
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir, fixed_model_name='amazon')
    print('training completed.')
    # Hand the persisted location back to the caller instead of dropping it.
    return model_directory
def predict(filename, model_path='./models/nlu/default/amazon'):
    """Parse the text of an HTML file with a persisted NLU model.

    Args:
        filename: Path of the HTML file; its text is obtained through
            ``remove_html``.
        model_path: Directory passed to ``Interpreter.load``. Defaults to
            the location this script previously had hard-coded.

    Returns:
        The result of ``interpreter.parse`` after an encode/decode round
        trip through ``jsonpickle``.
    """
    print('Starting to predict...')
    interpreter = Interpreter.load(model_path)
    text = remove_html(filename)
    parsed = interpreter.parse(text)
    # The round trip is kept from the original implementation; presumably it
    # normalises the parse result into plain Python objects -- TODO confirm.
    return jsonpickle.decode(jsonpickle.encode(parsed))
# Script driver: train the model first, then parse the saved HTML page and
# show the result.
train(
    data='./data/smartwatches_training.json',
    config_file='./config/config.yml',
    model_dir='./models/nlu',
)
predictions = predict(filename='amazon.html')
print(predictions)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment