Note: Assuming you add
--bootstrap-server to all of them
> kcat -C -t <TOPIC> -e -o -beginning | jq -r '. | {id: .id, name: .name, tombstone: .tombstone} | [.[]] | @csv' > prod_companies.csv
| joined_vector = VectorAssembler(inputCols=['gender_dummy', 'level_dummy', 'logSessionCount', | |
| 'sqrtMeanSongCount', 'sqrtSessionsFreqDay'], | |
| outputCol='nonScaledFeatures')\ | |
| .transform(joined) | |
| joined_vector = joined_vector.withColumn('label', joined_vector.churned.cast('integer')) | |
| joined_vector.drop('userId','level','gender', 'sessionCount', 'meanSongCount', | |
| 'sessionsFreqDay', 'gender_idx', 'level_idx', 'churned').show(4) |
| # Keep a list for Pipeline stages | |
| stages = [] | |
| # Convert categorical variables to indexes | |
| indexers = [StringIndexer(inputCol=column, outputCol=column+"_idx").fit(j) for column in ['level', 'gender']] | |
| # Convert indexes to OneHotEncoded Sparse Vectors | |
| onehotencoder = OneHotEncoderEstimator(inputCols=['gender_idx', 'level_idx'], | |
| outputCols=['gender_dummy','level_dummy']) |
| # Our Grid Search object allows us to define various hyperparameters to test our model | |
| params = ParamGridBuilder()\ | |
| .addGrid(classifier.maxDepth, [2, 5, 10])\ | |
| .addGrid(classifier.featureSubsetStrategy, ['all', 'onethird', 'sqrt', 'log2'])\ | |
| .build() | |
| # Define the evaluator; this will measure the success of the model(s) | |
| evaluator = binary_evaluator = BinaryClassificationEvaluator(labelCol='churned') | |
| # CrossValidator will build pipeline, create models based on ParamGridBuilder, |
| import re | |
| import nltk | |
| nltk.download(['punkt', 'stopwords', 'wordnet']) | |
| from nltk.stem import WordNetLemmatizer | |
| from nltk.corpus import stopwords | |
| from nltk.tokenize import word_tokenize | |
| def tokenize(text): | |
| """ |
| import random | |
| from collections import Counter | |
| class Person: | |
| def __init__(self, issick): | |
| self.issick = issick | |
| def __repr__(self): | |
| if self.issick: |
| library(highcharter) | |
| library(dplyr) | |
| mapdata <- get_data_from_map(download_map_data("countries/gb/gb-all")) | |
| set.seed(1234) | |
| data_fake <- mapdata %>% | |
| select(code = `hc-a2`) %>% | |
| mutate(value = 1e5 * abs(rt(nrow(.), df = 10))) |
Note: Assuming you add
--bootstrap-server to all of them
> kcat -C -t <TOPIC> -e -o -beginning | jq -r '. | {id: .id, name: .name, tombstone: .tombstone} | [.[]] | @csv' > prod_companies.csv
| # I couldn't get chains to return generators, so I had to do a bit of low-level SSE. Hope this is useful. | |
| # Probably you'll use another Vector Store instead of OpenSearch, but if you want to mimic what I did here, | |
| # please use the fork of `OpenSearchVectorSearch` in https://github.com/oneryalcin/langchain | |
| import json | |
| import os | |
| import logging | |
| from typing import List, Generator |
| from langchain.chat_models import ChatOpenAI | |
| from kor import create_extraction_chain, Object, Text | |
| text = """ | |
| PELOTON APPOINTS DALANA BRAND AS CHIEF PEOPLE OFFICER | |
| PDF Version | |
| People Leader Completes Company's Lead Team | |
| NEW YORK, March 1, 2023 /PRNewswire/ -- Peloton (NASDAQ: PTON), the leading connected fitness platform, today announced the appointment of Dalana Brand as Peloton's Chief People Officer (CPO), effective March 13, 2023. As a seasoned executive with significant global leadership experience in multiple industries, Brand joins the team with a strong reputation for organizational transformation. She will report to CEO Barry McCarthy and serve as a member of the leadership team, leading the company's Global People Team. |