Skip to content

Instantly share code, notes, and snippets.

View kyoto-cheng's full-sized avatar
🎯
Focusing

Cheng ✨ kyoto-cheng

🎯
Focusing
  • London, UK
  • 15:06 (UTC +01:00)
View GitHub Profile
# Drop outliers: keep only the rows whose average salary lies in the
# half-open range [50000, 200000).
df = df[(df['avg_salary'] >= 50000) & (df['avg_salary'] < 200000)]
def salary_category(salary):
if 50000 <= salary < 75000:
return 1
if 75000 <= salary < 100000:
return 2
if 100000 <= salary < 125000:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the cleaned dataset from data_cleaned.csv.
df = pd.read_csv(filepath_or_buffer='data_cleaned.csv')

# Keep only the columns used for the pivot tables.
df_pivots = df[[
    'Rating', 'Type of ownership', 'Industry', 'Sector', 'Revenue', 'Size', 'Age',
    'Python', 'R', 'SQL', 'AWS', 'Excel', 'GCP', 'Azure', 'Spark',
    'PyTorch', 'TensorFlow', 'Tableau', 'Keras',
    'Job', 'Seniority', 'avg_salary',
]]
from wordcloud import WordCloud, ImageColorGenerator, STOPWORDS
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Concatenate every job description into a single space-separated string.
words = " ".join(description for description in df['Job Description'])
def punctuation_stop(text):
"""remove punctuation and stop words"""
filtered = []
stop_words = set(stopwords.words('english'))
import pandas as pd
import numpy as np
import re
from nltk.corpus import stopwords
# English stop words from NLTK, held in a set.
stopWords = {*stopwords.words('english')}

# Load the dataset from data.csv.
df = pd.read_csv(filepath_or_buffer='data.csv')
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
from selenium import webdriver
import time
import pandas as pd
def get_jobs(keyword, num_jobs, verbose):
'''Gathers jobs as a dataframe, scraped from Glassdoor'''
# Initializing the webdriver
def predict(cls, test_data):
# Character cleaning function that replaces every character outside an allowed set with a space
def char_clean(string):
    """Replace disallowed characters with spaces and normalise whitespace.

    Every character that is not an ASCII letter, a digit, or one of
    ``&``, ``+``, ``@``, ``.``, space or newline is replaced by a space.
    The result is then split on whitespace and re-joined with single
    spaces, so runs of whitespace collapse and leading/trailing
    whitespace is removed.

    Args:
        string: The text to clean.

    Returns:
        The cleaned text.
    """
    # Raw string: the previous non-raw literal contained the invalid escape
    # sequence '\.', which triggers a warning on modern Python versions.
    # Inside a character class '.' is already literal, so no escape is needed.
    new_string = re.sub(r'[^a-zA-Z0-9&+@ \n.]', ' ', string)
    # split() with no arguments splits on any whitespace run (including the
    # newlines kept above), so the join yields single-space separation.
    return ' '.join(new_string.split())
# Input dataframe as test_data.
df = test_data
import spacy
import random
# Example training data. In my case, I am training a spaCy model to perform an NER task.
TRAIN_DATA =
[
('Amazon co ca', {'entities': [(0, 6, 'BRD')]}),
('AMZNMKTPLACE AMAZON CO', {'entities': [(13, 19, 'BRD')]}),
('APPLE COM BILL', {'entities': [(0, 5, 'BRD')]}),
('BOOKING COM New York City', {'entities': [(0, 7, 'BRD')]}),