This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_extraction.text import CountVectorizer | |
# normalize to lower case | |
df_lower = covid1_df['covid_prepare'].str.lower().to_frame() | |
df_lower['covid_prepare'].values | |
#Convert all including nan to strings | |
doc = df_lower['covid_prepare'].map(str).values | |
#delete nans | |
doc = np.delete(doc, np.where(doc == 'nan')) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def createPropertyTypeCol(rental_df): | |
"""This function adds a column called property_type_class to the dataframe | |
Args: | |
rental_df ([type]): [The dataframe has a column called property_type] | |
Returns: | |
[type]: [Dataframe with a more concise list of property types] | |
""" | |
# Property types Private room and Shared Room identified |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def createRentalMap(df, fixed_radius, nbd_or_grp): | |
# Initialize a Folium map. Center it to the mean of latitude and longitude of the entire dataset | |
this_map = folium.Map( | |
location=[df["latitude"].mean(), df["longitude"].mean()], | |
tiles="CartoDB positron", | |
zoom_start=13, | |
) | |
def plotDot(point_row, fixed_radius, nbd_or_grp): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import datetime | |
# The historical data setup | |
assets = ['Automobiles', 'Building'] * 2 | |
countries = ['Andorra', 'Andorra', 'Brunei', 'Brunei'] | |
rates = [12.5, 4.5, 14.0, 6.5] | |
effective_from = ['2019-01-01'] * 4 | |
effective_until = [None] * 4 | |
depreciation_rate_df = pd.DataFrame(list(zip(assets, countries, rates, effective_from, effective_until)), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--Oracle Merge | |
merge into depreciation_rate d | |
using | |
(select asset, | |
country, | |
effective_from, | |
lead(effective_from) over (partition by asset, country order by effective_from) - 1/86400 as effective_until | |
from depreciation_rate) n | |
on (d.asset = n.asset | |
and d.country = n.country |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Create a GroupBy object | |
df_groups = df.groupby(['Destination City Name', 'Origin State', 'Origin City', 'Unique Carrier']) | |
new_df = df_groups.agg(Total_Flights = ('Flight Date','count')) | |
new_df['Count Delayed Flights'] = df_groups.apply(lambda x : x[x['Arrival Delay'] > 0]['Arrival Delay'].count() ) | |
new_df['Delayed Per Origin'] = new_df.groupby(['Destination City Name', 'Origin State', 'Origin City'])['Count Delayed Flights'].transform('sum') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
with cf as (select | |
unique_carrier, | |
origin_city, | |
origin_state, | |
destination_city_name, | |
destination_state, | |
sum (case when arrival_delay > 0 then 1 else 0 end) as count_delayed_flights, | |
count(arrival_delay) as total_flight_count | |
from "ArrivalInfo" | |
group by |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 'dataset' holds the input data for this script | |
import pandas as pd | |
df_groups = dataset.groupby(['Destination City Name', 'Origin State', 'Origin City', 'Unique Carrier']) | |
final_df = df_groups.agg(Total_Flights = ('Flight Date','count')) | |
final_df['Count of Delayed Flights'] = df_groups.apply(lambda x : x[x['Arrival Delay'] > 0]['Arrival Delay'].count() ) | |
final_df = final_df.reset_index() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#'MetAreaPopAndTeams_df' and 'NHLData_df' hold the input data for this script | |
import pandas as pd | |
MetAreaPopAndTeams_df['Team Name Map'] = MetAreaPopAndTeams_df['NHL Teams'].apply(lambda teamname: NHLData_df[NHLData_df['Team'].str.contains(teamname)]['Team'].any(0)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--Option 1 | |
select metropolitanarea, | |
nhl_team, | |
team, | |
win, | |
loss | |
from cities_expanded c, | |
nhl_data n | |
where n.team like '%' || nhl_team || '%'; |