Last active
January 30, 2025 09:36
-
-
Save pajachiet/62eb85805cee55053d208521e0bdaf13 to your computer and use it in GitHub Desktop.
Scripting on Superset backend to fix bugs or automate tasks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf8 | |
# Script to create superset users | |
import pandas as pd | |
from sqlalchemy import create_engine | |
import datetime | |
import pexpect | |
import os | |
DB = create_engine('mysql+pymysql://superset:superset@localhost/superset') | |
CSV_USER = 'ext_source/superset_users.csv' | |
CSV_PASSWORD = 'secrets/email_password.csv' | |
DOCKER_PATH = '/usr/local/bin' # Useful for Mac | |
def main(): | |
create_missing_users() | |
# reset_all_passwords() | |
def create_missing_users(): | |
""" Create users present in csv file but missing in Superset""" | |
print("== Creating users from csv file, if they do not exist in Superset") | |
df_existing_users = pd.read_sql_table('ab_user', DB) | |
df_password_users = pd.read_csv(CSV_PASSWORD, sep=';') | |
df_csv_users = pd.read_csv(CSV_USER, sep=';') | |
df_csv_users['username'] = concat_username(df_csv_users['first_name'], df_csv_users['last_name']) | |
now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') | |
df_csv_users['now'] = "TIMESTAMP '{}'".format(now) | |
for user_row in df_csv_users.iterrows(): | |
user_dict = user_row[1].to_dict() | |
# Skip existing users | |
to_insert = True | |
for key in ['username', 'email']: | |
if user_dict[key] in df_existing_users[key].values: | |
print("{} {} already exists in Superset.".format(key, user_dict[key])) | |
to_insert = False | |
break | |
# Insert others | |
if to_insert: | |
print("Creating user : {first_name}, {last_name}, {username}, {email}".format(**user_dict)) | |
query = """ | |
INSERT INTO ab_user (first_name, last_name, username, active, email, created_on, changed_on, created_by_fk, changed_by_fk, login_count, fail_login_count) | |
VALUES ('{first_name}', '{last_name}', '{username}', 1, '{email}', {now}, {now}, 1, 1, 0, 0); | |
""".format(**user_dict) | |
connection = DB.connect() | |
connection.execute(query) | |
connection.close() | |
password = get_corresponding_value(df_password_users, 'email', user_dict['email'], 'password') | |
if password is None: | |
print("No password defined for user with email '{email}'. You will have to reset it manually".format(**user_dict)) | |
else: | |
print("Reset password of user '{username}' with email '{email}'".format(**user_dict)) | |
reset_password(username=user_dict['username'], password=password) | |
def concat_username(first_name, last_name): | |
return first_name + '.' + last_name | |
def reset_all_passwords(): | |
""" Reset all passwords from password file""" | |
print("== Reset all users passwords from secret file") | |
df_csv_users = pd.read_csv(CSV_USER, sep=';') | |
df_password_users = pd.read_csv(CSV_PASSWORD, sep=';') | |
for user_row in df_password_users.iterrows(): | |
user_dict = user_row[1].to_dict() | |
email = user_dict['email'] | |
password = user_dict['password'] | |
first_name = get_corresponding_value(df_csv_users, 'email', email, 'first_name') | |
last_name = get_corresponding_value(df_csv_users, 'email', email, 'last_name') | |
if first_name is None or last_name is None: | |
print("No user found for email {} in {}\n".format(email, CSV_USER)) | |
continue | |
username = concat_username(first_name, last_name) | |
reset_password(username, password) | |
def reset_password(username, password): | |
""" Reset password of user, by executing fabmanager script in docker | |
""" | |
env = os.environ.copy() | |
env['PATH'] = "{}{}{}".format(DOCKER_PATH, os.pathsep, env['PATH']) | |
child = pexpect.spawnu( | |
"""docker exec -ti superset sh -c \"fabmanager reset-password --username {} --app superset\"""".format(username), | |
env=env, | |
timeout=3) | |
child.expect('Password:') | |
child.sendline(password) | |
child.expect('Repeat for confirmation:') | |
child.sendline(password) | |
def get_corresponding_value(df, column_to_equal, value, column_to_get='id'): | |
""" Get corresponding value in column_to_get, for column_to_equal == value | |
Suppose that only one row match column_to_equal == value | |
""" | |
ids = list(df[df[column_to_equal] == value][column_to_get]) | |
if ids: | |
if len(ids) > 1: | |
raise ValueError("Value {} is present multiple times in columns {}".format(value, column_to_equal)) | |
return ids[0] | |
else: | |
return None | |
if __name__ == "__main__": | |
main() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf8 | |
# Set owner to admin for slices and dashboards without owner | |
# Add missing datasource_access permissions for tables | |
from sqlalchemy import create_engine | |
SUPERSET_BACKEND_URI = 'mysql+pymysql://superset:superset@localhost/superset' | |
def run_superset_backend_query(query_string): | |
db = create_engine(SUPERSET_BACKEND_URI) | |
return db.engine.execute(query_string) | |
def main(): | |
print("== Set missing owner to admin for slice, dashboard and tables") | |
run_superset_backend_query(missing_slice_owner_query) | |
run_superset_backend_query(missing_dashboard_owner_query) | |
run_superset_backend_query(missing_table_owner_query) | |
# The next part seemed to have been resolved with newer version of Superset (0.18.5) | |
print("== Create missing datasource access permission on tables") | |
create_datasource_access_perm_if_missing() | |
run_superset_backend_query(missing_table_permission_query) | |
run_superset_backend_query(missing_table_datasource_access_query) | |
missing_dashboard_owner_query = """ | |
INSERT INTO dashboard_user (user_id, dashboard_id) | |
SELECT 1, dashboards.id | |
FROM dashboards | |
WHERE dashboards.id NOT IN (SELECT dashboard_id FROM dashboard_user) | |
""" | |
missing_slice_owner_query = """ | |
INSERT INTO slice_user (user_id, slice_id) | |
SELECT 1, slices.id | |
FROM slices | |
WHERE slices.id NOT IN (SELECT slice_id FROM slice_user) | |
""" | |
missing_table_owner_query = """ | |
UPDATE tables | |
SET user_id=1 | |
WHERE user_id IS NULL; | |
""" | |
missing_table_permission_query = """ | |
INSERT INTO ab_view_menu (name) | |
SELECT tables.perm | |
FROM tables | |
WHERE tables.perm NOT IN (SELECT name FROM ab_view_menu) | |
""" | |
missing_table_datasource_access_query = """ | |
INSERT INTO ab_permission_view (permission_id, view_menu_id) | |
SELECT ab_permission.id, tables_view_id.id | |
FROM | |
( | |
SELECT ab_view_menu.id as id | |
FROM tables | |
LEFT JOIN ab_view_menu ON ab_view_menu.name = tables.perm | |
) as tables_view_id, | |
ab_permission | |
WHERE | |
ab_permission.name = 'datasource_access' AND | |
tables_view_id.id NOT IN (SELECT view_menu_id FROM ab_permission_view) | |
""" | |
def create_datasource_access_perm_if_missing(): | |
get_datasource_access_perm_query = """ | |
SELECT * FROM ab_permission WHERE ab_permission.name = 'datasource_access' | |
""" | |
result = run_superset_backend_query(get_datasource_access_perm_query) | |
rows = result.fetchall() | |
if not rows: # No datasource_access permission | |
create_datasource_access_perm_query = """ | |
INSERT INTO ab_permission (name) values ('datasource_access'); | |
""" | |
run_superset_backend_query(create_datasource_access_perm_query) | |
if __name__ == "__main__": | |
main() | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf8 | |
# Reset roles for databases access | |
# Reset user roles | |
import pandas as pd | |
from six import string_types | |
from superset_backend import run_superset_backend_query | |
CSV_USER = 'ext_source/superset_users.csv' | |
def init_superset_user_role(): | |
print("== Delete non-default roles") | |
delete_non_admin_user_roles() | |
delete_non_default_roles() | |
print("== Create roles to access all datasources in databases") | |
create_databases_roles() | |
print("== Set roles to user from csv file") | |
set_user_roles() | |
def delete_non_admin_user_roles(): | |
""" Delete all user roles, except for admin user | |
""" | |
delele_non_admin_user_roles_query = """ | |
DELETE FROM ab_user_role WHERE user_id != 1; | |
""" | |
run_superset_backend_query(delele_non_admin_user_roles_query) | |
def delete_non_default_roles(): | |
""" Delete all non default roles from superset | |
Previously, we have to | |
- delete these roles attributed to users | |
- delete all permissions linked to theses roles | |
""" | |
delete_non_default_user_role_query = """ | |
DELETE FROM ab_user_role | |
WHERE role_id IN | |
(SELECT id FROM ab_role WHERE name NOT IN ('Admin', 'Alpha', 'Gamma', 'granter', 'Public', 'sql_lab')); | |
""" | |
run_superset_backend_query(delete_non_default_user_role_query) | |
delete_non_default_permission_view_role_query = """ | |
DELETE FROM ab_permission_view_role | |
WHERE role_id IN | |
(SELECT id FROM ab_role WHERE name NOT IN ('Admin', 'Alpha', 'Gamma', 'granter', 'Public', 'sql_lab')); | |
""" | |
run_superset_backend_query(delete_non_default_permission_view_role_query) | |
delete_non_default_roles_query = """ | |
DELETE FROM ab_role WHERE name NOT IN ('Admin', 'Alpha', 'Gamma', 'granter', 'Public', 'sql_lab'); | |
""" | |
run_superset_backend_query(delete_non_default_roles_query) | |
def create_missing_database_access_permission_view(): | |
""" Add database_access permission on all databases | |
""" | |
query = """ | |
INSERT INTO ab_permission_view (permission_id, view_menu_id) | |
SELECT ab_permission.id, ab_view_menu.id | |
FROM | |
ab_permission, | |
dbs LEFT JOIN ab_view_menu ON ab_view_menu.name = dbs.perm | |
WHERE | |
ab_permission.name = 'database_access' AND | |
NOT ((ab_permission.id, ab_view_menu.id) IN (SELECT permission_id, view_menu_id FROM ab_permission_view)) | |
""" | |
run_superset_backend_query(query) | |
def create_databases_roles(): | |
""" Create roles for databases access | |
- for each database defined in superset | |
- for all datasource defined in superset, associated to a database | |
""" | |
create_database_role_template = """ | |
INSERT INTO ab_role (name) VALUES ('{database}'); | |
""" | |
create_database_permission_role_template = """ | |
INSERT INTO ab_permission_view_role (role_id, permission_view_id) | |
-- datasource access on all table based on database | |
SELECT | |
ab_role.id, ab_permission_view.id | |
FROM | |
ab_role, | |
ab_permission_view | |
LEFT JOIN ab_permission on ab_permission.id = ab_permission_view.permission_id | |
LEFT JOIN ab_view_menu on ab_view_menu.id = ab_permission_view.view_menu_id | |
WHERE | |
ab_role.name = '{database}' AND | |
ab_permission.name = 'datasource_access' AND | |
ab_view_menu.name LIKE '[{database}]%%' | |
UNION | |
-- database_access on database | |
SELECT | |
ab_role.id, ab_permission_view.id | |
FROM | |
ab_role, | |
ab_permission_view | |
LEFT JOIN ab_permission on ab_permission.id = ab_permission_view.permission_id | |
LEFT JOIN ab_view_menu on ab_view_menu.id = ab_permission_view.view_menu_id | |
WHERE | |
ab_role.name = '{database}' AND | |
ab_permission.name = 'database_access' AND | |
ab_view_menu.name LIKE '[{database}]%%'; | |
""" | |
for database in get_databases_names(): | |
print("Creating role for database {} giving access to all datasource associated to it.".format(database)) | |
query = create_database_role_template.format(database=database) | |
run_superset_backend_query(query) | |
query = create_database_permission_role_template.format(database=database) | |
run_superset_backend_query(query) | |
def set_user_roles(): | |
""" Reset user roles based on csv file | |
""" | |
add_role_to_user_template = """ | |
INSERT INTO ab_user_role (user_id, role_id) | |
SELECT | |
ab_user.id as user_id, ab_role.id as role_id | |
FROM | |
ab_user, ab_role | |
WHERE | |
ab_user.email = "{email}" AND | |
ab_role.name = "{role_name}" | |
""" | |
df_csv_users = pd.read_csv(CSV_USER, sep=';') | |
print("Setting user roles") | |
for user_row in df_csv_users.iterrows(): | |
user_dict = user_row[1].to_dict() | |
email = user_dict['email'] | |
roles = user_dict['roles'] | |
if isinstance(roles, string_types): | |
print(" email '{}', roles : ".format(email), end='') | |
role_list = roles.split(',') | |
print(', '.join(role_list)) | |
for role in role_list: | |
query = add_role_to_user_template.format(email=email, role_name=role) | |
run_superset_backend_query(query) | |
def get_databases_names(): | |
""" List databases names defined in Superset | |
:return: database_list | |
""" | |
query = """SELECT database_name from dbs""" | |
database_list = [line[0] for line in run_superset_backend_query(query)] | |
return database_list | |
if __name__ == "__main__": | |
init_superset_user_role() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment