This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
def repartition_par_pourcentiles(values, pct_step=5): | |
""" | |
Calcule la répartition par tranche de population et renvoie bornes, effectifs et proportions. | |
""" | |
series = pd.Series(values).dropna().sort_values().reset_index(drop=True) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Le modèle S+ repose sur trois approches complémentaires afin de maximiser la précision et l'efficacité de la détection. La première approche consiste à analyser les dépôts en espèces, particulièrement ceux effectués de manière inhabituelle ou fréquente. La deuxième approche se concentre sur la recherche de mots clés spécifiques directement dans les informations associées aux virements bancaires, visant à repérer des indices linguistiques liés au commerce illégal d'espèces sauvages. Enfin, la troisième approche identifie les comportements transactionnels atypiques, tels que des volumes inhabituels, des fréquences élevées, ou des montants significatifs de paiements et de virements internationaux, notamment vers ou depuis des pays considérés à risque pour ce type de criminalité.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 1. Import des librairies nécessaires | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from matplotlib import colors | |
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc | |
from sklearn.preprocessing import StandardScaler, OneHotEncoder |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
# On suppose que df_merged est déjà chargé avec toutes les colonnes nécessaires. | |
# Pour cet exemple, nous utilisons la colonne 'PARTY_TYPE_CODE' qui doit contenir | |
# 1 pour les particuliers et 2 pour les entreprises. | |
# Initialisation de la colonne de score total | |
df_merged['risk_score_total'] = 0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
import seaborn as sns | |
# 1. Création d'une nouvelle colonne pour distinguer les top 10 | |
df_merged['highlight_top10'] = 0 | |
df_merged.loc[df_final_top10.index, 'highlight_top10'] = 1 | |
# 2. Nouveau graphe PCA avec 3 groupes : | |
# - 0 = normal | |
# - 1 = anomalies hors top 10 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.decomposition import PCA | |
from sklearn.cluster import DBSCAN | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
# === ÉTAPE 1 : Préparation des données === | |
# Sélection des colonnes utilisées pour la détection d'anomalies |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.ensemble import IsolationForest | |
from sklearn.cluster import DBSCAN | |
from sklearn.neighbors import LocalOutlierFactor | |
from sklearn.covariance import EllipticEnvelope | |
from sklearn.svm import OneClassSVM | |
from sklearn.decomposition import PCA | |
from sklearn.preprocessing import StandardScaler | |
import matplotlib.pyplot as plt | |
import seaborn as sns |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from sklearn.ensemble import IsolationForest | |
from sklearn.cluster import DBSCAN | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.decomposition import PCA | |
from tensorflow.keras.layers import Input, Dense | |
from tensorflow.keras.models import Model | |
from tensorflow.keras.callbacks import EarlyStopping |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from sklearn.ensemble import IsolationForest | |
from sklearn.cluster import DBSCAN | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import StandardScaler | |
from tensorflow.keras.layers import Input, Dense | |
from tensorflow.keras.models import Model | |
from tensorflow.keras.callbacks import EarlyStopping | |
import matplotlib.pyplot as plt |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
df['col1'] = df['col1'].fillna(df['col2']) | |
# --- 2. Prétraitement du DataFrame TV2 --- | |
# On s'attend à ce que TV2 contienne les colonnes suivantes en MAJUSCULE : | |
# PARTY_KEY, ROLE, PARTY_TYPE_CODE, RISQUE_SECTEUR, BASE_CURR_AMOUNT, etc. | |
TV2['PARTY_TYPE_CODE'] = TV2['PARTY_TYPE_CODE'].astype(int) | |
# Agrégation générale par PARTY_KEY pour les indicateurs financiers | |
tv2_agg = TV2.groupby('PARTY_KEY').agg( |
NewerOlder