Skip to content

Instantly share code, notes, and snippets.

@misraX
Last active March 5, 2025 15:46
Show Gist options
  • Save misraX/399f19920821758cda586c2e82aedc65 to your computer and use it in GitHub Desktop.
Save misraX/399f19920821758cda586c2e82aedc65 to your computer and use it in GitHub Desktop.
Bash Script To Install Python Packages and Generate requirements.txt Inside Python Virtualenv.
#!/bin/bash
#
# Created By: misraX
# Github: github.com/misrax
# License: MIT
# Copyright: 2017
#
# Install pip packages inside the active virtualenv, then write the output of
# `pip freeze` to a requirements.txt in a directory chosen interactively.
# It's just a simple way of creating the pip requirements.txt for development use.
#
# Usage: pip-install package1 package2 package3 ... package(n)
#        pip-install "package1 package2"      (single quoted argument also works)
#
# Exit status: 0 when requirements.txt was written; 1 when no virtualenv is
# active, no package was given, pip install failed, no path was typed, or the
# typed path is not a directory under $VIRTUAL_ENV.
#

# Collect the packages from ALL arguments (not only $1). Each argument is
# split on whitespace with `read -a`, which keeps the old "one quoted list"
# call style working without exposing the names to pathname expansion.
packages=()
for arg in "$@"; do
    read -r -a words <<< "$arg"
    packages+=("${words[@]}")
done

echo "Starting to install packages, Hooopa....."

# Guard: a virtualenv must be active.
if [[ -z "$VIRTUAL_ENV" ]]; then
    echo "You are not in a working virtualenv"
    echo "Exiting .........."
    exit 1
fi
echo "You are in a working virtualenv $VIRTUAL_ENV"

# Guard: at least one package name is required.
if [[ ${#packages[@]} -eq 0 ]]; then
    echo "Please enter at least one package to install"
    exit 1
fi

# Do not freeze the environment when the installation itself failed.
if ! pip install "${packages[@]}"; then
    echo "pip install failed, requirements.txt was not generated"
    exit 1
fi

# Ask where to save the file; the listing shows the candidate directories.
echo "Whre do u wanna save your requirements.txt type the path and click [ENTER]"
ls "$VIRTUAL_ENV"
# -r keeps backslashes in the typed path literal.
read -r requirements_path

if [[ -z "$requirements_path" ]]; then
    echo "Oh pleae, Enter something :D, now to call this script again use prepare"
    exit 1
fi

# The typed path is interpreted relative to the virtualenv root.
if [[ ! -d "$VIRTUAL_ENV/$requirements_path" ]]; then
    # Report the base path itself rather than an unquoted directory listing,
    # and signal the failure to the caller.
    echo "This is not a vail directory under $VIRTUAL_ENV path"
    exit 1
fi

echo "creating requirements.txt"
pip freeze > "$VIRTUAL_ENV/$requirements_path/requirements.txt"

#
# TODO: add environment variables to set DJANGO_PATH and REQUIREMENTS_PATH.
#
@swetabhmukherjee
Copy link

swetabhmukherjee commented Sep 23, 2019

I'm trying to embed a requirements.txt file with the package variable, but it's showing an 'invalid requirement' error. Please help. I want the packages to be installed via a text file, i.e. requirements.txt, and the package dependencies via the generated additional_requirements.txt file.

P.S. My code works when only one package is specified in the requirements.txt file but fails when more than one package name is entered.

@mamparra69
Copy link

import os
import pandas as pd
import numpy as np
from math import radians, sin, cos, atan2, sqrt
from datetime import datetime

# -----------------------------------------------------------------------------
# CONFIGURATION
# -----------------------------------------------------------------------------

DATA_FOLDER = "./"  # Folder where your Excel files are stored

# List all your Excel filenames here.
# Update these lists as needed or glob them automatically if you prefer.
EXCEL_FILES = [
    "WK.xlsx",
    "27727861641.xlsx",
    "G(1).xlsx",
    "Schalk Pienaar.xlsx",
    "CR.xlsx",
    "BP_1123383_-Police_Case_Query-_AMA.xlsx",
    "summary of cell numbers.xlsx",
    "Manie 27823078393_1.xlsx",
    "Manie 27823078393_2.xlsx",
]

# Phone number references (update to match your data format exactly)
MANIE = "+27823078393"
SP = "+27824634777"
GG = "+27637762604"
JU = "+27727861641"
CALLIE = "+27834124723"
WK = "+27825248358"  # Example victim

# Columns in your Excel data (update to match your actual column names)
COL_CALLING = "CallingNumber"  # e.g. "MSISDN" or "CallingNumber"
COL_CALLED = "CalledNumber"  # e.g. "Other Party"
COL_LAT = "Latitude"
COL_LON = "Longitude"
COL_DATE = "Start Date"  # e.g. "StartDate", "Date/Time"
COL_DURATION = "Call Duration"  # e.g. "Call Duration", "Duration"

# -----------------------------------------------------------------------------
# HELPER FUNCTIONS
# -----------------------------------------------------------------------------

def load_all_excel(files, folder=DATA_FOLDER):
    """
    Load every existing Excel file and concatenate them into one DataFrame.

    Args:
        files: Iterable of file names, each resolved relative to ``folder``.
        folder: Directory that contains the files.

    Returns:
        A single DataFrame holding the rows of all files that were found,
        with a fresh 0..n-1 index. An empty DataFrame is returned when none
        of the files exist. Missing files are skipped after printing a
        warning.
    """
    frames = []
    for f in files:
        path = os.path.join(folder, f)
        if os.path.exists(path):
            frames.append(pd.read_excel(path))
        else:
            print(f"WARNING: File not found -> {path}")

    if not frames:
        return pd.DataFrame()
    # Concatenate once: concatenating inside the loop recopies all rows
    # gathered so far on every iteration and mixes in an empty seed frame.
    return pd.concat(frames, ignore_index=True)

def haversine_distance(lat1, lon1, lat2, lon2):
    """
    Calculate the Haversine distance (in meters) between two lat/lon pairs.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.

    Returns:
        The great-circle distance in meters on a sphere of radius
        6,371,000 m.
    """
    R = 6371_000  # Radius of Earth in meters
    phi1, phi2 = radians(lat1), radians(lat2)
    dphi = radians(lat2 - lat1)
    dlambda = radians(lon2 - lon1)

    a = sin(dphi / 2) ** 2 + cos(phi1) * cos(phi2) * sin(dlambda / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c

# -----------------------------------------------------------------------------
# MAIN ANALYSIS
# -----------------------------------------------------------------------------

def main():
    """
    Run the four call-record analyses and write one CSV report for each.

    Reports (written to the current working directory):
        (A) proximity_15m.csv - pairs of records located at most 15 m
            apart whose timestamps differ by at most 30 seconds.
        (B) communication_manie_sp_gg_ju.csv - records where the calling or
            the called number is one of MANIE, SP, GG, JU.
        (C) gg_callie_ju_14_11_to_22_11.csv - records involving GG, CALLIE
            or JU dated 2022-11-14 through 2022-11-22 (both days included).
        (D) gg_ju_interactions.csv - records exchanged directly between GG
            and JU, in either direction.

    Prints a message and returns without writing anything when no data
    could be loaded or an expected column is missing.
    """
    # 1. Load & clean data
    df = load_all_excel(EXCEL_FILES, DATA_FOLDER)
    if df.empty:
        print("No data loaded. Please check your file paths and names.")
        return

    # If your files use different headers, rename them here first, e.g.
    # df.rename(columns={"MSISDN": COL_CALLING, "Other Party": COL_CALLED}).
    required_columns = [COL_CALLING, COL_CALLED, COL_LAT, COL_LON, COL_DATE]
    missing_columns = [c for c in required_columns if c not in df.columns]
    if missing_columns:
        # Fail with an actionable message instead of a bare KeyError below.
        print(f"Missing expected column(s): {missing_columns}. Please check the COL_* settings.")
        return

    # 2. Convert date column to datetime (unparseable values become NaT)
    df[COL_DATE] = pd.to_datetime(df[COL_DATE], errors="coerce")

    # -------------------------------------------------------------------------
    # (A) Instances where 2+ phones were within 15m at the same time
    # -------------------------------------------------------------------------
    # Naive approach: sort by time, then compare each record only with the
    # later records that fall inside the time window. This is still quadratic
    # in the worst case; for large data sets consider spatial indexing or
    # chunking.
    df_sorted = df.dropna(subset=[COL_LAT, COL_LON, COL_DATE]).sort_values(by=COL_DATE)
    records = df_sorted.to_dict(orient="records")

    # "At the same time" is interpreted as timestamps within 30 seconds.
    TIME_THRESHOLD = pd.Timedelta(seconds=30)

    # Fixed column order so the CSV gets a header even with zero matches.
    proximity_columns = [
        "Phone1_Calling", "Phone1_Called", "Phone1_Lat", "Phone1_Lon",
        "Phone2_Calling", "Phone2_Called", "Phone2_Lat", "Phone2_Lon",
        "Distance_m", "Time1", "Time2",
    ]
    proximity_results = []

    for i, r1 in enumerate(records):
        for j in range(i + 1, len(records)):
            r2 = records[j]

            # Records are sorted by time, so once r2 is outside the window
            # every later record is outside it as well.
            if r2[COL_DATE] - r1[COL_DATE] > TIME_THRESHOLD:
                break

            dist = haversine_distance(r1[COL_LAT], r1[COL_LON], r2[COL_LAT], r2[COL_LON])
            if dist <= 15:
                # Both numbers of the first record carry the "Phone1_" prefix,
                # mirroring the "Phone2_" columns of the second record.
                proximity_results.append({
                    "Phone1_Calling": r1[COL_CALLING],
                    "Phone1_Called": r1[COL_CALLED],
                    "Phone1_Lat": r1[COL_LAT],
                    "Phone1_Lon": r1[COL_LON],
                    "Phone2_Calling": r2[COL_CALLING],
                    "Phone2_Called": r2[COL_CALLED],
                    "Phone2_Lat": r2[COL_LAT],
                    "Phone2_Lon": r2[COL_LON],
                    "Distance_m": round(dist, 2),
                    "Time1": r1[COL_DATE],
                    "Time2": r2[COL_DATE],
                })

    proximity_df = pd.DataFrame(proximity_results, columns=proximity_columns)
    proximity_df.to_csv("proximity_15m.csv", index=False)
    print("(A) Proximity analysis complete. Results -> proximity_15m.csv")

    # -------------------------------------------------------------------------
    # (B) Communication between Manie, SP, GG, and JU
    # -------------------------------------------------------------------------
    # Keep every record where either the calling or the called number is in
    # the set of interest.
    interest_set = {MANIE, SP, GG, JU}
    mask_interest = df[COL_CALLING].isin(interest_set) | df[COL_CALLED].isin(interest_set)
    df_comm = df[mask_interest].copy()
    df_comm.to_csv("communication_manie_sp_gg_ju.csv", index=False)
    print("(B) Communication between Manie, SP, GG, and JU -> communication_manie_sp_gg_ju.csv")

    # -------------------------------------------------------------------------
    # (C) Interactions between GG, Callie, and JU in 14/11/2022 - 22/11/2022
    # -------------------------------------------------------------------------
    start_date = pd.to_datetime("2022-11-14")
    end_date = pd.to_datetime("2022-11-22")
    # end_date is midnight at the START of the 22nd; compare against the next
    # midnight so that records during the final day are included.
    end_exclusive = end_date + pd.Timedelta(days=1)

    # We consider calls from or to GG, Callie, JU
    subset_phones = {GG, CALLIE, JU}
    mask_subset = (
        (df[COL_DATE] >= start_date)
        & (df[COL_DATE] < end_exclusive)
        & (df[COL_CALLING].isin(subset_phones) | df[COL_CALLED].isin(subset_phones))
    )
    df_subset = df[mask_subset].copy()

    df_subset.to_csv("gg_callie_ju_14_11_to_22_11.csv", index=False)
    print("(C) Interactions for GG, Callie, JU (14/11/2022 - 22/11/2022) -> gg_callie_ju_14_11_to_22_11.csv")

    # -------------------------------------------------------------------------
    # (D) Interactions between GG and JU
    # -------------------------------------------------------------------------
    # Records exchanged directly between GG and JU, in either direction,
    # across the entire dataset.
    mask_gg_ju = (
        ((df[COL_CALLING] == GG) & (df[COL_CALLED] == JU))
        | ((df[COL_CALLING] == JU) & (df[COL_CALLED] == GG))
    )
    df_gg_ju = df[mask_gg_ju].copy()

    df_gg_ju.to_csv("gg_ju_interactions.csv", index=False)
    print("(D) Interactions between GG and JU -> gg_ju_interactions.csv")

    print("All analyses complete. CSV reports generated.")

# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment