Skip to content

Instantly share code, notes, and snippets.

@roganjoshp
Created December 6, 2024 19:13
Show Gist options
  • Save roganjoshp/59bbabf8790aa217c878327f0dd07518 to your computer and use it in GitHub Desktop.
Save roganjoshp/59bbabf8790aa217c878327f0dd07518 to your computer and use it in GitHub Desktop.
Pandas string operations
from string import ascii_letters
import timeit
import numpy as np
import pandas as pd
# Benchmark fixture: N_ROWS random strings of WORD_LENGTH ASCII letters each,
# kept both as a plain Python list (`letters`) and as column "a" of `df`.
N_ROWS = 1_000_000
WORD_LENGTH = 10

# Draw every character with a single NumPy call (alphabet built once) instead
# of one np.random.choice call per string. The result is a C-contiguous
# (N_ROWS, WORD_LENGTH) array of one-character strings; viewing it with a
# WORD_LENGTH-character string dtype fuses each row into one string without
# a Python-level join.
letters = (
    np.random.choice(
        list(ascii_letters), size=(N_ROWS, WORD_LENGTH), replace=True
    )
    .view(f"U{WORD_LENGTH}")
    .ravel()
    .tolist()
)
df = pd.DataFrame({"a": letters})
def vectorized_approach(df):
    """Lower-case column ``"a"`` of *df* through the pandas ``.str`` accessor.

    The lowered Series is computed and then discarded: nothing is returned
    and *df* itself is not modified (the result is never assigned back to
    the column).

    Args:
        df: A DataFrame with a string column named ``"a"``.
    """
    df["a"].str.lower()
def non_vectorized(items):
    """Lower-case every string in *items* with a plain list comprehension.

    The new list is built and then discarded: nothing is returned and
    *items* is left unchanged.

    Args:
        items: An iterable of strings.
    """
    [item.lower() for item in items]
if __name__ == "__main__":
    # timeit runs each statement in its own namespace, so the functions and
    # their inputs are imported from this script through `setup`.
    # Each printed figure is the total time in seconds for 100 calls.
    print(
        "Vectorized approach",
        timeit.timeit(
            "vectorized_approach(df)",
            setup="from __main__ import vectorized_approach, df",
            number=100,
        ),
    )
    print(
        "Non vectorized approach",
        timeit.timeit(
            "non_vectorized(letters)",
            setup="from __main__ import non_vectorized, letters",
            number=100,
        ),
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment