-
-
Save Fazzani/18b560588b7f297e2323070ec8231660 to your computer and use it in GitHub Desktop.
Custom Faker Provider
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Based on the following:
# https://www.datainsightonline.com/post/how-to-generate-fake-dataset-with-python-faker-library
# https://deparkes.co.uk/2020/12/28/python-fake-data-with-faker/
from faker.providers import BaseProvider | |
from faker import Faker | |
import pandas as pd | |
from loguru import logger | |
# Shared Faker instance configured with the "en_GB" locale.
fake = Faker("en_GB")

# Preview five samples per generator, one generator at a time: order ids
# first, then dates from the last two years up to today, then names.
# Each entry pairs the log template with the callable producing the value.
_PREVIEW_SAMPLERS = (
    ("order: %s", lambda: fake.bothify(text="ord-###")),
    ("time: %s", lambda: fake.date_between(start_date="-2y", end_date="today")),
    ("name: %s", lambda: fake.name()),
)
for _template, _sample in _PREVIEW_SAMPLERS:
    for _ in range(5):
        logger.info(_template % _sample())
class MyProvider(BaseProvider):
    """Custom Faker provider offering item categories, foods and fruits.

    Each public method returns one randomly chosen element from the
    matching class-level list, using ``random_element`` from the base class.
    """

    # The class body previously assigned ``__provider__`` three times in a
    # row ("item_category", "food", "fruit"). Only the last assignment
    # survives class creation, so the single effective value is kept.
    __provider__ = "fruit"

    # Pools the methods below sample from.
    item_categories = ["food", "fruit"]
    foods = ["rice", "yam", "beans", "spaghetti"]
    fruits = ["orange", "mango", "banana", "apple"]

    def item_category(self) -> str:
        """Return a random entry from ``item_categories``."""
        return self.random_element(self.item_categories)

    def food(self) -> str:
        """Return a random entry from ``foods``."""
        return self.random_element(self.foods)

    def fruit(self) -> str:
        """Return a random entry from ``fruits``."""
        return self.random_element(self.fruits)
# Register the custom provider; its methods are then called through ``fake``.
fake.add_provider(MyProvider)

# Preview five randomly chosen categories.
for _ in range(5):
    category = fake.item_category()
    logger.info("category: %s" % category)
def link_variables():
    """Return a random category paired with an item drawn to match it.

    The category is drawn first. A "fruit" category gets its item from
    ``fake.fruit()``; any other category gets it from ``fake.food()``.

    Returns:
        A dict with the keys ``"Item_Category"`` and ``"Item_Name"``.
    """
    category = fake.item_category()
    if category == "fruit":
        name = fake.fruit()
    else:
        name = fake.food()
    return {"Item_Category": category, "Item_Name": name}
# Preview five linked category/item pairs.
for _ in range(5):
    logger.info("variables: %s" % link_variables())

# Build 100 order rows. Each row holds the order fields first, then the
# linked category/item fields, so the column order follows insertion order.
thelist = []
for _ in range(100):
    row = {
        "Order_ID": fake.bothify(text="ord-###"),
        "Order_Date": fake.date_between(start_date="-2y", end_date="today"),
        "Customer_Name": fake.name(),
    }
    row.update(link_variables())
    thelist.append(row)

# Turn the rows into a DataFrame and log its first ten rows.
dataset_frame = pd.DataFrame(thelist)
logger.info("\n%s" % dataset_frame.head(10))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment