Created
April 29, 2022 14:54
-
-
Save thoroc/19a8401f3d4724a0255764d6e5317025 to your computer and use it in GitHub Desktop.
Custom Faker Provider
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Based on the following:
# https://www.datainsightonline.com/post/how-to-generate-fake-dataset-with-python-faker-library
# https://deparkes.co.uk/2020/12/28/python-fake-data-with-faker/
from faker.providers import BaseProvider | |
from faker import Faker | |
import pandas as pd | |
from loguru import logger | |
# Shared Faker instance configured with the "en_GB" locale.
fake = Faker("en_GB")

# Log five samples from each of three built-in generators.
for _ in range(5):
    sample_order = fake.bothify(text="ord-###")
    logger.info(f"order: {sample_order}")

for _ in range(5):
    sample_date = fake.date_between(start_date="-2y", end_date="today")
    logger.info(f"time: {sample_date}")

for _ in range(5):
    sample_name = fake.name()
    logger.info(f"name: {sample_name}")
class MyProvider(BaseProvider):
    """Custom Faker provider for item categories and matching item names.

    Each public method draws one value from the corresponding class-level
    list via ``random_element``.
    """

    # The original assigned ``__provider__`` three times in a row; only the
    # last value ("fruit") ever took effect, so that value is kept and the
    # two dead assignments are dropped.
    __provider__ = "fruit"

    # Pools the generator methods below sample from.
    item_categories = ["food", "fruit"]
    foods = ["rice", "yam", "beans", "spaghetti"]
    fruits = ["orange", "mango", "banana", "apple"]

    def item_category(self) -> str:
        """Return a randomly chosen entry of ``item_categories``."""
        return self.random_element(self.item_categories)

    def food(self) -> str:
        """Return a randomly chosen entry of ``foods``."""
        return self.random_element(self.foods)

    def fruit(self) -> str:
        """Return a randomly chosen entry of ``fruits``."""
        return self.random_element(self.fruits)
# Register the custom provider so its methods can be called on `fake`.
fake.add_provider(MyProvider)

# Log five sample categories drawn through the newly registered provider.
for _ in range(5):
    sample_category = fake.item_category()
    logger.info(f"category: {sample_category}")
def link_variables():
    """Generate an item category together with an item name that matches it.

    Returns:
        dict: A mapping with the keys ``"Item_Category"`` and ``"Item_Name"``.
        The name comes from ``fake.fruit()`` when the category is
        ``"fruit"`` and from ``fake.food()`` otherwise.
    """
    category = fake.item_category()
    if category == "fruit":
        name = fake.fruit()
    else:
        name = fake.food()
    return {"Item_Category": category, "Item_Name": name}
# Log five sample category/item pairs produced by link_variables().
for _ in range(5):
    logger.info(f"variables: {link_variables()}")

# Build 100 fake order rows. Each row is a fresh dict, so no defensive copy
# is needed; the linked category/item pair is merged in by unpacking.
# Dict displays evaluate left to right, so the Faker calls run in the same
# order as before (order id, date, name, then the linked pair).
thelist = [
    {
        "Order_ID": fake.bothify(text="ord-###"),
        "Order_Date": fake.date_between(start_date="-2y", end_date="today"),
        "Customer_Name": fake.name(),
        **link_variables(),
    }
    for _ in range(100)
]

# Load the rows into a DataFrame and log the first ten for inspection.
dataset_frame = pd.DataFrame(thelist)
logger.info(f"\n{dataset_frame.head(10)}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is very helpful for me