# blob: 02450ad9fea24918bb2a30546188c3accb037280 [file] [log] [blame]
"""
Module to help generate fake data for testing purposes.
"""
import pandas as pd
from faker import Faker
# Shared Faker instance; every fake value in this script is drawn from it.
fake = Faker()
# Column order of the user table.
columns = ["id", "name", "email", "address", "phone_number", "date_of_birth"]
# Build the user table one column at a time: sequential ids 1..100 first,
# then 100 values from each Faker provider, in the same order as `columns`.
data = {"id": list(range(1, 101))}
data.update(
    (field, [provider() for _ in range(100)])
    for field, provider in (
        ("name", fake.name),
        ("email", fake.email),
        ("address", fake.address),
        ("phone_number", fake.phone_number),
        ("date_of_birth", fake.date_of_birth),
    )
)
# Assemble the DataFrame and write it to CSV without the pandas index column.
df = pd.DataFrame(data, columns=columns)
df.to_csv("data.csv", index=False)
# Now create some fake purchase data that references the users generated above.
columns = ["id", "user_id", "product_id", "price", "purchase_date"]
# Sizes of the generated data set. NUM_USERS mirrors the 100 user ids written
# to data.csv so that every purchase's user_id falls in that id range.
NUM_USERS = 100
NUM_PRODUCTS = 20
NUM_PURCHASES = 1000
# Fix one price per product so that every purchase of the same product
# carries the same price. Keys run from 1 to NUM_PRODUCTS.
fake_price_data = {
    product_id: fake.random_int(min=1, max=1000)
    for product_id in range(1, NUM_PRODUCTS + 1)
}
# Draw the product ids up front: the "price" column is looked up from them,
# so both columns must be built from the same list. The upper bound is tied
# to NUM_PRODUCTS so every drawn id has an entry in fake_price_data.
product_ids = [
    fake.random_int(min=1, max=NUM_PRODUCTS) for _ in range(NUM_PURCHASES)
]
data = {
    "id": list(range(1, NUM_PURCHASES + 1)),
    "user_id": [
        fake.random_int(min=1, max=NUM_USERS) for _ in range(NUM_PURCHASES)
    ],
    "product_id": product_ids,
    "price": [fake_price_data[product_id] for product_id in product_ids],
    "purchase_date": [fake.date_this_year() for _ in range(NUM_PURCHASES)],
}
# Create a DataFrame
df = pd.DataFrame(data, columns=columns)
# Save to SQLite DB
import sqlite3
conn = sqlite3.connect("purchase_data.db")
try:
    # Replace any table left over from a previous run: with the default
    # if_exists="fail", re-running the script raises ValueError because the
    # "purchase_data" table already exists in purchase_data.db.
    df.to_sql("purchase_data", conn, index=False, if_exists="replace")
finally:
    # Release the database file even when the write fails.
    conn.close()
# Keep a CSV copy of the same rows alongside the database.
df.to_csv("purchase_data.csv", index=False)