|
import pandas as pd
|
|
import numpy as np
|
|
from sklearn.model_selection import train_test_split
|
|
from sklearn.linear_model import LinearRegression
|
|
from sklearn.preprocessing import OneHotEncoder
|
|
from sklearn.compose import ColumnTransformer
|
|
from sklearn.metrics import mean_squared_error
|
|
import joblib
|
|
|
|
|
|
def generate_training_data(num_samples=1000, random_state=None):
    """Build a synthetic pricing dataset for the price-prediction model.

    Each row pairs a randomly chosen product category with a click count
    drawn uniformly from 1..99 and a price computed as
    ``base + slope * num_clicks + gaussian_noise``, rounded to 2 decimals.
    The per-category ``(base, slope, noise_std)`` values are listed in
    ``pricing`` below.

    Args:
        num_samples: Number of rows to generate. Zero (or a negative value)
            yields an empty frame that still has the three columns.
        random_state: Optional integer seed. When given, a private
            ``numpy.random.RandomState`` is used, so repeated calls return
            identical data and NumPy's global generator is left untouched.
            When ``None`` (the default) the global ``numpy.random`` state is
            used, exactly as before this parameter existed.

    Returns:
        pandas.DataFrame with columns ``product_type``, ``num_clicks`` and
        ``price``.
    """
    # (base price, price increase per click, std-dev of the noise term)
    pricing = {
        'Electronics': (100, 2, 20),
        'Clothing': (30, 0.5, 10),
        'Books': (10, 0.1, 5),
        'Home Goods': (50, 1, 15),
    }
    products = list(pricing)

    # The legacy RandomState API mirrors the module-level functions, so the
    # same calls work for both the seeded and the global-state path.
    if random_state is None:
        rng = np.random
    else:
        rng = np.random.RandomState(random_state)

    data = []
    for _ in range(num_samples):
        # Draw order (category, clicks, noise) is kept fixed so a caller who
        # seeds the global generator still gets the same sequence as before.
        product_type = rng.choice(products)
        num_clicks = rng.randint(1, 100)  # upper bound is exclusive
        base, slope, noise_std = pricing[str(product_type)]
        price = round(base + slope * num_clicks + rng.normal(0, noise_std), 2)
        data.append([product_type, num_clicks, price])

    return pd.DataFrame(data, columns=['product_type', 'num_clicks', 'price'])
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Training script: generate synthetic data, fit the feature encoder and a
# linear regression on it, report hold-out error, and persist both fitted
# objects with joblib.
# ---------------------------------------------------------------------------

# 1. Synthetic dataset. The generator is not seeded here, so each run trains
#    on a fresh sample and the reported MSE varies slightly between runs.
training_data = generate_training_data(num_samples=1000)

# 2. Separate predictors from the target.
X = training_data[['product_type', 'num_clicks']]
y = training_data['price']

# 3. Hold out 20% of the raw rows BEFORE fitting any preprocessing, so the
#    encoder never sees the evaluation rows (avoids train/test leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 4. One-hot encode the category and pass num_clicks through unchanged.
#    sparse_threshold=0 forces a dense array regardless of how many
#    categories exist, which the DataFrame construction below relies on.
preprocessor = ColumnTransformer(
    transformers=[('onehot', OneHotEncoder(), ['product_type'])],
    remainder='passthrough',
    sparse_threshold=0,
)
X_train_processed = preprocessor.fit_transform(X_train_raw)
X_test_processed = preprocessor.transform(X_test_raw)

# 5. Wrap the encoded arrays in DataFrames carrying the transformer's output
#    column names, so the model is fitted with named features as before.
feature_names = preprocessor.get_feature_names_out(['product_type', 'num_clicks'])
X_train = pd.DataFrame(
    X_train_processed, columns=feature_names, index=X_train_raw.index
)
X_test = pd.DataFrame(
    X_test_processed, columns=feature_names, index=X_test_raw.index
)

# 6. Fit the regression on the training split.
model = LinearRegression()
model.fit(X_train, y_train)

# 7. Evaluate on the held-out split.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error on Test Set: {mse}")

# 8. Persist the fitted model and the fitted preprocessor as separate files;
#    inference code must apply the preprocessor before calling the model.
joblib.dump(model, 'price_prediction_model.joblib')
joblib.dump(preprocessor, 'price_preprocessor.joblib')

print("Trained Linear Regression model and preprocessor saved.")