Upload files to 'Demand_forcasting'

This commit is contained in:
2023-04-12 10:47:05 +00:00
parent d0511702f3
commit 4e149a3f57
+610
View File
@@ -0,0 +1,610 @@
from flask import Flask, render_template, send_file, request, redirect, Response
import os
import pandas as pd
import warnings
import json
import requests
from urllib.request import urlopen
# Suppress all Python warnings for the whole process.
warnings.filterwarnings("ignore")
# Flask application object; the @app.route decorators below register on it.
app = Flask(__name__)
@app.route("/", methods=["GET"])
def home():
    """Liveness endpoint: answer GET / with a fixed plain-text status string."""
    status_message = 'forcasting app running'
    return status_message
######################################################################################################################
# Module-level list; nothing in the visible part of this file reads or appends to it.
list_output=[]
def day(Num,df):
# Forecast daily sales for each item over the next `Num` days and POST every
# item's forecast to the remote "saveforecast" endpoint.
#
# Parameters:
#   Num: number of future days to forecast (passed through int() below).
#   df:  the incoming value is never read; the name is rebound to the
#        prediction frame further down. Input data is fetched over HTTP instead.
# Returns: None (the body has no return statement).
#
# NOTE(review): this view of the file has lost its indentation, so the nesting
# implied by the comments below (what sits inside the per-item loop, the nested
# helpers, the try blocks) is inferred from statement order - confirm against
# the real file.
#url='https://qa.bizgaze.app/apis/v4/bizgaze/integrations/demandforecast/getitemdata'
url='https://test.bizgaze.app/apis/v4/bizgaze/integrations/demandforecast/getitemdata'
# First request: plain urlopen without headers; its parsed JSON body is then
# sent as the `data` argument of the authenticated GET below.
# NOTE(review): the reason for echoing the first response back as the request
# body is not visible here - confirm it is required by the API.
response = urlopen(url)
data_json = json.loads(response.read())
# NOTE(review): the API token is hard-coded in source; consider moving it to
# configuration / environment.
headers = {
'Authorization':'stat 27e6b51b278d444aa0b70ed60419b04c',
#'Authorization':'stat 873f2e6f70b3483e983972f96fbf5ea4',#qa
'Content-Type': 'application/json'
}
response = requests.request("GET", url, headers=headers, data=data_json)
#print("##############################################################")
a=response.text
# print(response.text)
import pandas as pd
# Read the response as an index-oriented JSON object, turn it into key/value
# rows, then decode the first value as JSON and load it into a DataFrame.
# presumably the first field of the response holds a JSON-encoded string of
# sales records - verify against the API contract.
df2 = pd.read_json(response.text, orient ='index')
df2=df2.reset_index()
df2.columns = ['key','value']
#print(df2)
a=df2['value'][0]
j=json.loads(a)
userdata = pd.DataFrame(j)
#df1
# `itemid` is built and renamed here but not referenced again in this function.
itemid=userdata[['itemname','itemid']]
itemid.columns = ['ItemName', 'ItemId']
#df1=pd.read_csv(r'./upload/' + name)
#df1=df1[df1['obdate']!='01/01/0001']
# Positional rename: assumes the payload has exactly these four columns in this
# order - TODO confirm.
userdata.columns = ['journaldate','sum','itemname','itemid']
# import pandas as pd
# userdata = pd.read_csv(r'C:\Users\Bizga\Desktop\forcast\5yearsitems.csv')
# itemid = userdata[['itemname', 'itemid']]
#userdata['journaldate'] = pd.to_datetime(userdata['journaldate'])
# Split the date string on "/" into three parts, named year/month/day by
# position. assumes every journaldate has exactly three "/"-separated parts.
userdata["journaldate"] = userdata["journaldate"].astype(str)
userdata[["year", "month", "day"]] = userdata["journaldate"].str.split("/", expand = True)
#userdata['Month-Year']=userdata['year'].astype(str)+'-'+userdata['month'].astype(str)
# `item_unique_name` is not used again in this function.
item_unique_name = userdata['itemname'].unique()
#df=pd.read_csv("C:\\Users\\Bizgaze\\2021_2022.csv")
# Group the DataFrame by the 'item' column
grouped = userdata.groupby('itemname')
# Print the unique items in the 'item' column
#print(grouped.groups.keys())
# Iterate over the unique items and print the group data
# Note: the loop variable rebinds `userdata` to each item's rows.
for item, userdata in grouped:
print("itemname: ", item)
# Item id is taken from the last row of the group.
item_id = userdata.iloc[-1]['itemid']
print(item_id)
# Total the rows per journal date.
userdata= userdata.groupby('journaldate').sum()
userdata= userdata.reset_index()
#print(userdata)
# `fulldata` is only referenced by commented-out code further down.
fulldata=userdata[['journaldate','sum']]
fulldata.columns = ["Dates","SALES"]
#************************************************************************************************************************
## Use Techniques Differencing
import pandas as pd
from pandas import DataFrame
# userdata=pd.read_csv(r"C:\Users\Bizgaze\ipynb files\TS forcasting\working\139470.csv")
# NOTE(review): this three-name assignment only succeeds if the grouped sum
# left exactly three columns; whether the string columns survive the sum
# depends on the pandas version - verify with the pinned version.
userdata.columns = ['Date', 'sales','sku']
from statsmodels.tsa.stattools import adfuller
# Filled by stationary_test() with the date and sales series it accepts.
DATE=[]
SALES=[]
# Run the Augmented Dickey-Fuller test on `series` and print the statistic and
# p-value. When p <= 0.05 only a message is printed; otherwise
# stationary_test(userdata) is called and its return value printed.
# Has no return statement, so callers always receive None.
def adf_test(series,userdata):
result=adfuller(series)
print('ADF Statistics: {}'.format(result[0]))
print('p- value: {}'.format(result[1]))
if result[1] <= 0.05:
print("strong evidence against the null hypothesis, reject the null hypothesis. Data has no unit root and is stationary")
else:
#print(userdata)
print(stationary_test(userdata))
print("weak evidence against null hypothesis, time series has a unit root, indicating it is non-stationary ")
#********************************************* stationary or non-stationary **********************************************************
# For i = 1..12: shift the 'sales' column by i rows, drop the resulting NaN
# rows in place, and re-run adf_test on the shifted series. The shifts are
# applied to the same frame on every pass, so they accumulate.
# NOTE(review): the heading above says "differencing" but the code uses
# shift(), not diff() - confirm which is intended.
def stationary_test(userdata):
data=pd.DataFrame(userdata)
for i in range(1,13):
print(i)
sales_data=DataFrame()
data['sales']=data['sales'].shift(i)
data.dropna(inplace=True)
#print( userdata['sales'])
try:
# adf_test is called with the placeholder string "nothing" as its second
# argument. NOTE(review): if its else-branch runs, stationary_test("nothing")
# is invoked; pd.DataFrame on a bare string presumably raises ValueError,
# which the except below swallows so the loop moves on - confirm.
X=adf_test(data["sales"],userdata="nothing")
# adf_test returns None, so str(X) is "None" and this condition is never true.
if "non-stationary" in str(X):
print("non-stationary")
else:
print("stationary")
#print(userdata[["Date","sales"]])
#df_sale=pd.DataFrame(userdata)
DATE.append(data["Date"])
SALES.append(data["sales"])
#df4 = pd.concat([data, sales_data], axis=1)
return "done"
# Unreachable if it sits at the same nesting level as the return above.
break
except ValueError:
pass
# Kick off the test on the raw series; any ValueError is silently ignored.
try:
adf_test(userdata["sales"],userdata)
except ValueError:
pass
# Turn the collected series (if any) into frames; when nothing was collected
# the column lookups below raise KeyError and the unshifted columns are used.
sales=pd.DataFrame(SALES).T
dates=pd.DataFrame(DATE).T
try:
df4 = pd.concat([dates["Date"],sales["sales"]], axis=1)
df4=df4.dropna()
print(df4)
except KeyError:
df4=userdata[['Date','sales']]
df4=df4.dropna()
print(df4)
pass
#####################################################################################################################
userdata=df4
# Last date in the training data; used as the base for the future dates.
a = userdata.iloc[-1]['Date']
# Normalise the dates through to_datetime, convert back to strings and split
# on "-" into integer year/month/day feature columns.
userdata['Date'] = pd.to_datetime(userdata['Date'])
userdata["Date"] = userdata["Date"].astype(str)
userdata[["year", "month", "day"]] = userdata["Date"].str.split("-", expand = True)
#userdata[["year", "month"]] = userdata["Month"].str.split("-", expand=True)
#userdata = userdata[["year","month",'sum']]
userdata["year"] = userdata["year"].astype(int)
userdata["month"] = userdata["month"].astype(int)
userdata["day"] = userdata["day"].astype(int)
#####################################################################################################################
# Build the next `Num` calendar days after the last training date as strings
# (the " 00:00:00" time suffix is stripped).
list_dates=[]
import datetime
days=int(Num)+1
import pandas as pd
base_date=pd.to_datetime(a)
for x in range(1,days):
dates=(base_date + datetime.timedelta(days=x))
dates=str(dates).replace(" 00:00:00","")
#print(dates)
list_dates.append(dates)
# `fut_date` keeps the date strings for the output; `future_dates` is converted
# into integer year/month/day columns for the model.
fut_date = pd.DataFrame(list_dates)
fut_date.columns = ["Dates"]
future_dates=pd.DataFrame(list_dates)
future_dates.columns=["Dates"]
future_dates[["year", "month", "day"]] = future_dates["Dates"].str.split("-", expand=True)
future_dates.drop(['Dates'], axis=1, inplace=True)
future_dates["year"] = future_dates["year"].astype(int)
future_dates["month"] = future_dates["month"].astype(int)
future_dates["day"] = future_dates["day"].astype(int)
#print(future_dates)
###############################################################################
userdata['sales']=userdata["sales"].astype(float)
# Naming is inverted relative to convention: `dependent` holds the feature
# columns and `independent` the target passed to model.fit below.
dependent = userdata[['year','month','day']]
independent = userdata['sales']
# numpy, xgboost, train_test_split, GridSearchCV, roc_auc_score and matplotlib
# are imported here but not used in the visible code of this function.
import numpy as np
import pandas as pd
import xgboost
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
#model = xgboost.XGBRegressor()
from sklearn.ensemble import RandomForestRegressor
# Fit a random forest on (year, month, day) -> sales and predict the future dates.
model = RandomForestRegressor(random_state=1,n_jobs=-1)
#model.fit(dependent, independent)
model.fit(dependent, independent)
#future=pd.read_csv('future_dates.csv')
future_prediction = model.predict(future_dates)
#print(future_prediction)
# From here on `df` is the prediction frame, not the function argument.
df=pd.DataFrame(future_prediction)
df.columns = ["SALES"]
frames = [fut_date, df]
result = pd.concat(frames,axis=1)
result['itemname'] = item
result['itemid'] =item_id
result.columns = ['Date','Predict','ItemName','ItemId']
#result['Predict']=result["Predict"].astype(int)
# Limits are mean +/- 3 standard deviations of the predictions, i.e. one
# constant value per item repeated on every row.
result['UpperLimit']=result["Predict"].mean()+result['Predict'].std()*3
result['LowerLimit']=result['Predict'].mean()-result['Predict'].std()*3
print(result)
# The forecast is written to forcast.json and read straight back to build the
# POST payload.
result.to_json('forcast.json', orient="records")
# result['ItemName'] = item
# result['ItemId'] =item_id
# print(result)
# frames = [fulldata, result]
# final = pd.concat(frames)
# print('********************************************************')
# final['itemname'] = item
# final['itemid'] =item_id
# final.columns = ['Date','Predict','ItemName','ItemId']
# print(final)
# final.to_json('forcast.json', orient="records")
with open('forcast.json', 'r') as json_file:
json_load = json.load(json_file)
#url = "https://demo.bizgaze.app/apis/v4/bizgaze/integrations/demandforecast/saveforecast/List"
# NOTE(review): data is fetched from test.bizgaze.app above but saved to
# qa.bizgaze.app here, and the token below is annotated "#test" - confirm the
# intended environment.
url='https://qa.bizgaze.app/apis/v4/bizgaze/integrations/demandforecast/saveforecast/List'
payload = json.dumps(json_load)#.replace("]", "").replace("[", "")
print(payload)
# NOTE(review): another hard-coded API token.
headers = {
#'Authorization': 'stat 263162e61f084d3392f162eb7ec39b2c',#demo
'Authorization': 'stat 873f2e6f70b3483e983972f96fbf5ea4',#test
'Content-Type': 'application/json'
}
response = requests.request("POST", url, headers=headers, data=payload)
print("##############################################################")
print(response.text)
# Pause one second after the POST.
import time
time.sleep(1)
#############################################################################################################################################################
def month(Num,df):
# Forecast monthly sales for each item over the next `Num` months and return
# the forecast records loaded from forcast.json.
#
# Parameters:
#   Num: number of future months to forecast (passed through int() below).
#   df:  DataFrame of sales rows; it must already expose 'itemname' and
#        'itemid' columns and is then renamed positionally to
#        itemname / sum / journaldate / itemid.
# Returns: the object parsed from forcast.json (written with orient="records").
#
# Side effects: deletes/creates path.csv and writes forcast.json in the current
# working directory, and renames the columns of the caller's `df` (`userdata`
# is the same object).
#
# NOTE(review): this view of the file has lost its indentation, so the nesting
# implied by the comments below is inferred from statement order - confirm
# against the real file.
# #url='https://qa.bizgaze.app/apis/v4/bizgaze/integrations/demandforecast/getitemdata'
# url='https://test.bizgaze.app/apis/v4/bizgaze/integrations/demandforecast/getitemdata'
# response = urlopen(url)
# data_json = json.loads(response.read())
# headers = {
# 'Authorization':'stat 27e6b51b278d444aa0b70ed60419b04c',
# #'Authorization':'stat 873f2e6f70b3483e983972f96fbf5ea4',#qa
# 'Content-Type': 'application/json'
# }
# response = requests.request("GET", url, headers=headers, data=data_json)
# #print("##############################################################")
# a=response.text
# import pandas as pd
# df2 = pd.read_json(response.text, orient ='index')
# df2=df2.reset_index()
# df2.columns = ['key','value']
# #print(df2)
# a=df2['value'][0]
# j=json.loads(a)
# userdata = pd.DataFrame(j)
# #df1
# Start from a clean accumulator file: remove path.csv left by a previous run.
# NOTE(review): path.csv / forcast.json are fixed names in the working
# directory, so overlapping calls would share them - confirm this is acceptable.
filePath='path.csv'
if os.path.exists(filePath):
print('file exist')
os.remove('path.csv')
else:
print("file as it doesn't exists")
pass
# Alias, not a copy: the column assignments below act on the caller's frame.
userdata=df
# `itemid` is built and renamed here but not referenced again in this function.
itemid=userdata[['itemname','itemid']]
itemid.columns = ['ItemName', 'ItemId']
#df1=pd.read_csv(r'./upload/' + name)
#df1=df1[df1['obdate']!='01/01/0001']
# Positional rename: assumes exactly four columns in this order (note the order
# differs from the one used in day()) - TODO confirm against the caller.
userdata.columns = ['itemname','sum','journaldate','itemid']
# import pandas as pd
# userdata = pd.read_csv(r'C:\Users\Bizga\Desktop\forcast\5yearsitems.csv')
# itemid = userdata[['itemname', 'itemid']]
#userdata['journaldate'] = pd.to_datetime(userdata['journaldate'])
# Split the date string on "-" into three parts named year/month/day by
# position, then build a "year-month" key for monthly aggregation.
# assumes every journaldate has exactly three "-"-separated parts.
userdata["journaldate"] = userdata["journaldate"].astype(str)
userdata[["year", "month", "day"]] = userdata["journaldate"].str.split("-", expand = True)
userdata['Month-Year']=userdata['year'].astype(str)+'-'+userdata['month'].astype(str)
# `item_unique_name` is not used again in this function.
item_unique_name = userdata['itemname'].unique()
#df=pd.read_csv("C:\\Users\\Bizgaze\\2021_2022.csv")
# Group the DataFrame by the 'item' column
grouped = userdata.groupby('itemname')
# Print the unique items in the 'item' column
#print(grouped.groups.keys())
# Iterate over the unique items and print the group data
# Note: the loop variable rebinds `userdata` to each item's rows.
for item, userdata in grouped:
print("itemname: ", item)
# Item id is taken from the last row of the group.
item_id = userdata.iloc[-1]['itemid']
print(item_id)
# Total the rows per "year-month" key.
userdata= userdata.groupby('Month-Year').sum()
userdata= userdata.reset_index()
#print(userdata)
# `fulldata` is only referenced by commented-out code after this function.
fulldata=userdata[['Month-Year','sum']]
fulldata.columns = ["Dates","SALES"]
#************************************************************************************************************************
## Use Techniques Differencing
import pandas as pd
from pandas import DataFrame
# userdata=pd.read_csv(r"C:\Users\Bizgaze\ipynb files\TS forcasting\working\139470.csv")
# NOTE(review): this three-name assignment only succeeds if the grouped sum
# left exactly three columns; whether the string columns survive the sum
# depends on the pandas version - verify with the pinned version.
userdata.columns = ['Month', 'sales','sku']
from statsmodels.tsa.stattools import adfuller
# Filled by stationary_test() with the month and sales series it accepts.
DATE=[]
SALES=[]
# Run the Augmented Dickey-Fuller test on `series` and print the statistic and
# p-value. When p <= 0.05 only a message is printed; otherwise
# stationary_test(userdata) is called and its return value printed.
# Has no return statement, so callers always receive None.
def adf_test(series,userdata):
result=adfuller(series)
print('ADF Statistics: {}'.format(result[0]))
print('p- value: {}'.format(result[1]))
if result[1] <= 0.05:
print("strong evidence against the null hypothesis, reject the null hypothesis. Data has no unit root and is stationary")
else:
#print(userdata)
print(stationary_test(userdata))
print("weak evidence against null hypothesis, time series has a unit root, indicating it is non-stationary ")
#********************************************* stationary or non-stationary **********************************************************
# For i = 1..12: shift the 'sales' column by i rows, drop the resulting NaN
# rows in place, and re-run adf_test on the shifted series. The shifts are
# applied to the same frame on every pass, so they accumulate.
# NOTE(review): the heading above says "differencing" but the code uses
# shift(), not diff() - confirm which is intended.
def stationary_test(userdata):
data=pd.DataFrame(userdata)
for i in range(1,13):
print(i)
sales_data=DataFrame()
data['sales']=data['sales'].shift(i)
data.dropna(inplace=True)
#print( userdata['sales'])
try:
# adf_test is called with the placeholder string "nothing" as its second
# argument. NOTE(review): if its else-branch runs, stationary_test("nothing")
# is invoked; pd.DataFrame on a bare string presumably raises ValueError,
# which the except below swallows so the loop moves on - confirm.
X=adf_test(data["sales"],userdata="nothing")
# adf_test returns None, so str(X) is "None" and this condition is never true.
if "non-stationary" in str(X):
print("non-stationary")
else:
print("stationary")
#print(userdata[["Month","sales"]])
#df_sale=pd.DataFrame(userdata)
DATE.append(data["Month"])
SALES.append(data["sales"])
#df4 = pd.concat([data, sales_data], axis=1)
return "done"
# Unreachable if it sits at the same nesting level as the return above.
break
except ValueError:
pass
# Kick off the test on the raw series; any ValueError is silently ignored.
try:
adf_test(userdata["sales"],userdata)
except ValueError:
pass
# Turn the collected series (if any) into frames; when nothing was collected
# the column lookups below raise KeyError and the unshifted columns are used.
sales=pd.DataFrame(SALES).T
dates=pd.DataFrame(DATE).T
try:
df4 = pd.concat([dates["Month"],sales["sales"]], axis=1)
df4=df4.dropna()
print(df4)
except KeyError:
df4=userdata[['Month','sales']]
df4=df4.dropna()
print(df4)
pass
#####################################################################################################################
userdata=df4
# Last "year-month" key in the training data; base for the future months.
a = userdata.iloc[-1]['Month']
# Split the key back into integer year/month feature columns.
userdata[["year", "month"]] = userdata["Month"].str.split("-", expand=True)
#userdata = userdata[["year","month",'sum']]
userdata["year"] = userdata["year"].astype(int)
userdata["month"] = userdata["month"].astype(int)
#####################################################################################################################
#a = userdata.iloc[-1]['Month-Year']
from datetime import datetime
from dateutil.relativedelta import relativedelta
import pandas as pd
# Build the next `Num` months after the last training month as 'YYYY-MM' strings.
months_value = int(Num)+1
base_month = pd.to_datetime(a)
list_months = []
# Append the month that lies MD months after base_month to list_months.
def months(MD):
date_after_month = ((base_month + relativedelta(months=MD)).strftime('%Y-%m'))
# print
list_months.append(date_after_month)
for i in range(1, months_value):
months(i)
# `fut_date` keeps the month strings for the output; `future_dates` is
# converted into integer year/month columns for the model.
future_dates = pd.DataFrame(list_months)
future_dates.columns = ["Dates"]
fut_date = pd.DataFrame(list_months)
fut_date.columns = ["Dates"]
future_dates[["year", "month"]] = future_dates["Dates"].str.split("-", expand=True)
future_dates.drop(['Dates'], axis=1, inplace=True)
future_dates["year"] = future_dates["year"].astype(int)
future_dates["month"] = future_dates["month"].astype(int)
###############################################################################
userdata['sales']=userdata["sales"].astype(float)
# Naming is inverted relative to convention: `dependent` holds the feature
# columns and `independent` the target passed to model.fit below.
dependent = userdata[['year','month']]
independent = userdata['sales']
# numpy, xgboost, train_test_split, GridSearchCV, roc_auc_score and matplotlib
# are imported here but not used in the visible code of this function.
import numpy as np
import pandas as pd
import xgboost
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
#model = xgboost.XGBRegressor()
from sklearn.ensemble import RandomForestRegressor
# Fit a random forest on (year, month) -> sales and predict the future months.
model = RandomForestRegressor(random_state=1,n_jobs=-1)
model.fit(dependent, independent)
#future=pd.read_csv('future_dates.csv')
future_prediction = model.predict(future_dates)
#print(future_prediction)
# From here on `df` is the prediction frame, not the function argument.
df=pd.DataFrame(future_prediction)
df.columns = ["SALES"]
frames = [fut_date, df]
result = pd.concat(frames,axis=1)
result['itemname'] = item
result['itemid'] =item_id
result.columns = ['Date','Predict','ItemName','ItemId']
#result['Predict']=result["Predict"].astype(int)
# Limits are mean +/- 3 standard deviations of the predictions, i.e. one
# constant value per item repeated on every row.
result['UpperLimit']=result["Predict"].mean()+result['Predict'].std()*3
result['LowerLimit']=result['Predict'].mean()-result['Predict'].std()*3
# Clamp negative lower limits to zero.
# NOTE(review): this is chained indexing assignment; pandas may warn about it
# and it may not write through under copy-on-write - consider .loc or clip().
result["LowerLimit"][result["LowerLimit"] < 0] = 0
print(result)
# Accumulate this item's forecast in path.csv: append without a header when
# the file already exists, otherwise create it with a header row.
filePath='path.csv'
if os.path.exists(filePath):
print('file exist')
#userdata = pd.DataFrame(data)
result.to_csv('path.csv', mode='a',index=False, header=False)
else:
print("file as it doesn't exists")
#result = pd.DataFrame(data)
result.to_csv('path.csv', index=False)
# Re-read everything accumulated so far and dump it to forcast.json as records.
result=pd.read_csv('path.csv')
result.to_json('forcast.json', orient="records")
import json
# open the JSON file and read its contents
with open(r'forcast.json', 'r') as f:
json_data = json.load(f)
# print the JSON data
#print(json_data)
#output={"response":"success","result":json_data}
#print(output)
# NOTE(review): whether this return sits inside or after the per-item loop
# cannot be determined from this flattened view - confirm; inside the loop it
# would stop after the first item.
return json_data
# frames = [fulldata, result]
# final = pd.concat(frames)
# print('********************************************************')
# final['itemname'] = item
# final['itemid'] =item_id
# final.columns = ['Date','Predict','ItemName','ItemId']
# final['upper_limit']=final["Predict"]+final['Predict']*0.2
# # final['lower_limit']=final['Predict']-final['Predict']*0.2
# # print(final)
# # final.to_json('forcast.json', orient="records")
# with open('forcast.json', 'r') as json_file:
# json_load = json.load(json_file)
# #url = "https://demo.bizgaze.app/apis/v4/bizgaze/integrations/demandforecast/saveforecast/List"
# url='https://qa.bizgaze.app/apis/v4/bizgaze/integrations/demandforecast/saveforecast/List'
# payload = json.dumps(json_load)#.replace("]", "").replace("[", "")
# print(payload)
# headers = {
# #'Authorization': 'stat 263162e61f084d3392f162eb7ec39b2c',#demo
# 'Authorization': 'stat 873f2e6f70b3483e983972f96fbf5ea4',#test
# 'Content-Type': 'application/json'
# }
# response = requests.request("POST", url, headers=headers, data=payload)
# print("##############################################################")
# print(response.text)
# import time
# time.sleep(1)
###############################################################################################################################################################
#####################################################################################################################
@app.route("/sales_forcast", methods=["GET", "POST"])
def sales_forcast():
    """Run a sales forecast on the JSON dataset sent in the request body.

    The body is loaded into a DataFrame and handed to ``month`` (or ``day``
    when the granularity is ``'days'``). Granularity and horizon are still
    fixed to month-wise / 5 periods, exactly as before.

    Returns:
        A JSON response. For the month-wise path this is whatever ``month``
        returns, serialised explicitly so the view does not rely on the
        framework converting a bare list. ``day`` returns nothing, so that
        path answers with a small status object. A missing, empty or
        unparsable JSON body yields HTTP 400 instead of failing later inside
        the forecasting code.
    """
    # silent=True: a bad or missing body gives None instead of an exception,
    # so the error can be reported in the same JSON shape as other replies.
    dataset = request.get_json(silent=True)
    if not dataset:
        error_body = json.dumps(
            {"status": "error", "message": "request body must be non-empty JSON"}
        )
        return Response(error_body, status=400, mimetype="application/json")

    df = pd.DataFrame(dataset)
    print(df)

    # Hard-coded for now; the request-driven variant was disabled upstream.
    wise = 'month'
    Num = 5

    if wise == 'days':
        print('daywise groupby')
        day(Num, df)
        # day() has no return value; previously `output` was left unbound on
        # this path and the final return would have raised.
        output = {"status": "success"}
    else:
        print('monthwise groupby')
        output = month(Num, df)

    return Response(json.dumps(output), mimetype="application/json")
if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces, port 5003.
    server_options = {'host': '0.0.0.0', 'port': 5003}
    app.run(**server_options)