"""Flask service that forecasts item sales per day or per month.

Recovered from the patch "Upload files to 'Demand_forcasting'" (commit
4e149a3f), which adds ``Demand_forcasting/forcasting.py``.

Endpoints:
    GET  /               -- liveness string.
    GET/POST /sales_forcast -- JSON sales records in, JSON forecast records out.

For every item the sales history is aggregated per period, screened with an
Augmented Dickey-Fuller test, and a ``RandomForestRegressor`` trained on
calendar features (year / month / day) predicts the next periods.
"""
import io
import json
import logging
import os
import time
import warnings
from urllib.request import urlopen

import pandas as pd
import requests
from flask import (
    Flask,
    Response,
    jsonify,
    redirect,
    render_template,
    request,
    send_file,
)

warnings.filterwarnings("ignore")

logger = logging.getLogger(__name__)

app = Flask(__name__)

ITEM_DATA_URL = (
    "https://test.bizgaze.app/apis/v4/bizgaze/integrations/"
    "demandforecast/getitemdata"
)
SAVE_FORECAST_URL = (
    "https://qa.bizgaze.app/apis/v4/bizgaze/integrations/"
    "demandforecast/saveforecast/List"
)
# NOTE(review): these credentials were committed in source. The defaults are
# kept so existing deployments keep working; rotate them and supply the values
# through the environment instead.
# NOTE(review): item data is read from the "test" host but forecasts are
# posted to the "qa" host -- confirm this pairing is intended.
ITEM_DATA_AUTH = os.environ.get(
    "BIZGAZE_ITEM_DATA_AUTH", "stat 27e6b51b278d444aa0b70ed60419b04c"
)
SAVE_FORECAST_AUTH = os.environ.get(
    "BIZGAZE_SAVE_FORECAST_AUTH", "stat 873f2e6f70b3483e983972f96fbf5ea4"
)

# The endpoint currently always forecasts this granularity and horizon.
DEFAULT_GRANULARITY = "month"
DEFAULT_HORIZON = 5

ADF_SIGNIFICANCE = 0.05  # p-value at or below which a series counts as stationary
MAX_SHIFT_ROUNDS = 12    # number of cumulative shift attempts before giving up


@app.route("/", methods=["GET"])
def home():
    """Return a fixed string so callers can check the service is up."""
    return 'forcasting app running'


list_output = []


def _is_stationary(series):
    """Return True when the ADF test rejects a unit root for *series*.

    Raises:
        ValueError: propagated from ``adfuller`` (for example when the series
            is too short); callers treat this as "could not be tested".
    """
    # Imported lazily, as in the original, so the module loads without
    # statsmodels installed.
    from statsmodels.tsa.stattools import adfuller

    statistic, p_value = adfuller(series)[:2]
    logger.debug("ADF statistic=%s p-value=%s", statistic, p_value)
    return p_value <= ADF_SIGNIFICANCE


def _training_frame(history, date_col):
    """Pick the ``[date_col, 'sales']`` rows the model is trained on.

    The raw history is used when it is already stationary or cannot be
    tested. Otherwise the sales column is shifted cumulatively (1, then 2
    more, ...) up to ``MAX_SHIFT_ROUNDS`` times, and the first shifted version
    that tests stationary is returned. If none does, the raw history is used.

    NOTE(review): the original comment calls this step "differencing", but
    ``Series.shift`` only lags the values against their dates. The behaviour
    is preserved here; confirm whether real differencing was intended.
    """
    columns = [date_col, "sales"]
    try:
        if _is_stationary(history["sales"]):
            return history[columns].dropna()
    except ValueError:
        return history[columns].dropna()

    # Work on a copy so the fallback below always sees the untouched history.
    shifted = history.copy()
    for lag in range(1, MAX_SHIFT_ROUNDS + 1):
        shifted["sales"] = shifted["sales"].shift(lag)
        shifted = shifted.dropna()
        if shifted.empty:
            break
        try:
            if _is_stationary(shifted["sales"]):
                return shifted[columns]
        except ValueError:
            # Series could not be tested at this lag; try the next one.
            continue
    return history[columns].dropna()


def _aggregate_sales(item_rows, period_col, date_col):
    """Sum one item's sales per period; return columns ``[date_col, 'sales']``.

    Only the sales column is aggregated, so the result does not depend on
    whether the installed pandas drops or concatenates string columns in
    ``groupby(...).sum()``.
    """
    totals = item_rows.groupby(period_col)["sum"].sum().reset_index()
    totals.columns = [date_col, "sales"]
    return totals


def _forecast(features, target, future_features):
    """Fit a random forest on *features* / *target* and predict the future."""
    from sklearn.ensemble import RandomForestRegressor

    model = RandomForestRegressor(random_state=1, n_jobs=-1)
    model.fit(features, target)
    return model.predict(future_features)


def _result_frame(labels, predictions, item, item_id):
    """Build the forecast table for one item.

    Columns: Date, Predict, ItemName, ItemId, UpperLimit, LowerLimit. The
    limits are mean +/- 3 standard deviations of the predictions, with the
    lower limit floored at zero.
    """
    result = pd.DataFrame({"Date": labels, "Predict": predictions})
    result["ItemName"] = item
    result["ItemId"] = item_id
    centre = result["Predict"].mean()
    spread = result["Predict"].std() * 3
    result["UpperLimit"] = centre + spread
    result["LowerLimit"] = centre - spread
    # clip() instead of chained boolean assignment, which is unreliable.
    result["LowerLimit"] = result["LowerLimit"].clip(lower=0)
    return result


def _fetch_item_data():
    """Download the item sales history from ``ITEM_DATA_URL``."""
    # NOTE(review): the original first fetched the URL without credentials and
    # sent that response back as the body of the authenticated GET. Kept
    # as-is; confirm whether the API needs it.
    with urlopen(ITEM_DATA_URL) as unauthenticated:
        request_body = json.loads(unauthenticated.read())

    headers = {
        'Authorization': ITEM_DATA_AUTH,
        'Content-Type': 'application/json',
    }
    response = requests.request(
        "GET", ITEM_DATA_URL, headers=headers, data=request_body
    )
    response.raise_for_status()

    # The first value of the response envelope is itself a JSON document
    # holding the records.
    envelope = pd.read_json(io.StringIO(response.text), orient='index')
    envelope = envelope.reset_index()
    envelope.columns = ['key', 'value']
    return pd.DataFrame(json.loads(envelope['value'][0]))


def _post_forecast(result):
    """POST one item's forecast records to ``SAVE_FORECAST_URL``."""
    headers = {
        'Authorization': SAVE_FORECAST_AUTH,
        'Content-Type': 'application/json',
    }
    response = requests.request(
        "POST",
        SAVE_FORECAST_URL,
        headers=headers,
        data=result.to_json(orient="records"),
    )
    if response.ok:
        logger.info("Forecast saved: %s", response.text)
    else:
        logger.warning(
            "Saving forecast failed (%s): %s",
            response.status_code,
            response.text,
        )


def _positive_horizon(Num):
    """Return *Num* as an int, rejecting horizons below one period."""
    horizon = int(Num)
    if horizon < 1:
        raise ValueError("Num must be a positive number of periods")
    return horizon


def day(Num, df):
    """Forecast the next *Num* days for every item and post each forecast.

    Item history is downloaded from the item-data API; one POST is made per
    item, with a one-second pause between items.

    Args:
        Num: number of future days to predict (anything ``int()`` accepts).
        df: unused; kept so the signature matches ``month``.

    Returns:
        None.

    Raises:
        ValueError: if *Num* is less than 1.
    """
    horizon = _positive_horizon(Num)

    rows = _fetch_item_data()
    rows.columns = ['journaldate', 'sum', 'itemname', 'itemid']
    rows["journaldate"] = rows["journaldate"].astype(str)
    rows["sum"] = rows["sum"].astype(float)

    for item, item_rows in rows.groupby('itemname'):
        item_id = item_rows.iloc[-1]['itemid']
        logger.info("Forecasting item %s (id %s)", item, item_id)

        history = _aggregate_sales(item_rows, 'journaldate', 'Date')
        training = _training_frame(history, 'Date')

        observed = pd.to_datetime(training['Date'])
        features = pd.DataFrame({
            "year": observed.dt.year,
            "month": observed.dt.month,
            "day": observed.dt.day,
        })

        # Start from the latest observed date rather than the last row, which
        # is only the latest when the date strings happen to sort that way.
        first_future_day = observed.max().normalize() + pd.Timedelta(days=1)
        upcoming = pd.date_range(first_future_day, periods=horizon, freq="D")
        future_features = pd.DataFrame({
            "year": upcoming.year,
            "month": upcoming.month,
            "day": upcoming.day,
        })

        predictions = _forecast(features, training['sales'], future_features)
        result = _result_frame(
            list(upcoming.strftime("%Y-%m-%d")), predictions, item, item_id
        )
        _post_forecast(result)
        time.sleep(1)


def month(Num, df):
    """Forecast the next *Num* months for every item in *df*.

    Args:
        Num: number of future months to predict (anything ``int()`` accepts).
        df: DataFrame with four columns, read positionally as item name,
            sales amount, journal date and item id. Journal dates must split
            on ``-`` into at least year and month. The frame is not modified.

    Returns:
        A list of dicts (one per item and month) with keys ``Date``,
        ``Predict``, ``ItemName``, ``ItemId``, ``UpperLimit`` and
        ``LowerLimit``; an empty list when *df* has no rows.

    Raises:
        ValueError: if *Num* is less than 1.
    """
    horizon = _positive_horizon(Num)
    if df is None or df.empty:
        return []

    rows = df.copy()  # do not rename columns on the caller's frame
    rows.columns = ['itemname', 'sum', 'journaldate', 'itemid']
    rows["sum"] = rows["sum"].astype(float)
    date_parts = rows["journaldate"].astype(str).str.split("-", expand=True)
    rows['Month-Year'] = date_parts[0] + '-' + date_parts[1]

    forecasts = []
    for item, item_rows in rows.groupby('itemname'):
        item_id = item_rows.iloc[-1]['itemid']
        logger.info("Forecasting item %s (id %s)", item, item_id)

        history = _aggregate_sales(item_rows, 'Month-Year', 'Month')
        training = _training_frame(history, 'Month')

        features = training["Month"].str.split("-", expand=True).astype(int)
        features.columns = ["year", "month"]

        # Latest (year, month) pair, independent of string sort order.
        last_year, last_month = max(zip(features["year"], features["month"]))
        base_month = pd.Timestamp(
            year=int(last_year), month=int(last_month), day=1
        )
        upcoming = [
            base_month + pd.DateOffset(months=step)
            for step in range(1, horizon + 1)
        ]
        future_features = pd.DataFrame({
            "year": [stamp.year for stamp in upcoming],
            "month": [stamp.month for stamp in upcoming],
        })

        predictions = _forecast(features, training['sales'], future_features)
        forecasts.append(
            _result_frame(
                [stamp.strftime('%Y-%m') for stamp in upcoming],
                predictions,
                item,
                item_id,
            )
        )

    if not forecasts:
        return []
    # Results are combined in memory; fixed-name scratch files would be shared
    # between concurrent requests.
    combined = pd.concat(forecasts, ignore_index=True)
    return json.loads(combined.to_json(orient="records"))


@app.route("/sales_forcast", methods=["GET", "POST"])
def sales_forcast():
    """Forecast sales for the JSON records in the request body.

    Returns the monthly forecast records as a JSON array, or a 400 response
    when the body is missing, empty or cannot be read as a table.
    """
    dataset = request.get_json(silent=True)
    if not dataset:
        return jsonify(
            {"status": "error", "message": "JSON sales records are required"}
        ), 400
    try:
        df = pd.DataFrame(dataset)
    except ValueError as err:
        return jsonify({"status": "error", "message": str(err)}), 400

    wise = DEFAULT_GRANULARITY
    Num = DEFAULT_HORIZON
    if wise == 'days':
        logger.info("daywise groupby")
        day(Num, df)
        # day() posts its results itself and returns nothing.
        return jsonify({"status": "success"})

    logger.info("monthwise groupby")
    # jsonify() so a list result is a valid response on every Flask version.
    return jsonify(month(Num, df))


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    app.run(host='0.0.0.0', port=5003)