Ingen beskrivning
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

forcasting2.py 20KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561
  1. from flask import Flask, render_template, send_file, request, redirect, Response
  2. import os
  3. import pandas as pd
  4. import warnings
  5. import json
  6. import requests
  7. from urllib.request import urlopen
  8. warnings.filterwarnings("ignore")
  9. app = Flask(__name__)
  10. @app.route("/", methods=["GET"])
  11. def home():
  12. return 'forcasting app running'
  13. ######################################################################################################################
  14. list_output=[]
  15. def day(Num,get_url,get_url_token,post_url,post_url_token):
  16. import requests
  17. url = get_url
  18. payload = {}
  19. headers = {
  20. 'Authorization': get_url_token
  21. }
  22. response = requests.request("GET", url, headers=headers, data=payload)
  23. #a=response.text
  24. # print(response.text)
  25. import pandas as pd
  26. df2 = pd.read_json(response.text, orient ='index')
  27. df2=df2.reset_index()
  28. df2.columns = ['key','value']
  29. #print(df2)
  30. a=df2['value'][0]
  31. j=json.loads(a)
  32. userdata = pd.DataFrame(j)
  33. #df1
  34. itemid=userdata[['itemname','itemid']]
  35. itemid.columns = ['ItemName', 'ItemId']
  36. #df1=pd.read_csv(r'./upload/' + name)
  37. #df1=df1[df1['obdate']!='01/01/0001']
  38. userdata.columns = ['journaldate','sum','itemid','itemname']
  39. # import pandas as pd
  40. # userdata = pd.read_csv(r'C:\Users\Bizga\Desktop\forcast\5yearsitems.csv')
  41. # itemid = userdata[['itemname', 'itemid']]
  42. #userdata['journaldate'] = pd.to_datetime(userdata['journaldate'])
  43. userdata["journaldate"] = userdata["journaldate"].astype(str)
  44. #userdata[["year", "month", "day"]] = userdata["journaldate"].str.split("/", expand = True)
  45. userdata[[ "day","month","year", ]] = userdata["journaldate"].str.split("-", expand = True)
  46. #userdata['Month-Year']=userdata['year'].astype(str)+'-'+userdata['month'].astype(str)
  47. item_unique_name = userdata['itemname'].unique()
  48. #df=pd.read_csv("C:\\Users\\Bizgaze\\2021_2022.csv")
  49. # Group the DataFrame by the 'item' column
  50. grouped = userdata.groupby('itemname')
  51. # Print the unique items in the 'item' column
  52. #print(grouped.groups.keys())
  53. # Iterate over the unique items and print the group data
  54. for item, userdata in grouped:
  55. print("itemname: ", item)
  56. item_id = userdata.iloc[-1]['itemid']
  57. print(item_id)
  58. userdata= userdata.groupby('journaldate').sum()
  59. userdata= userdata.reset_index()
  60. #print(userdata)
  61. fulldata=userdata[['journaldate','sum']]
  62. fulldata.columns = ["Dates","SALES"]
  63. #************************************************************************************************************************
  64. ## Use Techniques Differencing
  65. import pandas as pd
  66. from pandas import DataFrame
  67. # userdata=pd.read_csv(r"C:\Users\Bizgaze\ipynb files\TS forcasting\working\139470.csv")
  68. userdata=userdata[['journaldate','sum','itemid']]
  69. userdata.columns = ['Date', 'sales','sku']
  70. from statsmodels.tsa.stattools import adfuller
  71. DATE=[]
  72. SALES=[]
  73. def adf_test(series,userdata):
  74. result=adfuller(series)
  75. print('ADF Statistics: {}'.format(result[0]))
  76. print('p- value: {}'.format(result[1]))
  77. if result[1] <= 0.05:
  78. print("strong evidence against the null hypothesis, reject the null hypothesis. Data has no unit root and is stationary")
  79. else:
  80. #print(userdata)
  81. print(stationary_test(userdata))
  82. print("weak evidence against null hypothesis, time series has a unit root, indicating it is non-stationary ")
  83. #********************************************* stationary or non-stationary **********************************************************
  84. def stationary_test(userdata):
  85. data=pd.DataFrame(userdata)
  86. for i in range(1,13):
  87. print(i)
  88. sales_data=DataFrame()
  89. data['sales']=data['sales'].shift(i)
  90. data.dropna(inplace=True)
  91. #print( userdata['sales'])
  92. try:
  93. X=adf_test(data["sales"],userdata="nothing")
  94. if "non-stationary" in str(X):
  95. print("non-stationary")
  96. else:
  97. print("stationary")
  98. #print(userdata[["Date","sales"]])
  99. #df_sale=pd.DataFrame(userdata)
  100. DATE.append(data["Date"])
  101. SALES.append(data["sales"])
  102. #df4 = pd.concat([data, sales_data], axis=1)
  103. return "done"
  104. break
  105. except ValueError:
  106. pass
  107. try:
  108. adf_test(userdata["sales"],userdata)
  109. except ValueError:
  110. pass
  111. sales=pd.DataFrame(SALES).T
  112. dates=pd.DataFrame(DATE).T
  113. try:
  114. df4 = pd.concat([dates["Date"],sales["sales"]], axis=1)
  115. df4=df4.dropna()
  116. print(df4)
  117. except KeyError:
  118. df4=userdata[['Date','sales']]
  119. df4=df4.dropna()
  120. print(df4)
  121. pass
  122. #####################################################################################################################
  123. userdata=df4
  124. a = userdata.iloc[-1]['Date']
  125. #userdata['Date'] = pd.to_datetime(userdata['Date'])
  126. userdata["Date"] = userdata["Date"].astype(str)
  127. print('after testing')
  128. print(userdata)
  129. userdata[["year", "month", "day"]] = userdata["Date"].str.split("-", expand = True)
  130. #userdata[["year", "month"]] = userdata["Month"].str.split("-", expand=True)
  131. #userdata = userdata[["year","month",'sum']]
  132. userdata["year"] = userdata["year"].astype(int)
  133. userdata["month"] = userdata["month"].astype(int)
  134. userdata["day"] = userdata["day"].astype(int)
  135. #####################################################################################################################
  136. list_dates=[]
  137. import datetime
  138. days=int(Num)+1
  139. import pandas as pd
  140. base_date=pd.to_datetime(a)
  141. for x in range(1,days):
  142. dates=(base_date + datetime.timedelta(days=x))
  143. dates=str(dates).replace(" 00:00:00","")
  144. #print(dates)
  145. list_dates.append(dates)
  146. fut_date = pd.DataFrame(list_dates)
  147. fut_date.columns = ["Dates"]
  148. future_dates=pd.DataFrame(list_dates)
  149. future_dates.columns=["Dates"]
  150. future_dates[["year", "month", "day"]] = future_dates["Dates"].str.split("-", expand=True)
  151. future_dates.drop(['Dates'], axis=1, inplace=True)
  152. future_dates["year"] = future_dates["year"].astype(int)
  153. future_dates["month"] = future_dates["month"].astype(int)
  154. future_dates["day"] = future_dates["day"].astype(int)
  155. #print(future_dates)
  156. ###############################################################################
  157. userdata['sales']=userdata["sales"].astype(float)
  158. dependent = userdata[['year','month','day']]
  159. independent = userdata['sales']
  160. import numpy as np
  161. import pandas as pd
  162. import xgboost
  163. from sklearn.model_selection import train_test_split
  164. from sklearn.model_selection import GridSearchCV
  165. from sklearn.metrics import roc_auc_score
  166. import matplotlib.pyplot as plt
  167. #model = xgboost.XGBRegressor()
  168. from sklearn.ensemble import RandomForestRegressor
  169. model = RandomForestRegressor(random_state=1,n_jobs=-1)
  170. #model.fit(dependent, independent)
  171. model.fit(dependent, independent)
  172. #future=pd.read_csv('future_dates.csv')
  173. future_prediction = model.predict(future_dates)
  174. #print(future_prediction)
  175. df=pd.DataFrame(future_prediction)
  176. df.columns = ["SALES"]
  177. frames = [fut_date, df]
  178. result = pd.concat(frames,axis=1)
  179. result['itemname'] = item
  180. result['itemid'] =item_id
  181. result.columns = ['Date','Predict','ItemName','ItemId']
  182. #result['Predict']=result["Predict"].astype(int)
  183. result['UpperLimit']=result["Predict"].mean()+result['Predict'].std()*3
  184. result['LowerLimit']=result['Predict'].mean()-result['Predict'].std()*3
  185. print(result)
  186. result.to_json('forcast.json', orient="records")
  187. with open('forcast.json', 'r') as json_file:
  188. json_load = json.load(json_file)
  189. #url = "https://demo.bizgaze.app/apis/v4/bizgaze/integrations/demandforecast/saveforecast/List"
  190. url=post_url
  191. payload = json.dumps(json_load)#.replace("]", "").replace("[", "")
  192. print(payload)
  193. #print(payload)
  194. headers = {
  195. #'Authorization': 'stat 263162e61f084d3392f162eb7ec39b2c',#demo
  196. 'Authorization': post_url_token,#test
  197. 'Content-Type': 'application/json'
  198. }
  199. response = requests.request("POST", url, headers=headers, data=payload)
  200. # print("##############################################################")
  201. print(response.text)
  202. return 'done'
  203. #############################################################################################################################################################
  204. def month(Num,get_url,get_url_token,post_url,post_url_token):
  205. #url='https://qa.bizgaze.app/apis/v4/bizgaze/integrations/demandforecast/getitemdata'
  206. # url= get_url
  207. # response = urlopen(url)
  208. # data_json = json.loads(response.read())
  209. # headers = {
  210. # 'Authorization':get_url_token,
  211. # #'Authorization':'stat 873f2e6f70b3483e983972f96fbf5ea4',#qa
  212. # 'Content-Type': 'application/json'
  213. # }
  214. # response = requests.request("GET", url, headers=headers, data=data_json)
  215. # #print("##############################################################")
  216. # a=response.text
  217. # # print(response.text)
  218. import requests
  219. url = get_url
  220. payload = {}
  221. headers = {
  222. 'Authorization': get_url_token
  223. }
  224. response = requests.request("GET", url, headers=headers, data=payload)
  225. #print(response.text)
  226. import pandas as pd
  227. df2 = pd.read_json(response.text, orient ='index')
  228. df2=df2.reset_index()
  229. df2.columns = ['key','value']
  230. #print(df2)
  231. a=df2['value'][0]
  232. j=json.loads(a)
  233. userdata = pd.DataFrame(j)
  234. #filePath='path.csv'
  235. # if os.path.exists(filePath):
  236. # print('file exist')
  237. # os.remove('path.csv')
  238. # else:
  239. # print("file doesn't exists")
  240. # pass
  241. #userdata=df
  242. itemid=userdata[['itemname','itemid']]
  243. itemid.columns = ['ItemName', 'ItemId']
  244. #df1=pd.read_csv(r'./upload/' + name)
  245. #df1=df1[df1['obdate']!='01/01/0001']
  246. userdata.columns = ['journaldate','sum','itemid','itemname']
  247. # import pandas as pd
  248. # userdata = pd.read_csv(r'C:\Users\Bizga\Desktop\forcast\5yearsitems.csv')
  249. # itemid = userdata[['itemname', 'itemid']]
  250. #userdata['journaldate'] = pd.to_datetime(userdata['journaldate'])
  251. userdata["journaldate"] = userdata["journaldate"].astype(str)
  252. #userdata[["year", "month", "day"]] = userdata["journaldate"].str.split("-", expand = True)
  253. userdata[[ "day","month","year", ]] = userdata["journaldate"].str.split("-", expand = True)
  254. #userdata[["year", "day", "month"]] = userdata["journaldate"].str.split("/", expand=True)
  255. userdata['Month-Year']=userdata['year'].astype(str)+'-'+userdata['month'].astype(str)
  256. item_unique_name = userdata['itemname'].unique()
  257. #df=pd.read_csv("C:\\Users\\Bizgaze\\2021_2022.csv")
  258. # Group the DataFrame by the 'item' column
  259. grouped = userdata.groupby('itemname')
  260. # Print the unique items in the 'item' column
  261. #print(grouped.groups.keys())
  262. # Iterate over the unique items and print the group data
  263. for item, userdata in grouped:
  264. print("itemname: ", item)
  265. item_id = userdata.iloc[-1]['itemid']
  266. print(item_id)
  267. userdata= userdata.groupby('Month-Year').sum()
  268. userdata= userdata.reset_index()
  269. fulldata=userdata[['Month-Year','sum']]
  270. fulldata.columns = ["Dates","SALES"]
  271. #************************************************************************************************************************
  272. ## Use Techniques Differencing
  273. import pandas as pd
  274. from pandas import DataFrame
  275. # userdata=pd.read_csv(r"C:\Users\Bizgaze\ipynb files\TS forcasting\working\139470.csv")
  276. userdata=userdata[['Month-Year','sum','itemid']]
  277. userdata.columns = ['Month', 'sales','sku']
  278. from statsmodels.tsa.stattools import adfuller
  279. DATE=[]
  280. SALES=[]
  281. def adf_test(series,userdata):
  282. result=adfuller(series)
  283. print('ADF Statistics: {}'.format(result[0]))
  284. print('p- value: {}'.format(result[1]))
  285. if result[1] <= 0.05:
  286. print("strong evidence against the null hypothesis, reject the null hypothesis. Data has no unit root and is stationary")
  287. else:
  288. #print(userdata)
  289. print(stationary_test(userdata))
  290. print("weak evidence against null hypothesis, time series has a unit root, indicating it is non-stationary ")
  291. #********************************************* stationary or non-stationary **********************************************************
  292. def stationary_test(userdata):
  293. data=pd.DataFrame(userdata)
  294. for i in range(1,13):
  295. print(i)
  296. sales_data=DataFrame()
  297. data['sales']=data['sales'].shift(i)
  298. data.dropna(inplace=True)
  299. #print( userdata['sales'])
  300. try:
  301. X=adf_test(data["sales"],userdata="nothing")
  302. if "non-stationary" in str(X):
  303. print("non-stationary")
  304. else:
  305. print("stationary")
  306. #print(userdata[["Month","sales"]])
  307. #df_sale=pd.DataFrame(userdata)
  308. DATE.append(data["Month"])
  309. SALES.append(data["sales"])
  310. #df4 = pd.concat([data, sales_data], axis=1)
  311. return "done"
  312. break
  313. except ValueError:
  314. pass
  315. try:
  316. adf_test(userdata["sales"],userdata)
  317. except ValueError:
  318. pass
  319. sales=pd.DataFrame(SALES).T
  320. dates=pd.DataFrame(DATE).T
  321. try:
  322. df4 = pd.concat([dates["Month"],sales["sales"]], axis=1)
  323. df4=df4.dropna()
  324. print(df4)
  325. except KeyError:
  326. df4=userdata[['Month','sales']]
  327. df4=df4.dropna()
  328. print(df4)
  329. pass
  330. #####################################################################################################################
  331. userdata=df4
  332. a = userdata.iloc[-1]['Month']
  333. userdata[["year", "month"]] = userdata["Month"].str.split("-", expand=True)
  334. #userdata = userdata[["year","month",'sum']]
  335. userdata["year"] = userdata["year"].astype(int)
  336. userdata["month"] = userdata["month"].astype(int)
  337. #####################################################################################################################
  338. #a = userdata.iloc[-1]['Month-Year']
  339. from datetime import datetime
  340. from dateutil.relativedelta import relativedelta
  341. import pandas as pd
  342. months_value = int(Num)+1
  343. base_month = pd.to_datetime(a)
  344. list_months = []
  345. def months(MD):
  346. date_after_month = ((base_month + relativedelta(months=MD)).strftime('%Y-%m'))
  347. # print
  348. list_months.append(date_after_month)
  349. for i in range(1, months_value):
  350. months(i)
  351. future_dates = pd.DataFrame(list_months)
  352. future_dates.columns = ["Dates"]
  353. fut_date = pd.DataFrame(list_months)
  354. fut_date.columns = ["Dates"]
  355. future_dates[["year", "month"]] = future_dates["Dates"].str.split("-", expand=True)
  356. future_dates.drop(['Dates'], axis=1, inplace=True)
  357. future_dates["year"] = future_dates["year"].astype(int)
  358. future_dates["month"] = future_dates["month"].astype(int)
  359. ###############################################################################
  360. userdata['sales']=userdata["sales"].astype(float)
  361. dependent = userdata[['year','month']]
  362. independent = userdata['sales']
  363. import numpy as np
  364. import pandas as pd
  365. import xgboost
  366. from sklearn.model_selection import train_test_split
  367. from sklearn.model_selection import GridSearchCV
  368. from sklearn.metrics import roc_auc_score
  369. import matplotlib.pyplot as plt
  370. #model = xgboost.XGBRegressor()
  371. from sklearn.ensemble import RandomForestRegressor
  372. model = RandomForestRegressor(random_state=1,n_jobs=-1)
  373. model.fit(dependent, independent)
  374. #future=pd.read_csv('future_dates.csv')
  375. future_prediction = model.predict(future_dates)
  376. #print(future_prediction)
  377. df=pd.DataFrame(future_prediction)
  378. df.columns = ["SALES"]
  379. frames = [fut_date, df]
  380. result = pd.concat(frames,axis=1)
  381. result['itemname'] = item
  382. result['itemid'] =item_id
  383. result.columns = ['Date','Predict','ItemName','ItemId']
  384. #result['Predict']=result["Predict"].astype(int)
  385. result['UpperLimit']=result["Predict"].mean()+result['Predict'].std()*3
  386. result['LowerLimit']=result['Predict'].mean()-result['Predict'].std()*3
  387. result["LowerLimit"][result["LowerLimit"] < 0] = 0
  388. print(result)
  389. result.to_json('forcast.json', orient="records")
  390. with open('forcast.json', 'r') as json_file:
  391. json_load = json.load(json_file)
  392. #url = "https://demo.bizgaze.app/apis/v4/bizgaze/integrations/demandforecast/saveforecast/List"
  393. url=post_url
  394. payload = json.dumps(json_load)#.replace("]", "").replace("[", "")
  395. print(payload)
  396. #print(payload)
  397. headers = {
  398. #'Authorization': 'stat 263162e61f084d3392f162eb7ec39b2c',#demo
  399. 'Authorization': post_url_token,#test
  400. 'Content-Type': 'application/json'
  401. }
  402. response = requests.request("POST", url, headers=headers, data=payload)
  403. # print("##############################################################")
  404. print(response.text)
  405. #output={"response":"success","result":json_data}
  406. #print(output)
  407. return 'done'
  408. ###############################################################################################################################################################
  409. #####################################################################################################################
  410. @app.route("/sales_forcast", methods=["GET", "POST"])
  411. def sales_forcast():
  412. #wise= request.args.get('wise').replace('{','').replace('}','')
  413. #Num= request.args.get('value').replace('{','').replace('}','')
  414. #print(wise)
  415. #print(Num)
  416. Dataset = request.get_json()
  417. a = Dataset
  418. wise = a['wise']
  419. # print(x)
  420. Num = a['future_dates']
  421. get_url = a['get_url']
  422. get_url_token = a['get_url_token']
  423. post_url = a['post_url']
  424. post_url_token = a['post_url_token']
  425. #print(Dataset)
  426. # import pandas as pd
  427. # df=pd.DataFrame(Dataset)
  428. # print(df)
  429. # a = Dataset
  430. #x = a['wise']
  431. # cmd = "python C:\\Users\\Bizga\\Desktop\\forcast\\XGdaywise.py"
  432. # os.system(cmd)
  433. #split=wise
  434. # wise='month'
  435. # Num=5
  436. if wise=='days':
  437. print('daywise groupby')
  438. output=day(Num,get_url,get_url_token,post_url,post_url_token)
  439. # cmd = "python C:\\Users\\Bizga\\Desktop\\forcast\\XGdaywise.py"+" "+ Num
  440. # os.system(cmd)
  441. else:
  442. print('monthwise groupby')
  443. output=month(Num,get_url,get_url_token,post_url,post_url_token)
  444. # payload = json.dumps(output)
  445. # payload_list="["+payload+"]"
  446. return output
  447. if __name__ == "__main__":
  448. app.run(host='0.0.0.0', port=8082)