説明なし
選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

invoice.multiprocessing.py 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427
import base64
import csv
import glob
import json
import multiprocessing
import os
import shutil
import time
from functools import partial

import fitz
import pandas as pd
import pytesseract
import requests
import spacy
from flask import Flask, render_template, send_file, request, redirect, Response
from PIL import Image
# Flask application object; the @app.route decorators below register views on it.
app = Flask(__name__)
# spaCy pipeline loaded once at import time from the package name or path "p";
# predict() applies it to the text extracted from each invoice.
nlp_model1 = spacy.load("p")
# Upload directory setting. predict() hard-codes this same path rather than
# reading it back from the config.
app.config["IMAGE_UPLOADS"] = "/home/ubuntu/AI/InvoiceParser/upload_invoice"
  14. @app.route("/", methods=["GET"])
  15. def home():
  16. return render_template("invoice.html")
  17. # @app.route("/upload_invoice", methods=["GET", "POST"])
  18. def predict(url_list):
  19. # if request.method == "POST":
  20. # if request.files:
  21. # image = request.files["image"]
  22. # try:
  23. # image.save(os.path.join(app.config["IMAGE_UPLOADS"], image.filename))
  24. # except IsADirectoryError:
  25. # return render_template("invoice.html")
  26. # # image.save(os.path.join(
  27. # # app1.config["IMAGE_UPLOADS"], image.filename))
  28. # print("Image saved in Invoice")
  29. # return redirect(request.url)
  30. Dataset = request.get_json()
  31. # print(Dataset)
  32. a = url_list
  33. x = a['FileData']
  34. # print(x)
  35. y = a['FileName']
  36. z = a['FileType']
  37. name = y + '.' + z
  38. print(name)
  39. # print(y)
  40. # image = y.split("/")
  41. # filename=image[-1]
  42. # print(x)
  43. img_data = x.encode()
  44. import base64
  45. with open('/home/ubuntu/AI/InvoiceParser/upload_invoice/' + name, "wb") as fh:
  46. fh.write(base64.decodebytes(img_data))
  47. # Renaming file name
  48. # os.chdir("/home/ubuntu/AI/InvoiceParser/upload_invoice/")
  49. # print(os.getcwd())
  50. #
  51. # for count, f in enumerate(os.listdir()):
  52. # f_name, f_ext = os.path.splitext(f)
  53. # f_name = "" + str(count)
  54. #
  55. # new_name = f"{f_name}{f_ext}"
  56. # os.rename(f, new_name)
  57. import glob
  58. ts = 0
  59. for file_name in glob.glob("/home/ubuntu/AI/InvoiceParser/upload_invoice/*"):
  60. fts = os.path.getmtime(file_name)
  61. if fts > ts:
  62. ts = fts
  63. found = file_name
  64. print(found)
  65. s = "/home/ubuntu/AI/InvoiceParser/upload_invoice"
  66. s = os.listdir(s)
  67. for file in s:
  68. if file.endswith(".jpg"):
  69. fname = (found)
  70. elif file.endswith(".png"):
  71. fname = (found)
  72. elif file.endswith(".pdf"):
  73. fname = (found)
  74. elif file.endswith(".jpeg"):
  75. fname = (found)
  76. elif file.endswith(".JPEG"):
  77. fname = (found)
  78. def img_to_pdf(): # png to editable pdf conversion
  79. pdf = pytesseract.image_to_pdf_or_hocr(fname, extension="pdf")
  80. with open(
  81. "/home/ubuntu/AI/InvoiceParser/upload_invoice/demo.pdf",
  82. "w+b",
  83. ) as f:
  84. f.write(pdf)
  85. if fname.endswith(".pdf"):
  86. print()
  87. else:
  88. img_to_pdf()
  89. fname = "/home/ubuntu/AI/InvoiceParser/upload_invoice/demo.pdf"
  90. sourcepath = "/home/ubuntu/AI/InvoiceParser/upload_invoice"
  91. sourcefiles = os.listdir(sourcepath)
  92. destinationpath = "/home/ubuntu/AI/InvoiceParser/uploads"
  93. for file in sourcefiles:
  94. if file.endswith(".pdf"):
  95. shutil.copy2(
  96. os.path.join(sourcepath, file), os.path.join(destinationpath, file)
  97. )
  98. os.chdir("/home/ubuntu/AI/InvoiceParser/uploads")
  99. print(os.getcwd())
  100. print("file name conerted to o.pdf")
  101. for count, f in enumerate(os.listdir()):
  102. f_name, f_ext = os.path.splitext(f)
  103. f_name = "" + str(count)
  104. new_name = f"{f_name}{f_ext}"
  105. os.rename(f, new_name)
  106. import spacy
  107. import sys
  108. import fitz
  109. fname = "/home/ubuntu/AI/InvoiceParser/uploads/0.pdf"
  110. doc = fitz.open(fname)
  111. text = ""
  112. for page in doc:
  113. text = text + str(page.get_text())
  114. fitz = " ".join(text.split("\n"))
  115. # print(fitz)
  116. import pandas as pd
  117. doc = nlp_model1(fitz)
  118. k = []
  119. l = []
  120. for ent in doc.ents:
  121. # print(f"{ent.label_.upper():{30}}- {ent.text}")
  122. k.append(ent.label_.upper())
  123. l.append(ent.text)
  124. columns = k
  125. rows = [l]
  126. data = pd.DataFrame(rows, columns=columns)
  127. df = data
  128. df = data.T
  129. df.to_csv("/home/ubuntu/AI/InvoiceParser/Invoice.csv")
  130. import pandas as pd
  131. df = pd.read_csv("/home/ubuntu/AI/InvoiceParser/Invoice.csv")
  132. # df.head()
  133. # df = df.T
  134. # new_header = df.iloc[0] # grab the first row for the header
  135. # df = df[1:] # take the data less the header row
  136. # df.columns = new_header
  137. # def df_column_uniquify(df):
  138. # df_columns = df.columns
  139. # new_columns = []
  140. # for item in df_columns:
  141. # counter = 0
  142. # newitem = item
  143. # while newitem in new_columns:
  144. # counter += 1
  145. # newitem = "{}_{}".format(item, counter)
  146. # new_columns.append(newitem)
  147. # df.columns = new_columns
  148. # return df.T
  149. # df = df_column_uniquify(df)
  150. # # df=df.T
  151. # df.to_csv('/home/ubuntu/AI/InvoiceParser/final.csv')
  152. #df = pd.read_csv('/home/ubuntu/AI/InvoiceParser/final.csv')
  153. df.rename({df.columns[-2]: 'Key'}, axis=1, inplace=True)
  154. df.rename({df.columns[-1]: 'Values'}, axis=1, inplace=True)
  155. df['Key'] = df['Key'].str.replace('/', '')
  156. df['Key'] = df['Key'].str.replace(' ', '')
  157. df.to_csv('/home/ubuntu/AI/InvoiceParser/final.csv', index=False)
  158. import pandas as pd
  159. x1 = pd.read_csv('/home/ubuntu/AI/InvoiceParser/final.csv')
  160. tp = pd.read_csv('/home/ubuntu/AI/InvoiceParser/finalwithcolen.csv')
  161. merge = pd.merge(x1, tp, on='Key', how='right')
  162. merge1 = merge
  163. merge = merge['Values'].str.split(":", expand=True)
  164. merge.rename({merge.columns[-1]: 'Values'}, axis=1, inplace=True)
  165. frames = [merge1['Key'], merge['Values']]
  166. result = pd.concat(frames, axis=1)
  167. x1 = pd.read_csv('/home/ubuntu/AI/InvoiceParser/final.csv')
  168. tp = pd.read_csv('/home/ubuntu/AI/InvoiceParser/finalwithoutcolen.csv')
  169. merged = pd.merge(x1, tp, on='Key', how='right')
  170. frames = [result, merged]
  171. result1 = pd.concat(frames)
  172. result1.to_csv('/home/ubuntu/AI/InvoiceParser/final1.csv', index=False)
  173. x1 = pd.read_csv('/home/ubuntu/AI/InvoiceParser/main.csv')
  174. tp = pd.read_csv('/home/ubuntu/AI/InvoiceParser/final1.csv')
  175. tp['Key'] = tp['Key'].str.strip()
  176. tp['Values'] = tp['Values'].str.strip()
  177. merge = pd.merge(tp, x1, on='Key', how='right')
  178. merge.to_csv('/home/ubuntu/AI/InvoiceParser/invoicewithouttable.csv', index=False)
  179. df2 = pd.read_csv('/home/ubuntu/AI/InvoiceParser/invoicewithouttable.csv')
  180. # Import writer class from csv module
  181. from csv import writer
  182. List=['PlantCode'," "]
  183. with open('/home/ubuntu/AI/InvoiceParser/invoicewithouttable.csv', 'a') as f_object:
  184. writer_object = writer(f_object)
  185. writer_object.writerow(List)
  186. f_object.close()
  187. # print(df2)
  188. df2 = pd.read_csv('/home/ubuntu/AI/InvoiceParser/invoicewithouttable.csv')
  189. df2 = df2.T
  190. df2.to_csv('/home/ubuntu/AI/InvoiceParser/invoicewithouttable.csv', index=False, header=False)
  191. df1 = pd.read_csv('/home/ubuntu/AI/InvoiceParser/invoicewithouttable.csv')
  192. df1.to_json('/home/ubuntu/AI/InvoiceParser/firstjson.json', orient="index")
  193. import pandas as pd
  194. x = pd.read_csv('/home/ubuntu/AI/InvoiceParser/final.csv')
  195. tp = pd.read_csv('/home/ubuntu/AI/InvoiceParser/item1.csv')
  196. x['Values'] = x['Values'].str.strip()
  197. merge = pd.merge(tp, x, on='Key', how='inner')
  198. merge = merge.groupby('Key').agg({
  199. 'Values': '/'.join,
  200. }).reset_index()
  201. z = merge['Values'].str.split('/', expand=True)
  202. frames = [merge, z]
  203. result1 = pd.concat(frames, axis=1)
  204. result1 = result1.drop(['Values'], axis=1)
  205. import pandas as pd
  206. tp = pd.read_csv('/home/ubuntu/AI/InvoiceParser/item1.csv')
  207. merge = pd.merge(tp, result1, on='Key', how='inner')
  208. merge = merge.T
  209. new_header = merge.iloc[0] # grab the first row for the header
  210. merge = merge[1:] # take the data less the header row
  211. merge.columns = new_header
  212. merge = merge.to_dict('records')
  213. invoice_Item=merge
  214. # import pandas as pd
  215. # import json
  216. # dflist = []
  217. # x = pd.read_csv('/home/ubuntu/AI/InvoiceParser/item1.csv')
  218. # tp = pd.read_csv('/home/ubuntu/AI/InvoiceParser/final.csv')
  219. # tp['Key']=tp['Key'].str.strip()
  220. # tp['Values']=tp['Values'].str.strip()
  221. # # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
  222. # merge = pd.merge(x, tp, on='Key', how='left')
  223. # merge.to_csv('/home/ubuntu/AI/InvoiceParser/invoicewithtable1.csv', index=False)
  224. # dfPG = pd.read_csv('/home/ubuntu/AI/InvoiceParser/invoicewithtable1.csv')
  225. # import numpy as np
  226. # dfPG = dfPG.replace({np.nan: None})
  227. # x2 = dfPG.iloc[:, -2].tolist()
  228. # y2 = dfPG.iloc[:, -1].tolist()
  229. # z1 = dict(zip(x2, y2))
  230. # dflist.append(z1)
  231. # # u1 = json.dumps(z1)
  232. # import pandas as pd
  233. # x = pd.read_csv('/home/ubuntu/AI/InvoiceParser/item2.csv')
  234. # tp = pd.read_csv('/home/ubuntu/AI/InvoiceParser/final.csv')
  235. # tp['Key']=tp['Key'].str.strip()
  236. # tp['Values']=tp['Values'].str.strip()
  237. # # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
  238. # merge = pd.merge(x, tp, on='Key', how='left')
  239. # merge.to_csv('/home/ubuntu/AI/InvoiceParser/invoicewithtable2.csv', index=False)
  240. # dfUG = pd.read_csv('/home/ubuntu/AI/InvoiceParser/invoicewithtable2.csv')
  241. # import numpy as np
  242. # dfUG = dfUG.replace({np.nan: None})
  243. # x2 = dfUG.iloc[:, -2].tolist()
  244. # y2 = dfUG.iloc[:, -1].tolist()
  245. # z2 = dict(zip(x2, y2))
  246. # dflist.append(z2)
  247. # u2 = json.dumps(z2)
  248. # final = '[' + str(z1) + ',' + str(z2) + ']'
  249. # return render_template('resume.html')
  250. ############################################Document############################################################
  251. import base64
  252. empty = []
  253. name = found
  254. image = open(name, 'rb')
  255. image_read = image.read()
  256. image_64_encode = base64.b64encode(image_read)
  257. NULL = 'null'
  258. # empty.append("ByteData--" + (NULL).strip('""'))
  259. image_64_encode = image_64_encode.decode('utf-8')
  260. empty.append("FileData--" + str(image_64_encode))
  261. imagedata = name.split("/")
  262. imagename = str(imagedata[-1]).replace('"', '').replace("[", "").replace("]", "")
  263. imagename1 = str(imagename).split('.')
  264. imagename = str(imagename1[-2]).replace("[", "]")
  265. empty.append("FileName--" + imagename)
  266. empty.append("FilePath--" + name)
  267. imageExtension = str(imagename1[-1]).replace("[", "]")
  268. empty.append("FileType--" + imageExtension)
  269. import pandas as pd
  270. df = pd.DataFrame(empty)
  271. df = df[0].str.split("--", expand=True)
  272. data1 = pd.DataFrame(df[0])
  273. data2 = pd.DataFrame(df[1])
  274. dt = data2.set_index(data1[0])
  275. dt4 = dt.T
  276. list = []
  277. dictionary = dt4.to_dict(orient="index")
  278. a = {
  279. "FileId": 0,
  280. "FileData": "",
  281. "FileName": "",
  282. "FileType": "",
  283. "RefId": 0
  284. }
  285. list = []
  286. list.append(a)
  287. list.append(dictionary[1])
  288. import json
  289. with open('/home/ubuntu/AI/InvoiceParser/firstjson.json', 'r') as json_file:
  290. json_load = json.load(json_file)
  291. # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/businesscards/create"
  292. nothing = json.dumps(json_load).replace("]", "").replace("[", "").replace('{"0":', '').replace('}}', '}')
  293. import json
  294. # JSON data:
  295. x = nothing
  296. # python object to be appended
  297. y = {"InvoiceItems":invoice_Item}
  298. y1 = {"Document": list}
  299. # parsing JSON string:
  300. z = json.loads(x)
  301. # appending the data
  302. z.update(y)
  303. z.update(y1)
  304. # the result is a JSON string:
  305. # print(json.dumps(z))
  306. # print('##########################')
  307. # print(z)
  308. # print('##########################')
  309. import requests
  310. import json
  311. # with open('visitingcard1.json', 'r') as json_file:
  312. # json_load = json.load(json_file)
  313. url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/invoice/createsalesinvoice"
  314. #url="https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/invoice/createsalesinvoice"
  315. payload1 = json.dumps(z)
  316. print('--------------------------------------------------------------------------')
  317. print(payload1)
  318. headers = {
  319. 'Authorization': 'stat 089166c35d4c4d7d941c99d6f8986834',
  320. 'Content-Type': 'application/json'
  321. }
  322. response = requests.request("POST", url, headers=headers, data=payload1)
  323. print("##############################################################")
  324. print(response.text)
  325. import glob
  326. files = glob.glob(
  327. "/home/ubuntu/AI/InvoiceParser/upload_invoice/*"
  328. )
  329. for f in files:
  330. os.remove(f)
  331. files = glob.glob(
  332. "/home/ubuntu/AI/InvoiceParser/uploads/*"
  333. )
  334. for f in files:
  335. os.remove(f)
  336. return payload1
  337. @app.route("/Download_invoice")
  338. def Download_invoice():
  339. pass
  340. @app.route("/Table")
  341. def Table():
  342. pass
  343. @app.route('/upload_invoice', methods=["POST"])
  344. def upload_invoice():
  345. if __name__ == "__main__":
  346. url_list = []
  347. Dataset = request.get_json()
  348. # id = "100013660000125"
  349. url_list.append(Dataset)
  350. # multiprocessing
  351. with multiprocessing.Pool(processes=30) as pool:
  352. results = pool.map(predict, url_list)
  353. pool.close()
  354. return results[0]
  355. if __name__ == "__main__":
  356. app.run(host='0.0.0.0', port=9797, debug=True)