123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427 |
- afrom flask import Flask, render_template, send_file, request, redirect, Response
- import spacy
- import os
- import shutil
- import pytesseract
-
- import requests
-
- import time
- import multiprocessing
- from PIL import Image
- from functools import partial
-
# WSGI application object; the route decorators in this module register on it.
app = Flask(__name__)

# Directory path stored under the IMAGE_UPLOADS configuration key.
app.config.update(
    IMAGE_UPLOADS="/home/ubuntu/AI/InvoiceParser/upload_invoice",
)

# spaCy pipeline loaded once at import time from "p"; predict() runs the
# extracted invoice text through it to obtain the labelled entities.
nlp_model1 = spacy.load("p")
-
-
@app.route("/", methods=["GET"])
def home():
    """Render and return the ``invoice.html`` template for GET requests to ``/``."""
    page = render_template("invoice.html")
    return page
-
-
# Fixed locations used by the invoice pipeline.
_INVOICE_ROOT = "/home/ubuntu/AI/InvoiceParser"
_UPLOAD_DIR = os.path.join(_INVOICE_ROOT, "upload_invoice")
_STAGING_DIR = os.path.join(_INVOICE_ROOT, "uploads")

# Extensions the pipeline accepts (matched case-sensitively, as before).
_SUPPORTED_SUFFIXES = (".jpg", ".png", ".pdf", ".jpeg", ".JPEG")

_CREATE_INVOICE_URL = (
    "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/invoice/createsalesinvoice"
)


def _invoice_path(file_name):
    """Return the absolute path of *file_name* inside the invoice root directory."""
    return os.path.join(_INVOICE_ROOT, file_name)


def _safe_upload_name(file_name, file_type):
    """Build the on-disk name for an upload and reject unsupported types.

    The name comes from client-supplied JSON, so any directory components are
    dropped before it is joined to the upload directory.

    Raises:
        ValueError: if the resulting name does not end in a supported suffix.
    """
    name = os.path.basename(file_name + '.' + file_type)
    if not name.endswith(_SUPPORTED_SUFFIXES):
        raise ValueError(f"unsupported invoice file type: {file_type!r}")
    return name


def _convert_image_to_pdf(image_path):
    """OCR *image_path* with Tesseract into a searchable ``demo.pdf`` in the upload dir."""
    pdf_bytes = pytesseract.image_to_pdf_or_hocr(image_path, extension="pdf")
    with open(os.path.join(_UPLOAD_DIR, "demo.pdf"), "w+b") as pdf_file:
        pdf_file.write(pdf_bytes)


def _stage_pdfs():
    """Copy every PDF from the upload dir to the staging dir and number the staged files.

    Each staged file is renamed to ``<index><ext>``; the path of ``0.pdf`` is
    returned. Absolute paths are used so the process working directory is not
    changed.
    """
    for entry in os.listdir(_UPLOAD_DIR):
        if entry.endswith(".pdf"):
            shutil.copy2(
                os.path.join(_UPLOAD_DIR, entry), os.path.join(_STAGING_DIR, entry)
            )
    for index, entry in enumerate(os.listdir(_STAGING_DIR)):
        _, extension = os.path.splitext(entry)
        os.rename(
            os.path.join(_STAGING_DIR, entry),
            os.path.join(_STAGING_DIR, f"{index}{extension}"),
        )
    return os.path.join(_STAGING_DIR, "0.pdf")


def _extract_text(pdf_path):
    """Return the text of all pages of *pdf_path* with newlines replaced by spaces."""
    import fitz

    document = fitz.open(pdf_path)
    try:
        text = "".join(str(page.get_text()) for page in document)
    finally:
        # Release the file handle before the staging dir is cleaned up.
        document.close()
    return " ".join(text.split("\n"))


def _write_entity_table(text):
    """Run the NER model over *text* and write the Key/Values table to ``final.csv``.

    ``Invoice.csv`` is written and re-read on the way, exactly as before, so the
    entity labels end up as an ordinary column.
    """
    import pandas as pd

    parsed = nlp_model1(text)
    labels = []
    values = []
    for ent in parsed.ents:
        labels.append(ent.label_.upper())
        values.append(ent.text)

    table = pd.DataFrame([values], columns=labels).T
    table.to_csv(_invoice_path("Invoice.csv"))

    df = pd.read_csv(_invoice_path("Invoice.csv"))
    df.rename({df.columns[-2]: 'Key'}, axis=1, inplace=True)
    df.rename({df.columns[-1]: 'Values'}, axis=1, inplace=True)
    df['Key'] = df['Key'].str.replace('/', '')
    df['Key'] = df['Key'].str.replace(' ', '')
    df.to_csv(_invoice_path("final.csv"), index=False)


def _build_invoice_header():
    """Join ``final.csv`` with the key templates and return the header fields as a dict.

    Intermediate results are written to ``final1.csv``,
    ``invoicewithouttable.csv`` and ``firstjson.json`` as before.
    """
    import json
    from csv import writer

    import pandas as pd

    # Keys whose extracted value looks like "label: value" - keep the last part.
    entities = pd.read_csv(_invoice_path("final.csv"))
    colon_keys = pd.read_csv(_invoice_path("finalwithcolen.csv"))
    joined = pd.merge(entities, colon_keys, on='Key', how='right')
    parts = joined['Values'].str.split(":", expand=True)
    parts.rename({parts.columns[-1]: 'Values'}, axis=1, inplace=True)
    with_colon = pd.concat([joined['Key'], parts['Values']], axis=1)

    # Keys whose value is used as extracted.
    entities = pd.read_csv(_invoice_path("final.csv"))
    plain_keys = pd.read_csv(_invoice_path("finalwithoutcolen.csv"))
    without_colon = pd.merge(entities, plain_keys, on='Key', how='right')
    combined = pd.concat([with_colon, without_colon])
    combined.to_csv(_invoice_path("final1.csv"), index=False)

    # Align with the master key list so every expected key is present.
    master = pd.read_csv(_invoice_path("main.csv"))
    extracted = pd.read_csv(_invoice_path("final1.csv"))
    extracted['Key'] = extracted['Key'].str.strip()
    extracted['Values'] = extracted['Values'].str.strip()
    aligned = pd.merge(extracted, master, on='Key', how='right')
    aligned.to_csv(_invoice_path("invoicewithouttable.csv"), index=False)

    # Append a PlantCode row with a blank value.
    with open(_invoice_path("invoicewithouttable.csv"), 'a') as csv_file:
        writer(csv_file).writerow(['PlantCode', " "])

    # Transpose so that the keys become the header row of the CSV.
    transposed = pd.read_csv(_invoice_path("invoicewithouttable.csv")).T
    transposed.to_csv(
        _invoice_path("invoicewithouttable.csv"), index=False, header=False
    )

    header_frame = pd.read_csv(_invoice_path("invoicewithouttable.csv"))
    header_frame.to_json(_invoice_path("firstjson.json"), orient="index")

    with open(_invoice_path("firstjson.json"), 'r') as json_file:
        json_load = json.load(json_file)

    # NOTE(review): this textual flattening is kept exactly as it was. It strips
    # every "[" / "]" (also inside values) and unwraps the {"0": ...} record;
    # it looks like it only yields valid JSON when there is a single record.
    flattened = (
        json.dumps(json_load)
        .replace("]", "")
        .replace("[", "")
        .replace('{"0":', '')
        .replace('}}', '}')
    )
    return json.loads(flattened)


def _build_invoice_items():
    """Return the line items as a list of dicts built from ``final.csv`` and ``item1.csv``."""
    import pandas as pd

    entities = pd.read_csv(_invoice_path("final.csv"))
    item_keys = pd.read_csv(_invoice_path("item1.csv"))
    entities['Values'] = entities['Values'].str.strip()
    matched = pd.merge(item_keys, entities, on='Key', how='inner')
    # Collapse repeated keys into one "/"-joined cell, then spread the pieces
    # over numbered columns (one column per occurrence).
    matched = matched.groupby('Key').agg({
        'Values': '/'.join,
    }).reset_index()
    spread = matched['Values'].str.split('/', expand=True)
    per_key = pd.concat([matched, spread], axis=1)
    per_key = per_key.drop(['Values'], axis=1)

    item_keys = pd.read_csv(_invoice_path("item1.csv"))
    ordered = pd.merge(item_keys, per_key, on='Key', how='inner')
    ordered = ordered.T
    new_header = ordered.iloc[0]  # first row (the keys) becomes the header
    ordered = ordered[1:]  # remaining rows are the items
    ordered.columns = new_header
    return ordered.to_dict('records')


def _describe_document(path):
    """Return the Document entry (base64 data, name, path, type) for the file at *path*."""
    import base64

    with open(path, 'rb') as source:
        encoded = base64.b64encode(source.read()).decode('utf-8')

    cleaned = os.path.basename(path).replace('"', '').replace("[", "").replace("]", "")
    # Split on the last dot only, so dotted names keep their whole stem.
    stem, _, extension = cleaned.rpartition('.')
    return {
        "FileData": encoded,
        "FileName": stem,
        "FilePath": path,
        "FileType": extension,
    }


def _post_invoice(payload):
    """POST the JSON *payload* to the create-sales-invoice endpoint and print the reply."""
    # NOTE(review): the API token is hard-coded in source; consider moving it
    # to configuration or an environment variable.
    headers = {
        'Authorization': 'stat 089166c35d4c4d7d941c99d6f8986834',
        'Content-Type': 'application/json'
    }
    response = requests.request(
        "POST", _CREATE_INVOICE_URL, headers=headers, data=payload
    )
    print("##############################################################")
    print(response.text)


def _clear_working_dirs():
    """Delete every file in the upload and staging directories."""
    import glob

    for directory in (_UPLOAD_DIR, _STAGING_DIR):
        for leftover in glob.glob(directory + "/*"):
            os.remove(leftover)


def predict(url_list):
    """Parse one uploaded invoice and submit the extracted data.

    Args:
        url_list: mapping with the keys ``FileData`` (base64 text of the file),
            ``FileName`` and ``FileType`` (extension without the dot).

    Returns:
        The JSON string that was posted to the invoice endpoint.

    Raises:
        KeyError: if one of the three keys is missing.
        ValueError: if the file type is not .jpg/.png/.pdf/.jpeg/.JPEG.
    """
    import base64
    import json

    file_data = url_list['FileData']
    name = _safe_upload_name(url_list['FileName'], url_list['FileType'])
    print(name)

    # The upload is addressed by the path it is written to, rather than by
    # looking for the newest file in the directory.
    found = os.path.join(_UPLOAD_DIR, name)
    try:
        with open(found, "wb") as upload_file:
            upload_file.write(base64.decodebytes(file_data.encode()))
        print(found)

        # Images are OCR-ed into demo.pdf; PDFs are used as they are.
        if not found.endswith(".pdf"):
            _convert_image_to_pdf(found)

        text = _extract_text(_stage_pdfs())
        _write_entity_table(text)

        invoice = _build_invoice_header()
        invoice.update({"InvoiceItems": _build_invoice_items()})
        empty_document = {
            "FileId": 0,
            "FileData": "",
            "FileName": "",
            "FileType": "",
            "RefId": 0
        }
        invoice.update({"Document": [empty_document, _describe_document(found)]})

        payload1 = json.dumps(invoice)
        print('--------------------------------------------------------------------------')
        print(payload1)
        _post_invoice(payload1)
        return payload1
    finally:
        # Clean up on failure too: leftover files would be picked up by the
        # next run's staging step.
        _clear_working_dirs()
-
-
@app.route("/Download_invoice")
def Download_invoice():
    """Placeholder view for ``/Download_invoice``; it has no logic and returns ``None``."""
    return None
-
-
@app.route("/Table")
def Table():
    """Placeholder view for ``/Table``; it has no logic and returns ``None``."""
    return None
-
-
-
@app.route('/upload_invoice', methods=["POST"])
def upload_invoice():
    """Accept an invoice as JSON, run ``predict`` on it in a worker process, return the result.

    The request body is expected to be a JSON object; it is handed unchanged
    to ``predict``. The response body is the JSON string ``predict`` returns.
    A body that is not a JSON object is answered with HTTP 400.
    """
    dataset = request.get_json()
    if not isinstance(dataset, dict):
        return "Request body must be a JSON object", 400

    url_list = [dataset]
    # The body is no longer wrapped in `if __name__ == "__main__"`: under a
    # WSGI server that guard was false and the view returned None.
    # One worker per item is enough; the pool is terminated on leaving the
    # `with` block, so no explicit close() is needed.
    with multiprocessing.Pool(processes=len(url_list)) as pool:
        results = pool.map(predict, url_list)
    return results[0]
-
-
if __name__ == "__main__":
    # Start Flask's built-in server when the file is executed directly.
    # NOTE(review): debug=True together with host 0.0.0.0 exposes the
    # interactive debugger on all interfaces - confirm this is never used
    # outside a trusted development network.
    app.run(debug=True, port=9797, host='0.0.0.0')
|