"""Flask service that turns an uploaded invoice into a JSON payload.

The ``/upload_invoice`` route receives a JSON body with ``FileData``
(base64), ``FileName`` and ``FileType``.  ``predict`` stores the file,
converts images to a searchable PDF with Tesseract, extracts the text with
PyMuPDF, runs the spaCy NER model over it, maps the entities onto the CSV
key templates in ``BASE_DIR`` and posts the assembled JSON to the invoice
API.

NOTE(review): every intermediate artefact uses a fixed file name inside
``BASE_DIR``, so concurrent requests share (and overwrite) the same files.
"""
import base64
import glob
import json
import multiprocessing
import os
import shutil
import sys
import time
from csv import writer
from functools import partial

import fitz
import pandas as pd
import pytesseract
import requests
import spacy
from flask import Flask, render_template, send_file, request, redirect, Response
from PIL import Image

BASE_DIR = "/home/ubuntu/AI/InvoiceParser"
# Raw uploads (and the OCR output ``demo.pdf``) are written here.
UPLOAD_DIR = os.path.join(BASE_DIR, "upload_invoice")
# The PDF that is actually parsed is staged here as ``0.pdf``.
UPLOADS_DIR = os.path.join(BASE_DIR, "uploads")
INVOICE_API_URL = (
    "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/invoice/createsalesinvoice"
)

app = Flask(__name__)
nlp_model1 = spacy.load("p")
app.config["IMAGE_UPLOADS"] = UPLOAD_DIR


def _data_file(name):
    """Return the absolute path of a working/template file inside BASE_DIR."""
    return os.path.join(BASE_DIR, name)


@app.route("/", methods=["GET"])
def home():
    """Render the invoice upload page."""
    return render_template("invoice.html")


def _save_upload(file_data, file_name, file_type):
    """Decode the base64 ``file_data`` into UPLOAD_DIR and return its path."""
    # basename() keeps a crafted FileName (e.g. "../../x") from escaping
    # the upload directory; the value comes straight from the request body.
    name = os.path.basename(file_name + '.' + file_type)
    print(name)
    saved_path = os.path.join(UPLOAD_DIR, name)
    with open(saved_path, "wb") as fh:
        fh.write(base64.decodebytes(file_data.encode()))
    return saved_path


def _ensure_pdf(source_path):
    """Return a PDF path for ``source_path``, OCR-converting it when needed.

    Files whose name ends in ``.pdf`` are used as they are; anything else is
    passed to Tesseract and the resulting PDF is written to
    ``UPLOAD_DIR/demo.pdf``.
    """
    if source_path.endswith(".pdf"):
        return source_path
    pdf_bytes = pytesseract.image_to_pdf_or_hocr(source_path, extension="pdf")
    converted_path = os.path.join(UPLOAD_DIR, "demo.pdf")
    with open(converted_path, "w+b") as f:
        f.write(pdf_bytes)
    return converted_path


def _stage_pdf(pdf_path):
    """Copy ``pdf_path`` to ``UPLOADS_DIR/0.pdf`` and return that path."""
    # Copying straight to the final name avoids changing the working
    # directory and renaming files by their directory-listing position.
    staged_path = os.path.join(UPLOADS_DIR, "0.pdf")
    shutil.copy2(pdf_path, staged_path)
    return staged_path


def _extract_text(pdf_path):
    """Return the text of every page of ``pdf_path`` with newlines as spaces."""
    with fitz.open(pdf_path) as doc:
        text = "".join(str(page.get_text()) for page in doc)
    return " ".join(text.split("\n"))


def _write_entity_csvs(text):
    """Run the NER model over ``text`` and write Invoice.csv and final.csv.

    ``final.csv`` has two columns, ``Key`` (upper-cased entity label with
    ``/`` and spaces removed) and ``Values`` (the entity text).
    """
    entities = nlp_model1(text).ents
    labels = [ent.label_.upper() for ent in entities]
    values = [ent.text for ent in entities]

    invoice_csv = _data_file("Invoice.csv")
    pd.DataFrame([values], columns=labels).T.to_csv(invoice_csv)

    # Reading the file back turns the (possibly duplicated) labels into an
    # ordinary column, which is then renamed to Key/Values.
    df = pd.read_csv(invoice_csv)
    df.rename({df.columns[-2]: 'Key'}, axis=1, inplace=True)
    df.rename({df.columns[-1]: 'Values'}, axis=1, inplace=True)
    df['Key'] = df['Key'].str.replace('/', '')
    df['Key'] = df['Key'].str.replace(' ', '')
    df.to_csv(_data_file("final.csv"), index=False)


def _write_header_json():
    """Map the extracted values onto the header templates; write firstjson.json.

    Reads ``final.csv`` plus the key templates ``finalwithcolen.csv``,
    ``finalwithoutcolen.csv`` and ``main.csv`` (all joined on ``Key``), and
    writes ``final1.csv``, ``invoicewithouttable.csv`` and ``firstjson.json``.
    """
    extracted = pd.read_csv(_data_file("final.csv"))

    # Keys listed in finalwithcolen.csv: keep the part after the last ':'.
    colon_keys = pd.read_csv(_data_file("finalwithcolen.csv"))
    with_colon = pd.merge(extracted, colon_keys, on='Key', how='right')
    split_values = with_colon['Values'].str.split(":", expand=True)
    split_values.rename({split_values.columns[-1]: 'Values'}, axis=1, inplace=True)
    colon_result = pd.concat([with_colon['Key'], split_values['Values']], axis=1)

    # Keys listed in finalwithoutcolen.csv: values are taken unchanged.
    plain_keys = pd.read_csv(_data_file("finalwithoutcolen.csv"))
    without_colon = pd.merge(extracted, plain_keys, on='Key', how='right')
    pd.concat([colon_result, without_colon]).to_csv(
        _data_file("final1.csv"), index=False
    )

    template = pd.read_csv(_data_file("main.csv"))
    values = pd.read_csv(_data_file("final1.csv"))
    values['Key'] = values['Key'].str.strip()
    values['Values'] = values['Values'].str.strip()

    header_csv = _data_file("invoicewithouttable.csv")
    pd.merge(values, template, on='Key', how='right').to_csv(header_csv, index=False)

    # Append an extra PlantCode row with a single-space value.
    with open(header_csv, 'a') as f_object:
        writer(f_object).writerow(['PlantCode', " "])

    # Transpose so the keys become the header row and the values one record.
    pd.read_csv(header_csv).T.to_csv(header_csv, index=False, header=False)
    pd.read_csv(header_csv).to_json(_data_file("firstjson.json"), orient="index")


def _load_header_fields():
    """Return the header record stored in firstjson.json as a dict."""
    with open(_data_file("firstjson.json"), 'r') as json_file:
        loaded = json.load(json_file)
    # NOTE(review): the record is unwrapped from {"0": {...}} by string
    # replacement, which also strips '[' / ']' from values and collapses
    # '}}'. Kept as-is because the payload format depends on it; confirm
    # whether plain loaded["0"] would be acceptable.
    flattened = (
        json.dumps(loaded)
        .replace("]", "")
        .replace("[", "")
        .replace('{"0":', '')
        .replace('}}', '}')
    )
    return json.loads(flattened)


def _build_invoice_items():
    """Return the invoice line items as a list of dicts.

    Values from ``final.csv`` whose ``Key`` appears in ``item1.csv`` are
    grouped per key; the n-th value of every key forms the n-th item.
    """
    extracted = pd.read_csv(_data_file("final.csv"))
    item_keys = pd.read_csv(_data_file("item1.csv"))
    extracted['Values'] = extracted['Values'].str.strip()

    joined = pd.merge(item_keys, extracted, on='Key', how='inner')
    joined = joined.groupby('Key').agg({'Values': '/'.join}).reset_index()
    per_item = joined['Values'].str.split('/', expand=True)
    wide = pd.concat([joined, per_item], axis=1).drop(['Values'], axis=1)

    table = pd.merge(item_keys, wide, on='Key', how='inner').T
    header = table.iloc[0]  # first row holds the keys
    table = table[1:]  # remaining rows are one item each
    table.columns = header
    return table.to_dict('records')


def _build_document_entry(source_path):
    """Return the ``Document`` entry describing the uploaded file.

    The dict holds the base64 file content, the file name and type taken
    from the last two dot-separated parts of the base name, and the path.
    """
    with open(source_path, 'rb') as image:
        encoded = base64.b64encode(image.read()).decode('utf-8')
    base_name = (
        source_path.split("/")[-1].replace('"', '').replace("[", "").replace("]", "")
    )
    parts = base_name.split('.')
    return {
        "FileData": encoded,
        "FileName": parts[-2],
        "FilePath": source_path,
        "FileType": parts[-1],
    }


def _post_invoice(payload):
    """POST the JSON string ``payload`` to the invoice API and print the reply."""
    print('--------------------------------------------------------------------------')
    print(payload)
    # NOTE(review): credential committed in source — move it to configuration.
    headers = {
        'Authorization': 'stat 089166c35d4c4d7d941c99d6f8986834',
        'Content-Type': 'application/json'
    }
    response = requests.request("POST", INVOICE_API_URL, headers=headers, data=payload)
    print("##############################################################")
    print(response.text)


def _clear_directory(directory):
    """Delete every entry directly inside ``directory``."""
    for path in glob.glob(directory + "/*"):
        os.remove(path)


def predict(url_list):
    """Parse one uploaded invoice and post the extracted data.

    Args:
        url_list: despite the name, a single mapping with the keys
            ``FileData`` (base64 text), ``FileName`` and ``FileType``.

    Returns:
        The JSON string that was posted to the invoice API: the header
        fields plus ``InvoiceItems`` and ``Document``.

    Raises:
        KeyError: if one of the three keys is missing.
    """
    upload = url_list
    try:
        source_path = _save_upload(
            upload['FileData'], upload['FileName'], upload['FileType']
        )
        print(source_path)

        pdf_path = _stage_pdf(_ensure_pdf(source_path))
        _write_entity_csvs(_extract_text(pdf_path))
        _write_header_json()
        invoice_items = _build_invoice_items()

        # The first Document entry is an empty placeholder record.
        documents = [
            {
                "FileId": 0,
                "FileData": "",
                "FileName": "",
                "FileType": "",
                "RefId": 0
            },
            _build_document_entry(source_path),
        ]

        payload = _load_header_fields()
        payload.update({"InvoiceItems": invoice_items})
        payload.update({"Document": documents})
        payload1 = json.dumps(payload)

        _post_invoice(payload1)
        return payload1
    finally:
        # Clean up even on failure so uploaded files do not pile up.
        _clear_directory(UPLOAD_DIR)
        _clear_directory(UPLOADS_DIR)


@app.route("/Download_invoice")
def Download_invoice():
    """Placeholder route; not implemented."""
    pass


@app.route("/Table")
def Table():
    """Placeholder route; not implemented."""
    pass


@app.route('/upload_invoice', methods=["POST"])
def upload_invoice():
    """Run ``predict`` on the posted JSON body in a worker process.

    Returns:
        The JSON string produced by ``predict``.
    """
    url_list = [request.get_json()]
    # There is exactly one work item, so one worker process is enough.
    with multiprocessing.Pool(processes=1) as pool:
        results = pool.map(predict, url_list)
    return results[0]


if __name__ == "__main__":
    # NOTE(review): debug=True on 0.0.0.0 exposes the Werkzeug debugger to
    # the network — confirm this is never used outside development.
    app.run(host='0.0.0.0', port=9797, debug=True)