from flask import Flask, render_template, request, redirect, Response, send_file
import spacy
import pandas as pd
import os
import glob
import camelot
from pytesseract import *
import shutil
import cv2
import matplotlib
from werkzeug.utils import secure_filename
import requests
import time
import multiprocessing
from PIL import Image
from functools import partial

app = Flask(__name__)
nlp_model = spacy.load('ME')
nlp_model1 = spacy.load('bdeeducation_50_0.2')
# path to save image in a folder
app.config["IMAGE_UPLOADS"] = "/home/ubuntu/AI/ResumeParser/resume_upload"


@app.route('/', methods=['GET'])
def resume():
    return render_template('resume.html')


# @app.route("/upload_resume", methods=["POST"])
def predict(url_list):
    # Dataset = request.get_json()
    # print(Dataset)
    a = url_list
    x = a['FileData']
    # print(x)
    y = a['FileName']
    z = a['FileType']
    name = y + '.' + z
    print(name)
    # print(y)
    # image = y.split("/")
    # filename = image[-1]
    # print(x)
    # decode the base64 payload and write it into the upload folder
    img_data = x.encode()
    import base64
    with open('/home/ubuntu/AI/ResumeParser/resume_upload/' + name, "wb") as fh:
        fh.write(base64.decodebytes(img_data))
    # if request.method == "POST":
    #     if request.files:
    #         image = request.files["image"]
    #         try:
    #             image.save(os.path.join(
    #                 app.config["IMAGE_UPLOADS"], image.filename))
    #         except IsADirectoryError:
    #             return render_template('resume.html')
    #         # image.save(os.path.join(
    #         #     app.config["IMAGE_UPLOADS"], image.filename))
    #         print("Image saved")
    #         return redirect(request.url)
    import glob
    # import os
    # pick the most recently modified file in the upload folder
    ts = 0
    for file_name in glob.glob('/home/ubuntu/AI/ResumeParser/resume_upload/*'):
        fts = os.path.getmtime(file_name)
        if fts > ts:
            ts = fts
            found = file_name
    print(found)
    # os.chdir(found)
    # print(os.getcwd())
    # for count, f in enumerate(os.listdir()):
    #     f_name, f_ext = os.path.splitext(f)
    #     f_name = "" + str(count)
    #     new_name = f'{f_name}{f_ext}'
    #     os.rename(f, new_name)
    f = "/home/ubuntu/AI/ResumeParser/resume_upload"
    f = os.listdir(f)

    def docx_to_txt():
        import docx2txt
        import glob
        text = ''
        for file in glob.glob(found):
            c = docx2txt.process(file)
            c = c.rstrip("\n")
            toPrint = c
            d = ' '.join(i for i in toPrint.split())
            d = d.rstrip()
            text += d
        docx_to_txt.text = text

    def doc_to_txt():
        import docx2txt
        import glob
        text = ''
        for file in glob.glob(found):
            c = docx2txt.process(file)
            c = c.rstrip("\n")
            toPrint = c
            d = ' '.join(i for i in toPrint.split())
            d = d.rstrip()
            text += d
        doc_to_txt.text = text

    def pdf_to_txt():
        import sys
        import fitz
        fname = found
        doc = fitz.open(fname)
        text = ""
        for page in doc:
            text = text + str(page.get_text())
        pdf_to_txt.text = " ".join(text.split('\n'))

    # dispatch on file extension and collect the plain text into x
    for file in f:
        if file.endswith('.doc'):
            doc_to_txt()
            x = doc_to_txt.text
        elif file.endswith('.docx'):
            docx_to_txt()
            x = docx_to_txt.text
        elif file.endswith('.pdf'):
            pdf_to_txt()
            x = pdf_to_txt.text

    # first NER pass over the extracted resume text (nlp_model)
    doc = nlp_model(x)
    k = []
    l = []
    for ent in doc.ents:
        # print(f'{ent.label_.upper():{30}}- {ent.text}')
        k.append(ent.label_.upper())
        l.append(ent.text)
    columns = k
    rows = [l]
    import pandas as pd
    data = pd.DataFrame(rows, columns=columns)
    df = data
    data = df.T
    data.to_csv('/home/ubuntu/AI/ResumeParser/Ad1.csv', index=True)
    data = pd.read_csv('/home/ubuntu/AI/ResumeParser/Ad1.csv')
    data.rename({data.columns[-2]: 'Key'}, axis=1, inplace=True)
    data.rename({data.columns[-1]: 'Values'}, axis=1, inplace=True)
    data.to_csv('/home/ubuntu/AI/ResumeParser/Ad1.csv', index=False)
    df2 = pd.read_csv('/home/ubuntu/AI/ResumeParser/Ad1.csv')
    x1 = pd.read_csv('/home/ubuntu/AI/ResumeParser/AD11.csv')
    tp = pd.read_csv('/home/ubuntu/AI/ResumeParser/Ad1.csv')
    # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
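    # AD11.csv is assumed here to be a template listing every Key the downstream
    # service expects; the right join below keeps all template Keys even when the
    # NER model produced no matching entity (those Values simply come out empty).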
    merge = pd.merge(tp, x1, on='Key', how='right')
    merge.to_csv('/home/ubuntu/AI/ResumeParser/AD.csv', index=False)
    df2 = pd.read_csv('/home/ubuntu/AI/ResumeParser/AD.csv')
    # print(df2)
    df2 = df2.T
    df2.to_csv('/home/ubuntu/AI/ResumeParser/path.csv', index=False, header=False)
    df1 = pd.read_csv('/home/ubuntu/AI/ResumeParser/path.csv')
    df1.to_json('/home/ubuntu/AI/ResumeParser/firstjson.json', orient="index")

    # second NER pass over the same text (nlp_model1)
    doc = nlp_model1(x)
    k = []
    l = []
    for ent in doc.ents:
        # print(f'{ent.label_.upper():{30}}- {ent.text}')
        k.append(ent.label_.upper())
        l.append(ent.text)
    columns = k
    rows = [l]
    data = pd.DataFrame(rows, columns=columns)
    df = data
    data = df.T
    data.to_csv('/home/ubuntu/AI/ResumeParser/Ad2.csv', index=True)
    data = pd.read_csv('/home/ubuntu/AI/ResumeParser/Ad2.csv')
    data.rename({data.columns[-2]: 'Key'}, axis=1, inplace=True)
    data.rename({data.columns[-1]: 'Values'}, axis=1, inplace=True)
    data.to_csv('/home/ubuntu/AI/ResumeParser/Ad2.csv', index=False)

    import pandas as pd
    import json
    dflist = []

    # PG template merge
    x = pd.read_csv('/home/ubuntu/AI/ResumeParser/PG.csv')
    tp = pd.read_csv('/home/ubuntu/AI/ResumeParser/Ad2.csv')
    # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
    merge = pd.merge(x, tp, on='Key', how='left')
    import numpy as np
    merge = merge.replace(np.nan, '', regex=True)
    merge.to_csv('/home/ubuntu/AI/ResumeParser/PGmerge.csv', index=False)
    dfPG = pd.read_csv('/home/ubuntu/AI/ResumeParser/PGmerge.csv')
    import numpy as np
    dfPG = dfPG.replace({np.nan: None})
    x2 = dfPG.iloc[:, -2].tolist()
    y2 = dfPG.iloc[:, -1].tolist()
    z1 = dict(zip(x2, y2))
    dflist.append(z1)
    # u1 = json.dumps(z1)

    # UG template merge
    import pandas as pd
    x = pd.read_csv('/home/ubuntu/AI/ResumeParser/UG.csv')
    tp = pd.read_csv('/home/ubuntu/AI/ResumeParser/Ad2.csv')
    # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
    merge = pd.merge(x, tp, on='Key', how='left')
    import numpy as np
    merge = merge.replace(np.nan, '', regex=True)
    merge.to_csv('/home/ubuntu/AI/ResumeParser/UGmerge.csv', index=False)
    dfUG = pd.read_csv('/home/ubuntu/AI/ResumeParser/UGmerge.csv')
    import numpy as np
    dfUG = dfUG.replace({np.nan: None})
    x2 = dfUG.iloc[:, -2].tolist()
    y2 = dfUG.iloc[:, -1].tolist()
    z2 = dict(zip(x2, y2))
    dflist.append(z2)
    # u2 = json.dumps(z2)
    # final = '[' + str(z1) + ',' + str(z2) + ']'
    # return render_template('resume.html')

    ############################################################################
    # inter template merge
    import pandas as pd
    x = pd.read_csv('/home/ubuntu/AI/ResumeParser/inter.csv')
    tp = pd.read_csv('/home/ubuntu/AI/ResumeParser/Ad2.csv')
    # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
    merge = pd.merge(x, tp, on='Key', how='left')
    import numpy as np
    merge = merge.replace(np.nan, '', regex=True)
    merge.to_csv('/home/ubuntu/AI/ResumeParser/intermerge.csv', index=False)
    dfinter = pd.read_csv('/home/ubuntu/AI/ResumeParser/intermerge.csv')
    import numpy as np
    dfinter = dfinter.replace({np.nan: None})
    x2 = dfinter.iloc[:, -2].tolist()
    y2 = dfinter.iloc[:, -1].tolist()
    z3 = dict(zip(x2, y2))
    dflist.append(z3)

    ############################################################################
    # SSC template merge
    import pandas as pd
    x = pd.read_csv('/home/ubuntu/AI/ResumeParser/SSC.csv')
    tp = pd.read_csv('/home/ubuntu/AI/ResumeParser/Ad2.csv')
    # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
    merge = pd.merge(x, tp, on='Key', how='left')
    import numpy as np
    merge = merge.replace(np.nan, '', regex=True)
    merge.to_csv('/home/ubuntu/AI/ResumeParser/sscmerge.csv', index=False)
    dfssc = pd.read_csv('/home/ubuntu/AI/ResumeParser/sscmerge.csv')
    import numpy as np
    dfssc = dfssc.replace({np.nan: None})
    x2 = dfssc.iloc[:, -2].tolist()
    y2 = dfssc.iloc[:, -1].tolist()
    z4 = dict(zip(x2, y2))
    dflist.append(z4)
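    # The four education blocks above (PG, UG, inter, SSC) repeat one pattern:
    # left-join a template CSV against the Ad2.csv NER output on 'Key' and turn
    # the last two columns into a dict. A minimal sketch of that pattern as a
    # helper (illustrative only, not called anywhere; it skips the intermediate
    # *merge.csv round-trip the code above performs):
    def merge_with_template(template_path, ner_csv='/home/ubuntu/AI/ResumeParser/Ad2.csv'):
        import pandas as pd
        import numpy as np
        template = pd.read_csv(template_path)
        ner = pd.read_csv(ner_csv)
        merged = pd.merge(template, ner, on='Key', how='left').replace({np.nan: None})
        # map each template Key to the value found by the NER pass (or None)
        return dict(zip(merged.iloc[:, -2].tolist(), merged.iloc[:, -1].tolist()))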
    ############################################Document############################################################
    # re-encode the uploaded file itself so it can be returned alongside the parse result
    import base64
    empty = []
    name = found
    image = open(name, 'rb')
    image_read = image.read()
    image_64_encode = base64.b64encode(image_read)
    NULL = 'null'
    # empty.append("ByteData--" + (NULL).strip('""'))
    image_64_encode = image_64_encode.decode('utf-8')
    empty.append("FileData--" + str(image_64_encode))
    imagedata = name.split("/")
    imagename = str(imagedata[-1]).replace('"', '').replace("[", "").replace("]", "")
    imagename1 = str(imagename).split('.')
    imagename = str(imagename1[-2]).replace("[", "]")
    empty.append("FileName--" + imagename)
    empty.append("FilePath--" + name)
    imageExtension = str(imagename1[-1]).replace("[", "]")
    empty.append("FileType--" + imageExtension)

    import pandas as pd
    df = pd.DataFrame(empty)
    df = df[0].str.split("--", expand=True)
    data1 = pd.DataFrame(df[0])
    data2 = pd.DataFrame(df[1])
    dt = data2.set_index(data1[0])
    dt4 = dt.T
    dictionary = dt4.to_dict(orient="index")
    a = {
        "FileId": 0,
        "FileData": "",
        "FileName": "",
        "FileType": "",
        "RefId": 0
    }
    document_list = []  # renamed from `list` to avoid shadowing the builtin
    document_list.append(a)
    document_list.append(dictionary[1])

    import json
    with open('/home/ubuntu/AI/ResumeParser/firstjson.json', 'r') as json_file:
        json_load = json.load(json_file)
    # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/businesscards/create"
    nothing = json.dumps(json_load).replace("]", "").replace("[", "").replace('{"0":', '').replace('}}', '}')

    import json
    # JSON data:
    x = nothing
    # python object to be appended
    y = {"EducationDetails": dflist}
    y1 = {"Document": document_list}
    # parsing JSON string:
    z = json.loads(x)
    # appending the data
    z.update(y)
    z.update(y1)
    # the result is a JSON string:
    # print(json.dumps(z))
    # print('##########################')
    # print(z)
    # print('##########################')

    import requests
    import json
    # with open('visitingcard1.json', 'r') as json_file:
    #     json_load = json.load(json_file)
    url = "https://demo.bizgaze.app/apis/v4/bizgaze/integrations/resumeparsing/save"
    # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/resumeparsing/save"
    payload1 = json.dumps(z)
    print('--------------------------------------------------------------------------')
    print(payload1)
    headers = {
        # 'Authorization': 'stat 53f27e671adf456e974f1d11ceb5db41',
        'Authorization': 'stat 3c8e545aca704c68a1d34d364ee73388',  # demo
        'Content-Type': 'application/json'
    }
    response = requests.request("POST", url, headers=headers, data=payload1)
    print("##############################################################")
    print(response.text)
    # function_1.var = response
    # a = str(response.text)

    # clean up the upload folder once the resume has been processed
    files = glob.glob('/home/ubuntu/AI/ResumeParser/resume_upload/*')
    for f in files:
        os.remove(f)
    return response.text


@app.route("/Download_resume")
def Download_resume():
    try:
        with open("/home/ubuntu/AI/ResumeParser/Ad1.csv", encoding="unicode_escape") as fp:
            csv = fp.read()
            return Response(
                csv,
                mimetype="text/csv",
                headers={"Content-disposition": "attachment; filename=Resume.csv"})
    finally:
        os.remove('/home/ubuntu/AI/ResumeParser/Ad1.csv')


@app.route('/upload_resume', methods=["POST"])
def upload_resume():
    # this guard only passes when the app is launched directly (python <this file>),
    # which is how the multiprocessing pool is meant to be started
    if __name__ == "__main__":
        print(os.getpid())
        url_list = []
        Dataset = request.get_json()
        # id = "100013660000125"
        url_list.append(Dataset)
        # multiprocessing
        with multiprocessing.Pool(processes=30) as pool:
            results = pool.map(predict, url_list)
            pool.close()
        return results[0]


if __name__ == "__main__":
    app.run(host='0.0.0.0', port=1113, debug=True)
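
# Example request to /upload_resume (illustrative only; <server> and the field
# values are placeholders, the port comes from app.run above):
#
#   curl -X POST http://<server>:1113/upload_resume \
#        -H "Content-Type: application/json" \
#        -d '{"FileData": "<base64-encoded resume>", "FileName": "resume", "FileType": "pdf"}'
#
# predict() reads FileData/FileName/FileType from this JSON, writes the decoded
# file into IMAGE_UPLOADS, and returns the resumeparsing/save response body.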