123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412 |
- from flask import Flask, render_template, request, redirect, Response, send_file
- import spacy
- import pandas as pd
- import os
- import glob
- import camelot
- from pytesseract import *
- import shutil
- import cv2
- import matplotlib
- from werkzeug.utils import secure_filename
- import requests
-
- import time
- import multiprocessing
- from PIL import Image
- from functools import partial
-
-
-
# Flask application object for the resume-parsing service.
app = Flask(__name__)

# Custom spaCy NER models loaded from local model directories.
# NOTE(review): 'ME' appears to extract general resume fields and
# 'bdeeducation_50_0.2' education-related fields -- confirm on the host,
# both are relative paths resolved against the working directory.
nlp_model = spacy.load('ME')
nlp_model1 = spacy.load('bdeeducation_50_0.2')
# path to save image in a folder
app.config["IMAGE_UPLOADS"] = "/home/ubuntu/AI/ResumeParser/resume_upload"
-
-
@app.route('/', methods=['GET'])
def resume():
    """Serve the resume-upload landing page."""
    page = render_template('resume.html')
    return page
-
-
- #@app.route("/upload_resume", methods=["POST"])
def predict(url_list):
    """Decode an uploaded resume, run both NER models over its text, and
    push the assembled result to the Bizgaze resume-parsing endpoint.

    Parameters
    ----------
    url_list : dict
        JSON payload with base64 ``FileData``, ``FileName`` and
        ``FileType`` keys (name kept for backward compatibility; despite
        the name it is a single dict, as passed by ``upload_resume``).

    Returns
    -------
    str
        Raw response body from the downstream save API.
    """
    import base64
    import json

    base_dir = '/home/ubuntu/AI/ResumeParser'
    upload_dir = base_dir + '/resume_upload'

    payload = url_list
    encoded_data = payload['FileData']
    file_name = payload['FileName'] + '.' + payload['FileType']
    print(file_name)

    # Persist the uploaded document so the text extractors can read it.
    with open(upload_dir + '/' + file_name, "wb") as fh:
        fh.write(base64.decodebytes(encoded_data.encode()))

    # Most recently modified file in the upload folder is the one we just
    # wrote (the folder is emptied at the end of every request).
    found = _latest_upload(upload_dir)
    print(found)

    # Plain text of the resume; dispatch on the found file itself instead
    # of looping over the whole directory as the original code did.
    text = _extract_text(found)

    # First model: general resume entities -> firstjson.json on disk.
    _build_first_json(text, base_dir)

    # Second model: education entities merged against each template CSV.
    dflist = _build_education_details(text, base_dir)

    # Original document, re-encoded for the downstream API.
    doc_list = _build_document_list(found)

    with open(base_dir + '/firstjson.json', 'r') as json_file:
        json_load = json.load(json_file)

    # Strip the '{"0": ...}' wrapper produced by to_json(orient="index").
    # The replace chain is kept byte-identical to the original behavior.
    nothing = json.dumps(json_load).replace("]", "").replace("[", "").replace('{"0":', '').replace('}}', '}')
    z = json.loads(nothing)
    z.update({"EducationDetails": dflist})
    z.update({"Document": doc_list})

    response_text = _post_result(z)

    # Clean the upload folder so the next request starts fresh.
    for stale in glob.glob(upload_dir + '/*'):
        os.remove(stale)

    return response_text


def _latest_upload(upload_dir):
    """Return the most recently modified file in *upload_dir*.

    Raises FileNotFoundError when the folder is empty (the original code
    crashed later with an UnboundLocalError on ``found`` instead).
    """
    files = glob.glob(upload_dir + '/*')
    if not files:
        raise FileNotFoundError('no uploaded resume found in ' + upload_dir)
    return max(files, key=os.path.getmtime)


def _extract_text(path):
    """Extract plain text from a .doc/.docx (docx2txt) or .pdf (PyMuPDF).

    Whitespace is normalized to single spaces, matching the original
    per-extension extractors.  Raises ValueError for other extensions
    (the original left its text variable unbound in that case).
    """
    if path.endswith(('.doc', '.docx')):
        import docx2txt
        raw = docx2txt.process(path)
        return ' '.join(raw.rstrip("\n").split()).rstrip()
    if path.endswith('.pdf'):
        import fitz
        doc = fitz.open(path)
        text = ''.join(str(page.get_text()) for page in doc)
        return ' '.join(text.split('\n'))
    raise ValueError('unsupported resume file type: ' + path)


def _entities_frame(doc):
    """Build the one-row (label -> text) DataFrame, transposed, that the
    original code produced from a spaCy Doc's entities."""
    labels = [ent.label_.upper() for ent in doc.ents]
    values = [ent.text for ent in doc.ents]
    return pd.DataFrame([values], columns=labels).T


def _write_key_value_csv(frame, csv_path):
    """Round-trip *frame* through *csv_path*, renaming the last two columns
    to 'Key'/'Values' (preserves the original CSV-based normalization)."""
    frame.to_csv(csv_path, index=True)
    data = pd.read_csv(csv_path)
    data.rename({data.columns[-2]: 'Key'}, axis=1, inplace=True)
    data.rename({data.columns[-1]: 'Values'}, axis=1, inplace=True)
    data.to_csv(csv_path, index=False)


def _build_first_json(text, base_dir):
    """Run the primary NER model and write its output to firstjson.json."""
    _write_key_value_csv(_entities_frame(nlp_model(text)), base_dir + '/Ad1.csv')

    x1 = pd.read_csv(base_dir + '/AD11.csv')
    tp = pd.read_csv(base_dir + '/Ad1.csv')
    # how='right' keeps every template key from AD11.csv.
    merge = pd.merge(tp, x1, on='Key', how='right')
    merge.to_csv(base_dir + '/AD.csv', index=False)

    df2 = pd.read_csv(base_dir + '/AD.csv').T
    df2.to_csv(base_dir + '/path.csv', index=False, header=False)
    df1 = pd.read_csv(base_dir + '/path.csv')
    df1.to_json(base_dir + '/firstjson.json', orient="index")


def _merge_section(template_csv, ad2_csv, out_csv):
    """Merge one education template CSV with the model output and return
    the resulting Key -> Value dict (NaN mapped to None)."""
    import numpy as np
    left = pd.read_csv(template_csv)
    tp = pd.read_csv(ad2_csv)
    merged = pd.merge(left, tp, on='Key', how='left')
    merged = merged.replace(np.nan, '', regex=True)
    merged.to_csv(out_csv, index=False)

    frame = pd.read_csv(out_csv).replace({np.nan: None})
    keys = frame.iloc[:, -2].tolist()
    vals = frame.iloc[:, -1].tolist()
    return dict(zip(keys, vals))


def _build_education_details(text, base_dir):
    """Run the education NER model and merge it against the PG/UG/inter/SSC
    template CSVs, returning the list of four section dicts."""
    _write_key_value_csv(_entities_frame(nlp_model1(text)), base_dir + '/Ad2.csv')

    ad2 = base_dir + '/Ad2.csv'
    sections = [
        (base_dir + '/PG.csv', base_dir + '/PGmerge.csv'),
        (base_dir + '/UG.csv', base_dir + '/UGmerge.csv'),
        (base_dir + '/inter.csv', base_dir + '/intermerge.csv'),
        (base_dir + '/SSC.csv', base_dir + '/sscmerge.csv'),
    ]
    return [_merge_section(template, ad2, out) for template, out in sections]


def _build_document_list(path):
    """Base64-encode the original file and wrap it in the two-element
    Document payload expected by the downstream API."""
    import base64
    # The original code leaked this file handle; 'with' closes it.
    with open(path, 'rb') as image:
        image_64_encode = base64.b64encode(image.read()).decode('utf-8')

    file_part = str(path.split("/")[-1]).replace('"', '').replace("[", "").replace("]", "")
    stem_ext = str(file_part).split('.')

    entries = [
        "FileData--" + str(image_64_encode),
        "FileName--" + str(stem_ext[-2]).replace("[", "]"),
        "FilePath--" + path,
        "FileType--" + str(stem_ext[-1]).replace("[", "]"),
    ]

    # Same DataFrame split/transpose dance as the original, producing a
    # {field: value} dict under row key 1.
    df = pd.DataFrame(entries)
    df = df[0].str.split("--", expand=True)
    data1 = pd.DataFrame(df[0])
    data2 = pd.DataFrame(df[1])
    dictionary = data2.set_index(data1[0]).T.to_dict(orient="index")

    template = {
        "FileId": 0,
        "FileData": "",
        "FileName": "",
        "FileType": "",
        "RefId": 0
    }
    # Renamed from 'list' -- the original shadowed the builtin.
    return [template, dictionary[1]]


def _post_result(z):
    """POST the assembled payload to the resume-parsing save endpoint and
    return the raw response body."""
    import json
    url = "https://demo.bizgaze.app/apis/v4/bizgaze/integrations/resumeparsing/save"
    #url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/resumeparsing/save"
    payload1 = json.dumps(z)
    print('--------------------------------------------------------------------------')
    print(payload1)
    headers = {
        # NOTE(review): hard-coded API token -- move to configuration.
        'Authorization': 'stat 3c8e545aca704c68a1d34d364ee73388',#demo
        'Content-Type': 'application/json'
    }
    response = requests.request("POST", url, headers=headers, data=payload1)
    print("##############################################################")
    print(response.text)
    return response.text
-
-
-
-
-
@app.route("/Download_resume")
def Download_resume():
    """Stream the latest parse output (Ad1.csv) as a CSV attachment,
    deleting the file afterwards.

    Returns a Flask Response with a Content-Disposition download header.
    """
    csv_path = '/home/ubuntu/AI/ResumeParser/Ad1.csv'
    try:
        with open(csv_path, encoding="unicode_escape") as fp:
            csv_text = fp.read()
        return Response(
            csv_text,
            mimetype="text/csv",
            headers={"Content-disposition": "attachment; filename=Resume.csv"},
        )
    finally:
        # The original removed the file unconditionally in `finally`; when
        # the file was already missing, that raised a second error which
        # masked the first.  Suppress only the missing-file case.
        try:
            os.remove(csv_path)
        except FileNotFoundError:
            pass
-
@app.route('/upload_resume', methods=["POST"])
def upload_resume():
    """Accept a JSON resume payload and run `predict` in a worker process.

    The worker process isolates the parsing pipeline from the Flask
    process.  Returns `predict`'s result (the downstream API's response
    body).
    """
    print(os.getpid())

    url_list = [request.get_json()]
    # One task needs one worker: the original spawned a fresh 30-process
    # pool per request and redundantly called pool.close() inside the
    # `with` block (the context manager already terminates the pool).
    with multiprocessing.Pool(processes=1) as pool:
        results = pool.map(predict, url_list)
    return results[0]
-
if __name__ == "__main__":
    # NOTE(review): debug=True enables the Werkzeug debugger and reloader;
    # combined with host='0.0.0.0' this must not be exposed in production.
    app.run(host='0.0.0.0', port=1113, debug=True)
-
|