# Resume parsing service: a Flask app that decodes an uploaded resume
# (doc/docx/pdf), runs two spaCy NER models over its text, and posts the
# extracted profile and education details to the Bizgaze API.
  1. from flask import Flask, render_template, request, redirect, Response, send_file
  2. import spacy
  3. import pandas as pd
  4. import os
  5. import glob
  6. import camelot
  7. from pytesseract import *
  8. import shutil
  9. import cv2
  10. import matplotlib
  11. from werkzeug.utils import secure_filename
  12. import requests
  13. import time
  14. import multiprocessing
  15. from PIL import Image
  16. from functools import partial
  17. app = Flask(__name__)
  18. nlp_model = spacy.load('ME')
  19. nlp_model1 = spacy.load('bdeeducation_50_0.2')
  20. # path to save image in a folder
  21. app.config["IMAGE_UPLOADS"] = "/home/ubuntu/AI/ResumeParser/resume_upload"
  22. @app.route('/', methods=['GET'])
  23. def resume():
  24. return render_template('resume.html')
  25. #@app.route("/upload_resume", methods=["POST"])
  26. def predict(url_list):
  27. #Dataset = request.get_json()
  28. #print(Dataset)
  29. a = url_list
  30. x = a['FileData']
  31. #print(x)
  32. y = a['FileName']
  33. z = a['FileType']
  34. name=y+'.'+z
  35. print(name)
  36. #print(y)
  37. # image = y.split("/")
  38. # filename=image[-1]
  39. #print(x)
  40. img_data=x.encode()
  41. import base64
  42. with open('/home/ubuntu/AI/ResumeParser/resume_upload/'+name, "wb") as fh:
  43. fh.write(base64.decodebytes(img_data))
  44. # if request.method == "POST":
  45. # if request.files:
  46. # image = request.files["image"]
  47. # try:
  48. # image.save(os.path.join(
  49. # app.config["IMAGE_UPLOADS"], image.filename))
  50. # except IsADirectoryError:
  51. # return render_template('resume.html')
  52. # # image.save(os.path.join(
  53. # # app.config["IMAGE_UPLOADS"], image.filename))
  54. # print("Image saved")
  55. # return redirect(request.url)
  56. import glob
  57. # import os
  58. ts = 0
  59. for file_name in glob.glob('/home/ubuntu/AI/ResumeParser/resume_upload/*'):
  60. fts = os.path.getmtime(file_name)
  61. if fts > ts:
  62. ts = fts
  63. found = file_name
  64. print(found)
  65. # os.chdir(found)
  66. # print(os.getcwd())
  67. # for count, f in enumerate(os.listdir()):
  68. # f_name, f_ext = os.path.splitext(f)
  69. # f_name = "" + str(count)
  70. # new_name = f'{f_name}{f_ext}'
  71. # os.rename(f, new_name)
  72. f = "/home/ubuntu/AI/ResumeParser/resume_upload"
  73. f = os.listdir(f)
  74. def docx_to_txt():
  75. import docx2txt
  76. import glob
  77. text = ''
  78. for file in glob.glob(found):
  79. c = docx2txt.process(file)
  80. c = c.rstrip("\n")
  81. toPrint = c
  82. d = ' '.join(i for i in toPrint.split())
  83. d = d.rstrip()
  84. text += d
  85. docx_to_txt.text = text
  86. def doc_to_txt():
  87. import docx2txt
  88. import glob
  89. text = ''
  90. for file in glob.glob(found):
  91. c = docx2txt.process(file)
  92. c = c.rstrip("\n")
  93. toPrint = c
  94. d = ' '.join(i for i in toPrint.split())
  95. d = d.rstrip()
  96. text += d
  97. doc_to_txt.text = text
  98. def pdf_to_txt():
  99. import sys
  100. import fitz
  101. fname = found
  102. doc = fitz.open(fname)
  103. text = ""
  104. for page in doc:
  105. text = text + str(page.get_text())
  106. pdf_to_txt.text = " ".join(text.split('\n'))
  107. for file in f:
  108. if file.endswith('.doc'):
  109. doc_to_txt()
  110. x = doc_to_txt.text
  111. elif file.endswith('.docx'):
  112. docx_to_txt()
  113. x = docx_to_txt.text
  114. elif file.endswith('.pdf'):
  115. pdf_to_txt()
  116. x = pdf_to_txt.text
  117. doc = nlp_model(x)
  118. k = []
  119. l = []
  120. for ent in doc.ents:
  121. # print(f'{ent.label_.upper():{30}}- {ent.text}')
  122. k.append(ent.label_.upper())
  123. l.append(ent.text)
  124. columns = k
  125. rows = [l]
  126. import pandas as pd
  127. data = pd.DataFrame(rows, columns=columns)
  128. df = data
  129. data = df.T
  130. data.to_csv('/home/ubuntu/AI/ResumeParser/Ad1.csv', index=True)
  131. data = pd.read_csv('/home/ubuntu/AI/ResumeParser/Ad1.csv')
  132. data.rename({data.columns[-2]: 'Key'}, axis=1, inplace=True)
  133. data.rename({data.columns[-1]: 'Values'}, axis=1, inplace=True)
  134. data.to_csv('/home/ubuntu/AI/ResumeParser/Ad1.csv', index=False)
  135. df2 = pd.read_csv('/home/ubuntu/AI/ResumeParser/Ad1.csv')
  136. x1 = pd.read_csv('/home/ubuntu/AI/ResumeParser/AD11.csv')
  137. tp = pd.read_csv('/home/ubuntu/AI/ResumeParser/Ad1.csv')
  138. # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
  139. merge = pd.merge( tp,x1, on='Key', how='right')
  140. merge.to_csv('/home/ubuntu/AI/ResumeParser/AD.csv', index=False)
  141. df2 = pd.read_csv('/home/ubuntu/AI/ResumeParser/AD.csv')
  142. #print(df2)
  143. df2 = df2.T
  144. df2.to_csv('/home/ubuntu/AI/ResumeParser/path.csv', index=False, header=False)
  145. df1 = pd.read_csv('/home/ubuntu/AI/ResumeParser/path.csv')
  146. df1.to_json('/home/ubuntu/AI/ResumeParser/firstjson.json', orient="index")
  147. doc = nlp_model1(x)
  148. k = []
  149. l = []
  150. for ent in doc.ents:
  151. # print(f'{ent.label_.upper():{30}}- {ent.text}')
  152. k.append(ent.label_.upper())
  153. l.append(ent.text)
  154. columns = k
  155. rows = [l]
  156. data = pd.DataFrame(rows, columns=columns)
  157. df = data
  158. data = df.T
  159. data.to_csv('/home/ubuntu/AI/ResumeParser/Ad2.csv', index=True)
  160. data = pd.read_csv('/home/ubuntu/AI/ResumeParser/Ad2.csv')
  161. data.rename({data.columns[-2]: 'Key'}, axis=1, inplace=True)
  162. data.rename({data.columns[-1]: 'Values'}, axis=1, inplace=True)
  163. data.to_csv('/home/ubuntu/AI/ResumeParser/Ad2.csv', index=False)
  164. import pandas as pd
  165. import json
  166. dflist = []
  167. x = pd.read_csv('/home/ubuntu/AI/ResumeParser/PG.csv')
  168. tp = pd.read_csv('/home/ubuntu/AI/ResumeParser/Ad2.csv')
  169. # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
  170. merge = pd.merge(x, tp, on='Key', how='left')
  171. import numpy as np
  172. merge = merge.replace(np.nan, '', regex=True)
  173. merge.to_csv('/home/ubuntu/AI/ResumeParser/PGmerge.csv', index=False)
  174. dfPG = pd.read_csv('/home/ubuntu/AI/ResumeParser/PGmerge.csv')
  175. import numpy as np
  176. dfPG = dfPG.replace({np.nan: None})
  177. x2 = dfPG.iloc[:, -2].tolist()
  178. y2 = dfPG.iloc[:, -1].tolist()
  179. z1 = dict(zip(x2, y2))
  180. dflist.append(z1)
  181. #u1 = json.dumps(z1)
  182. import pandas as pd
  183. x = pd.read_csv('/home/ubuntu/AI/ResumeParser/UG.csv')
  184. tp = pd.read_csv('/home/ubuntu/AI/ResumeParser/Ad2.csv')
  185. # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
  186. merge = pd.merge(x, tp, on='Key', how='left')
  187. import numpy as np
  188. merge = merge.replace(np.nan, '', regex=True)
  189. merge.to_csv('/home/ubuntu/AI/ResumeParser/UGmerge.csv', index=False)
  190. dfUG = pd.read_csv('/home/ubuntu/AI/ResumeParser/UGmerge.csv')
  191. import numpy as np
  192. dfUG = dfUG.replace({np.nan: None})
  193. x2 = dfUG.iloc[:, -2].tolist()
  194. y2 = dfUG.iloc[:, -1].tolist()
  195. z2 = dict(zip(x2, y2))
  196. dflist.append(z2)
  197. #u2 = json.dumps(z2)
  198. #final = '[' + str(z1) + ',' + str(z2) + ']'
  199. #return render_template('resume.html')
  200. ############################################################################
  201. import pandas as pd
  202. x = pd.read_csv('/home/ubuntu/AI/ResumeParser/inter.csv')
  203. tp = pd.read_csv('/home/ubuntu/AI/ResumeParser/Ad2.csv')
  204. # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
  205. merge = pd.merge(x, tp, on='Key', how='left')
  206. import numpy as np
  207. merge = merge.replace(np.nan, '', regex=True)
  208. merge.to_csv('/home/ubuntu/AI/ResumeParser/intermerge.csv', index=False)
  209. dfinter = pd.read_csv('/home/ubuntu/AI/ResumeParser/intermerge.csv')
  210. import numpy as np
  211. dfinter= dfinter.replace({np.nan: None})
  212. x2 = dfinter.iloc[:, -2].tolist()
  213. y2 = dfinter.iloc[:, -1].tolist()
  214. z3 = dict(zip(x2, y2))
  215. dflist.append(z3)
  216. ############################################################################
  217. import pandas as pd
  218. x = pd.read_csv('/home/ubuntu/AI/ResumeParser/SSC.csv')
  219. tp = pd.read_csv('/home/ubuntu/AI/ResumeParser/Ad2.csv')
  220. # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
  221. merge = pd.merge(x, tp, on='Key', how='left')
  222. import numpy as np
  223. merge = merge.replace(np.nan, '', regex=True)
  224. merge.to_csv('/home/ubuntu/AI/ResumeParser/sscmerge.csv', index=False)
  225. dfssc = pd.read_csv('/home/ubuntu/AI/ResumeParser/sscmerge.csv')
  226. import numpy as np
  227. dfssc = dfssc.replace({np.nan: None})
  228. x2 = dfssc.iloc[:, -2].tolist()
  229. y2 = dfssc.iloc[:, -1].tolist()
  230. z4 = dict(zip(x2, y2))
  231. dflist.append(z4)
  232. ############################################Document############################################################
  233. import base64
  234. empty = []
  235. name = found
  236. image = open(name, 'rb')
  237. image_read = image.read()
  238. image_64_encode = base64.b64encode(image_read)
  239. NULL = 'null'
  240. #empty.append("ByteData--" + (NULL).strip('""'))
  241. image_64_encode = image_64_encode.decode('utf-8')
  242. empty.append("FileData--" + str(image_64_encode))
  243. imagedata = name.split("/")
  244. imagename = str(imagedata[-1]).replace('"', '').replace("[", "").replace("]", "")
  245. imagename1 = str(imagename).split('.')
  246. imagename = str(imagename1[-2]).replace("[", "]")
  247. empty.append("FileName--" + imagename)
  248. empty.append("FilePath--" + name)
  249. imageExtension = str(imagename1[-1]).replace("[", "]")
  250. empty.append("FileType--" + imageExtension)
  251. import pandas as pd
  252. df = pd.DataFrame(empty)
  253. df = df[0].str.split("--", expand=True)
  254. data1 = pd.DataFrame(df[0])
  255. data2 = pd.DataFrame(df[1])
  256. dt = data2.set_index(data1[0])
  257. dt4 = dt.T
  258. list = []
  259. dictionary = dt4.to_dict(orient="index")
  260. a = {
  261. "FileId": 0,
  262. "FileData": "",
  263. "FileName": "",
  264. "FileType": "",
  265. "RefId": 0
  266. }
  267. list = []
  268. list.append(a)
  269. list.append(dictionary[1])
  270. import json
  271. with open('/home/ubuntu/AI/ResumeParser/firstjson.json', 'r') as json_file:
  272. json_load = json.load(json_file)
  273. # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/businesscards/create"
  274. nothing = json.dumps(json_load).replace("]", "").replace("[", "").replace('{"0":', '').replace('}}', '}')
  275. import json
  276. # JSON data:
  277. x = nothing
  278. # python object to be appended
  279. y = {"EducationDetails": dflist}
  280. y1 = {"Document": list}
  281. # parsing JSON string:
  282. z = json.loads(x)
  283. # appending the data
  284. z.update(y)
  285. z.update(y1)
  286. # the result is a JSON string:
  287. # print(json.dumps(z))
  288. # print('##########################')
  289. # print(z)
  290. # print('##########################')
  291. import requests
  292. import json
  293. # with open('visitingcard1.json', 'r') as json_file:
  294. # json_load = json.load(json_file)
  295. url = "https://demo.bizgaze.app/apis/v4/bizgaze/integrations/resumeparsing/save"
  296. #url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/resumeparsing/save"
  297. payload1 = json.dumps(z)
  298. print('--------------------------------------------------------------------------')
  299. print(payload1)
  300. headers = {
  301. #'Authorization': 'stat 53f27e671adf456e974f1d11ceb5db41',
  302. 'Authorization': 'stat 3c8e545aca704c68a1d34d364ee73388',#demo
  303. 'Content-Type': 'application/json'
  304. }
  305. response = requests.request("POST", url, headers=headers, data=payload1)
  306. print("##############################################################")
  307. print(response.text)
  308. #function_1.var=response
  309. #a=str(response.text)
  310. files = glob.glob('/home/ubuntu/AI/ResumeParser/resume_upload/*')
  311. for f in files:
  312. os.remove(f)
  313. return response.text
  314. @app.route("/Download_resume")
  315. def Download_resume():
  316. try:
  317. with open("/home/ubuntu/AI/ResumeParser/Ad1.csv",encoding="unicode_escape") as fp:
  318. csv = fp.read()
  319. return Response(csv,mimetype="text/csv",headers={"Content-disposition":"attachment; filename=Resume.csv"})
  320. finally:
  321. os.remove('/home/ubuntu/AI/ResumeParser/Ad1.csv')
  322. @app.route('/upload_resume', methods=["POST"])
  323. def upload_resume():
  324. if __name__ == "__main__":
  325. print(os.getpid())
  326. url_list=[]
  327. Dataset= request.get_json()
  328. # id = "100013660000125"
  329. url_list.append(Dataset)
  330. # multiprocessing
  331. with multiprocessing.Pool(processes=30) as pool:
  332. results = pool.map(predict, url_list)
  333. pool.close()
  334. return results[0]
  335. if __name__ == "__main__":
  336. app.run(host='0.0.0.0', port=1113, debug=True)