|
@@ -1,6 +1,6 @@
|
1
|
1
|
from flask import Flask, render_template, request, redirect, Response, send_file
|
2
|
2
|
import os
|
3
|
|
-import openai
|
|
3
|
+# import openai
|
4
|
4
|
import requests
|
5
|
5
|
import pandas as pd
|
6
|
6
|
import pgeocode
|
|
@@ -15,6 +15,11 @@ from functools import partial
|
15
|
15
|
from urlextract import URLExtract
|
16
|
16
|
import pytesseract as tess
|
17
|
17
|
from PIL import Image
|
|
18
|
+# from doctr.io import DocumentFile
|
|
19
|
+# from doctr.models import ocr_predictor
|
|
20
|
+# model = ocr_predictor(pretrained=True)
|
|
21
|
+# load tagger
|
|
22
|
+######################################################
|
18
|
23
|
import os
|
19
|
24
|
import glob
|
20
|
25
|
|
|
@@ -29,6 +34,9 @@ import time
|
29
|
34
|
import multiprocessing
|
30
|
35
|
from PIL import Image
|
31
|
36
|
from functools import partial
|
|
37
|
+nlp_model = spacy.load("D:/projects/C01app/Resume_parser/ME")
|
|
38
|
+nlp_model1 = spacy.load("D:/projects/C01app/Resume_parser/bdeeducation_50_0.2")
|
|
39
|
+from flask import Flask, render_template, request, redirect, Response, send_file
|
32
|
40
|
|
33
|
41
|
import pandas as pd
|
34
|
42
|
################################################################
|
|
@@ -49,7 +57,7 @@ model = AutoModelForTokenClassification.from_pretrained("ml6team/bert-base-uncas
|
49
|
57
|
|
50
|
58
|
from paddleocr import PaddleOCR, draw_ocr
|
51
|
59
|
|
52
|
|
-ocr = PaddleOCR(use_angle_cls=True, lang='en', use_space_char=True, show_log=True)
|
|
60
|
+ocr = PaddleOCR(use_angle_cls=True, lang='en', use_space_char=True, show_log=False)
|
53
|
61
|
tagger = SequenceTagger.load("flair/ner-english-large")
|
54
|
62
|
|
55
|
63
|
import datetime
|
|
@@ -59,7 +67,23 @@ app = Flask(__name__)
|
59
|
67
|
|
60
|
68
|
# app.config["IMAGE_UPLOADS"] = "C:/inetpub/wwwroot/FlaskApplication/Flask_Demo/upload/"
|
61
|
69
|
|
|
70
|
+
|
62
|
71
|
@app.route('/', methods=['GET'])
|
|
72
|
+def home():
|
|
73
|
+ return render_template('home.html')
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+@app.route('/resume', methods=['GET'])
|
|
77
|
+def resume():
|
|
78
|
+ return render_template('resume.html')
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+@app.route('/invoice', methods=['GET'])
|
|
82
|
+def invoice():
|
|
83
|
+ return render_template('invoice.html')
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+@app.route('/card', methods=['GET'])
|
63
|
87
|
def card():
|
64
|
88
|
return render_template('card.html')
|
65
|
89
|
|
|
@@ -407,7 +431,7 @@ def multiplecards():
|
407
|
431
|
htext = x
|
408
|
432
|
# print('------------------------------------------------')
|
409
|
433
|
#print('############################################################# this is verticaltext #################################################################')
|
410
|
|
- # print(verticaltext)
|
|
434
|
+ print(verticaltext)
|
411
|
435
|
htext = htext.replace('\n', ' ')
|
412
|
436
|
# print('############################################################# this is htext #############################################################')
|
413
|
437
|
#print(htext)
|
|
@@ -463,9 +487,10 @@ def multiplecards():
|
463
|
487
|
addrespinlst.append(Address)
|
464
|
488
|
|
465
|
489
|
except NameError:
|
|
490
|
+ final.append('ADDRESS--')
|
466
|
491
|
|
467
|
|
- print(
|
468
|
|
- '############################################################ Addressmodelworking #############################################################')
|
|
492
|
+
|
|
493
|
+ #print('############################################################ Addressmodelworking #############################################################')
|
469
|
494
|
|
470
|
495
|
# doc = nlp_model1(textaddress)
|
471
|
496
|
# addlist = []
|
|
@@ -854,6 +879,32 @@ def multiplecards():
|
854
|
879
|
number = str(number).split(")")
|
855
|
880
|
num.append(number[1])
|
856
|
881
|
# num.append(number[-1])
|
|
882
|
+
|
|
883
|
+ print(num)
|
|
884
|
+ import re
|
|
885
|
+
|
|
886
|
+ # Input list of strings
|
|
887
|
+# num =[' 7227906777Extn1204634444']
|
|
888
|
+
|
|
889
|
+
|
|
890
|
+ # Define a regular expression pattern to split when text is present
|
|
891
|
+ pattern = r'[a-zA-Z]+'
|
|
892
|
+
|
|
893
|
+ # Function to split a string based on the pattern
|
|
894
|
+ def split_string(text):
|
|
895
|
+ return re.split(pattern, text)
|
|
896
|
+
|
|
897
|
+ # Process each line in the list
|
|
898
|
+ split_lines = [split_string(line) for line in num]
|
|
899
|
+
|
|
900
|
+ # Flatten the list of lists into a single list
|
|
901
|
+ split_lines = [item for sublist in split_lines for item in sublist]
|
|
902
|
+
|
|
903
|
+ # Remove any empty strings
|
|
904
|
+ num = [item for item in split_lines if item]
|
|
905
|
+
|
|
906
|
+ # Print the split lines
|
|
907
|
+ print(num)
|
857
|
908
|
if len(num) == 0:
|
858
|
909
|
final.append("ContactNumber--")
|
859
|
910
|
final.append("OrganizationNumber--")
|
|
@@ -985,7 +1036,7 @@ def multiplecards():
|
985
|
1036
|
final.append(county_name)
|
986
|
1037
|
|
987
|
1038
|
except (IndexError, NameError):
|
988
|
|
- final.append("PinCode1--")
|
|
1039
|
+ final.append("PinCode1--"+" ")
|
989
|
1040
|
final.append("country_code--")
|
990
|
1041
|
final.append("LandMark1--")
|
991
|
1042
|
final.append("state_name--")
|
|
@@ -1004,6 +1055,11 @@ def multiplecards():
|
1004
|
1055
|
df1.to_csv('path123.csv', index=False)
|
1005
|
1056
|
df2 = pd.read_csv('path123.csv')
|
1006
|
1057
|
print(df2)
|
|
1058
|
+ if df2['Values'].isnull().all():
|
|
1059
|
+ print("Column 'Column2' is empty.")
|
|
1060
|
+ return 'Invalid image'
|
|
1061
|
+ else:
|
|
1062
|
+ pass
|
1007
|
1063
|
df2 = df2.T
|
1008
|
1064
|
df2.to_csv('path1.csv', index=False, header=False)
|
1009
|
1065
|
df1 = pd.read_csv('path1.csv')
|
|
@@ -1062,7 +1118,11 @@ def multiplecards():
|
1062
|
1118
|
|
1063
|
1119
|
zlist.append(z)
|
1064
|
1120
|
#############################################creating csv#####################################
|
1065
|
|
- #print(final)
|
|
1121
|
+ # print(final)
|
|
1122
|
+
|
|
1123
|
+
|
|
1124
|
+
|
|
1125
|
+
|
1066
|
1126
|
#print(imagelist)
|
1067
|
1127
|
#final.append('image--' + str(imagelist))
|
1068
|
1128
|
# import requests
|
|
@@ -1111,7 +1171,5 @@ def multiplecards():
|
1111
|
1171
|
return zlist
|
1112
|
1172
|
|
1113
|
1173
|
|
1114
|
|
-
|
1115
|
|
-
|
1116
|
1174
|
if __name__ == "__main__":
|
1117
|
1175
|
app.run(host='0.0.0.0', port=1112)
|