Update 'Business_cards/Business_cards.py'

Этот коммит содержится в:
2023-09-07 10:23:27 +00:00
родитель f1b1032b12
Коммит 99872f52e5
+67 -9
Просмотреть файл
@@ -1,6 +1,6 @@
from flask import Flask, render_template, request, redirect, Response, send_file from flask import Flask, render_template, request, redirect, Response, send_file
import os import os
import openai # import openai
import requests import requests
import pandas as pd import pandas as pd
import pgeocode import pgeocode
@@ -15,6 +15,11 @@ from functools import partial
from urlextract import URLExtract from urlextract import URLExtract
import pytesseract as tess import pytesseract as tess
from PIL import Image from PIL import Image
# from doctr.io import DocumentFile
# from doctr.models import ocr_predictor
# model = ocr_predictor(pretrained=True)
# load tagger
######################################################
import os import os
import glob import glob
@@ -29,6 +34,9 @@ import time
import multiprocessing import multiprocessing
from PIL import Image from PIL import Image
from functools import partial from functools import partial
nlp_model = spacy.load("D:/projects/C01app/Resume_parser/ME")
nlp_model1 = spacy.load("D:/projects/C01app/Resume_parser/bdeeducation_50_0.2")
from flask import Flask, render_template, request, redirect, Response, send_file
import pandas as pd import pandas as pd
################################################################ ################################################################
@@ -49,7 +57,7 @@ model = AutoModelForTokenClassification.from_pretrained("ml6team/bert-base-uncas
from paddleocr import PaddleOCR, draw_ocr from paddleocr import PaddleOCR, draw_ocr
ocr = PaddleOCR(use_angle_cls=True, lang='en', use_space_char=True, show_log=True) ocr = PaddleOCR(use_angle_cls=True, lang='en', use_space_char=True, show_log=False)
tagger = SequenceTagger.load("flair/ner-english-large") tagger = SequenceTagger.load("flair/ner-english-large")
import datetime import datetime
@@ -59,7 +67,23 @@ app = Flask(__name__)
# app.config["IMAGE_UPLOADS"] = "C:/inetpub/wwwroot/FlaskApplication/Flask_Demo/upload/" # app.config["IMAGE_UPLOADS"] = "C:/inetpub/wwwroot/FlaskApplication/Flask_Demo/upload/"
@app.route('/', methods=['GET']) @app.route('/', methods=['GET'])
def home():
    """Render and return the ``home.html`` template."""
    template_name = 'home.html'
    return render_template(template_name)
@app.route('/resume', methods=['GET'])
def resume():
    """Handle GET /resume by rendering the ``resume.html`` template."""
    template_name = 'resume.html'
    return render_template(template_name)
@app.route('/invoice', methods=['GET'])
def invoice():
    """Handle GET /invoice by rendering the ``invoice.html`` template."""
    template_name = 'invoice.html'
    return render_template(template_name)
@app.route('/card', methods=['GET'])
def card():
    """Handle GET /card by rendering the ``card.html`` template."""
    template_name = 'card.html'
    return render_template(template_name)
@@ -407,7 +431,7 @@ def multiplecards():
htext = x htext = x
# print('------------------------------------------------') # print('------------------------------------------------')
#print('############################################################# this is verticaltext #################################################################') #print('############################################################# this is verticaltext #################################################################')
# print(verticaltext) print(verticaltext)
htext = htext.replace('\n', ' ') htext = htext.replace('\n', ' ')
# print('############################################################# this is htext #############################################################') # print('############################################################# this is htext #############################################################')
#print(htext) #print(htext)
@@ -463,9 +487,10 @@ def multiplecards():
addrespinlst.append(Address) addrespinlst.append(Address)
except NameError: except NameError:
final.append('ADDRESS--')
print(
'############################################################ Addressmodelworking #############################################################') #print('############################################################ Addressmodelworking #############################################################')
# doc = nlp_model1(textaddress) # doc = nlp_model1(textaddress)
# addlist = [] # addlist = []
@@ -854,6 +879,32 @@ def multiplecards():
number = str(number).split(")") number = str(number).split(")")
num.append(number[1]) num.append(number[1])
# num.append(number[-1]) # num.append(number[-1])
print(num)
import re
# Input list of strings
# num =[' 7227906777Extn1204634444']
# Define a regular expression pattern to split when text is present
# Runs of ASCII letters (for example "Extn") are treated as separators
# between the numeric parts of a recognised phone string.
pattern = r'[a-zA-Z]+'

def split_string(text):
    """Split ``text`` on runs of ASCII letters and return the non-blank pieces.

    ``re.split`` yields empty strings when ``text`` starts or ends with
    letters, and whitespace-only strings when letters are surrounded by
    spaces (``'Mobile '`` -> ``['', ' ']``).  Both kinds are dropped here so
    that an entry containing no digits contributes nothing to the result;
    otherwise a lone ``' '`` survives a plain truthiness filter and hides the
    "no number found" case from the caller.

    The text of the pieces that are kept is returned unchanged (surrounding
    whitespace is not stripped).

    Args:
        text: The string to split.

    Returns:
        A list of the fragments of ``text`` found between letter runs that
        contain at least one non-whitespace character.
    """
    return [piece for piece in re.split(pattern, text) if piece.strip()]
# Process each line in the list
split_lines = [split_string(line) for line in num]
# Flatten the list of lists into a single list
split_lines = [item for sublist in split_lines for item in sublist]
# Remove any empty strings
num = [item for item in split_lines if item]
# Print the split lines
print(num)
if len(num) == 0: if len(num) == 0:
final.append("ContactNumber--") final.append("ContactNumber--")
final.append("OrganizationNumber--") final.append("OrganizationNumber--")
@@ -985,7 +1036,7 @@ def multiplecards():
final.append(county_name) final.append(county_name)
except (IndexError, NameError): except (IndexError, NameError):
final.append("PinCode1--") final.append("PinCode1--"+" ")
final.append("country_code--") final.append("country_code--")
final.append("LandMark1--") final.append("LandMark1--")
final.append("state_name--") final.append("state_name--")
@@ -1004,6 +1055,11 @@ def multiplecards():
df1.to_csv('path123.csv', index=False) df1.to_csv('path123.csv', index=False)
df2 = pd.read_csv('path123.csv') df2 = pd.read_csv('path123.csv')
print(df2) print(df2)
if df2['Values'].isnull().all():
print("Column 'Column2' is empty.")
return 'Invalid image'
else:
pass
df2 = df2.T df2 = df2.T
df2.to_csv('path1.csv', index=False, header=False) df2.to_csv('path1.csv', index=False, header=False)
df1 = pd.read_csv('path1.csv') df1 = pd.read_csv('path1.csv')
@@ -1062,7 +1118,11 @@ def multiplecards():
zlist.append(z) zlist.append(z)
#############################################creating csv##################################### #############################################creating csv#####################################
#print(final) # print(final)
#print(imagelist) #print(imagelist)
#final.append('image--' + str(imagelist)) #final.append('image--' + str(imagelist))
# import requests # import requests
@@ -1111,7 +1171,5 @@ def multiplecards():
return zlist return zlist
if __name__ == "__main__": if __name__ == "__main__":
app.run(host='0.0.0.0', port=1112) app.run(host='0.0.0.0', port=1112)