From d7e51ac17894974368265bc6ff1da9648cce675c Mon Sep 17 00:00:00 2001 From: SadhulaSaiKumar Date: Tue, 2 May 2023 05:28:41 +0000 Subject: [PATCH] Delete 'Business_cards/Business_cards.py' --- Business_cards/Business_cards.py | 1048 ------------------------------ 1 file changed, 1048 deletions(-) delete mode 100644 Business_cards/Business_cards.py diff --git a/Business_cards/Business_cards.py b/Business_cards/Business_cards.py deleted file mode 100644 index 3a3d287..0000000 --- a/Business_cards/Business_cards.py +++ /dev/null @@ -1,1048 +0,0 @@ - - -@app.route('/upload_BusinessCards', methods=["POST"]) -# @app.route('/multiplecards', methods=["POST"]) -def multiplecards(): - # print('################## multiple card detection #######################') - # print(Dataset) - datalist=[] - Dataset = request.get_json() - # print(data) - #datalist.append(Dataset) - data = {'visiting': Dataset} - for i in data['visiting']: - import time - # time.sleep(1) - a = i - x = a['FileData'] - # print(x) - y = a['FileName'] - z = a['FileType'] - # CreatedBy=a['CreatedBy'] - - name = y + '.' + z - # print(name) - # print(y) - # image = y.split("/") - # filename=image[-1] - - # print(x) - img_data = x.encode() - - import base64 - with open('./multicards/' + name, "wb") as fh: - fh.write(base64.decodebytes(img_data)) - # print(i) - - # import os - # import glob - # for i in glob.glob('./multipleupload/*'): - - found = './multicards/' + name - print(found) - extension = found.split('.')[-1] - - # for root, dirs, fils in os.glob('./multipleupload'): - # for name in files: - # foundfile= os.path.join(root, name) - # print(foundfile) - - import re - import csv - import glob - import os - # import pytesseract - # import cv2 - import numpy as np - import glob - import os - import cv2 - import requests - final = [] - # final.append('assignto--'+CreatedBy) - imagelist = [] - # print(found) - remove_list = [] - import os - import glob - import pdfminer - - # import os - # ts = 0 - # for file_name in glob.glob('./upload/*'): - # fts = os.path.getmtime(file_name) - # if fts > ts: - # ts = fts - # found = file_name - # print(found) - - # print(extension) - - def org_name(): - print('org_name is working') - import pytesseract - fname = found - if extension != 'pdf': - - img = cv2.imread(fname) - img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - - cv2.imwrite(str(found), img) - from PIL import Image - im = Image.open(found) - im.save("images1.png", dpi=(1200, 1200)) - # import pytesseract - fname = "images1.png" - import pytesseract as tess - from PIL import Image - - tess.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe" - pdf = tess.image_to_pdf_or_hocr(fname, extension="pdf") - with open("demo.pdf", "w+b", ) as f: - f.write(pdf) - - from pdfminer.high_level import extract_text - text = extract_text('demo.pdf') - # doc = DocumentFile.from_images(found) - # result = model(doc) - # text = result.render() - - # from pdfminer.high_level import extract_text - # txt = extract_text('demo.pdf') - else: - from pdfminer.high_level import extract_text - text = extract_text(fname) - - sentence = Sentence(text) - - # predict NER tags - tagger.predict(sentence) - - # print sentence - ko = (sentence) - - ko1 = str(ko).split("→") - import pandas as pd - - dfg = [] - try: - s = ko1[1].replace("", "").replace("", "").replace("/", ":") - - # os.remove(found) - # return 'Invalid image' - dfg.append(s) - df = pd.DataFrame(dfg) - df = df[0] - - df.to_csv("df.csv", index=False) - - df1 = pd.read_csv("df.csv") - ve = df1["0"].str.split(",") - fgf = ve.to_list() - dfgh = pd.DataFrame(fgf[0]) - maindf = dfgh[0] # .str.split(":") - # maindf.to_csv("main.csv") - - main1 = maindf.to_list() - main1 - # cv=pd.DataFrame(ve) - # cv - per = ["PER"] - org = ["ORG"] - loc = ["LOC"] - organizations = [i for i in main1 for j in org if j in i] - PErsons = [i for i in main1 for j in per if j in i] - location = [i for i in main1 for j in loc if j in i] - except IndexError: - pass - - # ************************************* ORGANIZATION ******************************************************************** - - def organisation(): - print('organisation working ') - try: - if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[', - '').replace( - '.com', '').replace('.in', '') + " /" + organizations[1].replace(":ORG", "").replace('"', - '').replace( - '.com', ''))) < 4: - pass - - - else: - - match = str(urlfinal[0]).lower() - match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace( - 'https', - '').replace( - 'http', '').replace(":", "").replace("/", "").upper() - print(match) - - s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace('.com', - '') + " /" + \ - organizations[1].replace(":ORG", "").replace('"', '').replace('.com', '') - s1 = s1g.upper() - s2 = match.upper() - from difflib import SequenceMatcher - print(s1) - print(s2) - print(SequenceMatcher(None, s1, s2).ratio()) - if SequenceMatcher(None, s1, s2).ratio() >= 0.10: - # and SequenceMatcher(None, s1, s2).ratio()<0.50: - final.append( - "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[', - '').replace( - '.com', '').replace(']', '') + " /" + organizations[1].replace(":ORG", "").replace('"', - '').replace( - '.com', - '').replace(']', '')) - else: - final.append("OrganizationName--" + s2) - - except IndexError: - try: - if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('[', '').replace(']', - '').replace( - '"', - '').replace( - '.com', '').replace('.in', ''))) < 4: - pass - - else: - match = str(urlfinal[0]).lower() - match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', - '').replace( - 'https', '').replace('http', '').replace(":", "").replace("/", "").upper() - - s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace('.com', '') - s1 = s1g.upper() - s2 = match.upper() - from difflib import SequenceMatcher - print(s1) - print(s2) - print(SequenceMatcher(None, s1, s2).ratio()) - if SequenceMatcher(None, s1, s2).ratio() >= 0.10: - # and SequenceMatcher(None, s1, s2).ratio()<0.50: - final.append( - "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace( - '[', - '').replace( - ']', '').replace( - '.com', '')) - else: - final.append("OrganizationName--" + s2) - - except IndexError: - try: - match = str(urlfinal[0]).lower() - match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', - '').upper() - final.append("OrganizationName--" + match) - # remove_list.append(match) - except IndexError: - company() - - #################################################company Name######################################## - - def company(): - print('company list working') - import re - - new = [] - with open('test.txt', 'r+') as f: - flag = False - for line in f: - line = line.upper() - matches = re.findall( - r'''\bENTERPRISE\b|\bTRADE\b|\bEMPIRE\b|\bSTORES\b|\bMACHINERY\b|\bINDUSTRIES\b|\bTECHNOLOGY\b|\bCOMPANY\b|\bDESIGNERS\b|\bPOLYMER\b|\bBELT\b|\bAGRO\b|\bPLASTIC\b|\bGROUP\b|\bTOOLS\b|\bENGG.\b|\bSOLUTION\b|\bCONSTRUCTION\b|\bPACK\b|\bELECT\b|\bSTEEL\b|\bIRON\b|\bDIES\b|\bMOULD\b|\bCORPORATION\b|\bSEEDS\b|\bPOWER\b|\bCONSULTANT\b|\bMFG.\b|\bPRINT\b|\bFOOD\b|\bSOLAR\b|\bINDUSTRY\b|\bLIMITED\b|\bPRIVATE\b|\bPVT\b|\bLTD\b|\bOUTSOURCING\b|\bCNC\b|\bMACHINERIES\b|\bSOLUTIONS\b|\bENGINEERS\b|\bWORKS\b|\bPRODUCTS\b|\bENTERPRISES\b|\bCOMPANIES\b|\bPOLYMERS\b|\bTRADING\b''', - line) - - for i in matches: - if i in line: - flag = True - if flag: - o = "OrganizationName--" + line - new.append(o) - # if line.startswith('\n'): - # flag = False - try: - a = new[0].replace('\n', '') - final.append(a) - except IndexError: - final.append("OrganizationName--") - - # ************************************* CONTACT PERSON ******************************************************************* - def contactpersonname(): - print('contactpersonname working') - try: - final.append( - "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace('"', '').replace( - "]", - "") + '/' + - PErsons[ - 1].replace(":PER", "").replace('"', '')) - except IndexError: - try: - final.append( - "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace("]", - "").replace( - '"', '')) - except IndexError: - final.append("CONTACTPERSONNAME--") - - def image_to_text(): - - # doc = DocumentFile.from_images(found) - # result = model(doc) - # image_to_text.txt = result.render() - - # tess.pytesseract.tesseract_cmd = r"C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe" - # img = Image.open(found) - # text = tess.image_to_string(img) - # image_to_text.txt = text - # print(text) - import cv2 - img_path = found - img = cv2.imread(img_path) - img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - cv2.imwrite(str(found), img) - - result = ocr.ocr(img_path, cls=True) - result = result[0] - - txts = [line[1][0] for line in result] - - image_to_text.txt = "" - for i in txts: - if len(i) < 4: - continue - # print(i+"\n") - image_to_text.txt = image_to_text.txt + str(i) + "\n" - # print(image_to_text.txt) - - def pdf_to_text(): - - from pdfminer.high_level import extract_text - pdf_to_text.txt = extract_text(found) - # pdf_to_text.txt= text.replace('\n', ' ') - - extensionlist = ['JPEG', 'jpg', 'png', 'JPG', 'PNG', 'jpeg'] - - if extension in extensionlist: - print('image' + extension) - image_to_text() - x = image_to_text.txt - - else: - print('pdf' + extension) - pdf_to_text() - x = pdf_to_text.txt - - verticaltext = x - htext = x - # print('------------------------------------------------') - print( - '############################################################# this is verticaltext #################################################################') - print(verticaltext) - htext = htext.replace('\n', ' ') - print( - '############################################################# this is htext #############################################################') - print(htext) - y = x.replace('\n', ',') - y = y.replace(' ', ' ') - # y = y.replace(".", " .") - horizontaltext = y - # print('------------------------------------------------') - print( - '############################################################# this is horizontaltext #############################################################') - print(horizontaltext) - - textfile = open("test123456.txt", "w") - a = textfile.write(verticaltext) - textfile.close() - textfile = open("vtext.txt", "w") - a = textfile.write(horizontaltext) - textfile.close() - with open('test123456.txt', 'r') as f: - with open('test.txt', 'w') as w: - for line in f: - if line.strip().replace('|', ''): - w.write(line) - - ###########################ADDRESS################################## - addrespinlst = [] - - def splitaddress(): - import re - textaddress = htext.replace('\n', ' ') - # print(textaddress) - - address1 = (textaddress.partition(",")[0]) - words = address1.split() - address1 = words[-1] - addre = (htext.partition(",")[2]) - a = addre.replace('\n', ' ').replace('\x0c', '') - addre = (a.partition(",")[2]) - matches = re.findall( - r'(.*?)-\d{3} \d{3}|(.*?)\b-\d{6}\b|(.*?)\b\d{6}\b|(.*?)\b\d{3} \d{3}\b|\b(.*?)-\d{2}\b|(.*?)\b\d{3} \d{3}\b', - a) - for match in matches: - address2 = match - address2 = str(address2) - address2 = address2.replace("'", "").replace("(", "").replace(")", "").replace(', ,', '').replace(' ', - '') - - matches = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3} \d{3}\b|\b-\d{2}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', a) - for address3 in matches: - pass - try: - Address = address1 + "," + address2 + "," + address3 - final.append('ADDRESS--' + Address) - addrespinlst.append(Address) - - except NameError: - - print( - '############################################################ Addressmodelworking #############################################################') - - # doc = nlp_model1(textaddress) - # addlist = [] - # for ent in doc.ents: - # name = (f'{ent.label_.upper():{10}}--{ent.text}') - # addlist.append(name) - # try: - # Address = addlist[0] - # final.append(Address) - # addrespinlst.append(Address) - # remove_list.append( - # str(Address).replace("[", "").replace("]", "").replace("\\n", "").replace("'", "").replace( - # "ADDRESS--", - # "")) - # except IndexError: - # final.append("ADDRESS--") - pass - - ################################################## website####################################################### - - # import re - - # url = [] - # matches = re.findall(r'www.*', verticaltext) - # for match in matches: - # if (match.count('.')) == 1: - # a_string1 = match.replace("www", "www.") - - # final.append("Urls--" + a_string1) - # url.append(a_string1) - # else: - - # final.append("Urls--" + match) - - # if len(url)==0: - - # from urlextract import URLExtract - - # extractor = URLExtract() - # urls = extractor.find_urls(verticaltext) - # try: - # urllist = urls[0] - # final.append("Urls--"+urllist) - # url.append(urllist) - # except IndexError: - # final.append("Urls--") - - # for match in matches: - # if (match.count('.')) == 1: - # a_string1 = match.replace("www", "www.") - - # final.append("Urls--" + a_string1) - # url.append(a_string1) - # else: - - # final.append("Urls--" + match) - # url.append(match) - # remove_list.append(match) - # else: - # final.append("Urls--" ) - - ################################################## website####################################################### - - import re - # final=[] - url = [] - urlfinal = [] - matches = re.findall(r'www.*', verticaltext) - for match in matches: - - if (match.count('.')) == 1: - a_string1 = match.replace("www", "www.") - - # final.append("Urls--" + a_string1) - url.append(a_string1) - else: - - url.append(match) - - if len(url) == 0: - - from urlextract import URLExtract - - extractor = URLExtract() - urls = extractor.find_urls(verticaltext) - try: - urllist = urls[0] - url.append(urllist) - url.append(urllist) - except IndexError: - pass - - for match in matches: - if (match.count('.')) == 1: - a_string1 = match.replace("www", "www.") - - url.append(a_string1) - # url.append(a_string1) - else: - - url.append(match) - url.append(match) - - else: - pass - try: - test_string = url[0] - - test_list = ['com', 'www', 'in', 'co', "WWW", "COM", "CO", "IN"] - - res = [ele for ele in test_list if (ele in test_string)] - - if len(res) == 0: - print('no match') - - final.append('urls--') - - - else: - print('matched') - final.append('urls--' + url[0]) - urlfinal.append(url[0]) - - - except IndexError: - final.append('urls--') - - print( - '############################################################# url #############################################################') - print(url) - #######organisation and contact################ - - # def company_url(): - # # print('--url--') - # # print(url) - - # try: - # match = str(url[0]).lower() - # match =match.replace('.com','').replace('www.','').replace('.in','').replace('.co','').upper() - # final.append("OrganizationName--" + match) - # # remove_list.append(match) - # except IndexError: - # org_name() - # organisation() - # final.append("OrganizationName--") - - # make example sentence - - # print(horizontaltext) - sentence = Sentence(verticaltext) - - # predict NER tags - tagger.predict(sentence) - - # print sentence - ko = (sentence) - - ko1 = str(ko).split("→") - import pandas as pd - - dfg = [] - try: - s = ko1[1].replace("", "").replace("", "").replace("/", ":") - except IndexError: - os.remove(found) - return 'Invalid image' - dfg.append(s) - df = pd.DataFrame(dfg) - df = df[0] - - df.to_csv("df.csv", index=False) - - df1 = pd.read_csv("df.csv") - ve = df1["0"].str.split(",") - fgf = ve.to_list() - dfgh = pd.DataFrame(fgf[0]) - maindf = dfgh[0] # .str.split(":") - # maindf.to_csv("main.csv") - - main1 = maindf.to_list() - main1 - # cv=pd.DataFrame(ve) - # cv - per = ["PER"] - org = ["ORG"] - loc = ["LOC"] - organizations = [i for i in main1 for j in org if j in i] - PErsons = [i for i in main1 for j in per if j in i] - location = [i for i in main1 for j in loc if j in i] - - # ************************************* ORGANIZATION ******************************************************************** - try: - if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[', - '').replace( - ']', '').replace( - '.com', '') + " /" + organizations[1].replace(":ORG", "").replace('"', '').replace('.com', ''))) < 4: - pass - # company_url() - else: - - match = str(urlfinal[0]).lower() - match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace( - 'https', - '').replace( - 'http', '').replace(":", "").replace("/", "").upper() - print(match) - - s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace(']', '').replace( - '.com', '') + " /" + \ - organizations[1].replace(":ORG", "").replace('"', '').replace('.com', '') - s1 = s1g.upper() - s2 = match.upper() - from difflib import SequenceMatcher - print(s1) - print(s2) - print(SequenceMatcher(None, s1, s2).ratio()) - if SequenceMatcher(None, s1, s2).ratio() >= 0.10: - # and SequenceMatcher(None, s1, s2).ratio()<0.50: - final.append( - "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[', - '').replace( - '.com', '').replace(']', '') + " /" + organizations[1].replace(":ORG", "").replace('"', - '').replace( - '.com', '').replace(']', '')) - else: - final.append("OrganizationName--" + s2) - - - - except IndexError: - try: - if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('[', '').replace(']', - '').replace( - '"', - '').replace( - '.com', ''))) < 4: - pass - # company_url() - else: - - match = str(urlfinal[0]).lower() - match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace( - 'https', '').replace('http', '').replace(":", "").replace("/", "").upper() - - s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace(']', - '').replace( - '.com', '') - s1 = s1g.upper() - s2 = match.upper() - from difflib import SequenceMatcher - print(s1) - print(s2) - print(SequenceMatcher(None, s1, s2).ratio()) - if SequenceMatcher(None, s1, s2).ratio() >= 0.10: - # and SequenceMatcher(None, s1, s2).ratio()<0.50: - final.append( - "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[', - '').replace( - ']', '').replace( - '.com', '').replace(']', '')) - else: - final.append("OrganizationName--" + s2) - - except IndexError: - org_name() - organisation() - - # final.append("OrganizationName--") - - # ************************************* CONTACT PERSON ******************************************************************* - try: - final.append( - "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace('"', '').replace("]", - "") + - PErsons[ - 1].replace(":PER", "").replace('"', '')) - except IndexError: - try: - final.append( - "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace("]", "").replace( - '"', - '')) - except IndexError: - org_name() - contactpersonname() - # final.append("CONTACTPERSONNAME--") - ###############address flair##################### - - try: - print( - '############################################################# address new code #############################################################') - loactionlst = ['address', 'factory', 'd.no', 'h.no', 'h. no', 'plot', 'flat', 'plat'] - loclst = [i for i in loactionlst if i in htext.lower()] - - textaddress = htext - textaddress = textaddress.replace("|", ",") - textaddress = textaddress.lower() - - nlp = pipeline('ner', model=model, tokenizer=tokenizer, aggregation_strategy="simple") - grop = nlp(textaddress) - - citycountry = [] - print('########################### city or country name ###########################') - d = grop[-1] - - if d['entity_group'] == "COUNTRY": - print(d["word"]) - citycountry.append(d["word"]) - elif d['entity_group'] == "CITY": - print(d["word"]) - citycountry.append(d["word"]) - - try: - address1 = loclst[0] - except IndexError: - address1 = (textaddress.partition(",")[0]) - words = address1.split() - address1 = words[-1] - - star_location = address1.lower() - end_location = citycountry[0].replace("#", "") - start = star_location - end = end_location - s = textaddress.lower() - middle_address = (s.split(start))[-1].split(end)[0] - Address = start + middle_address + end - Address = Address.replace('--', '').title() - print(Address) - if Address.count(',') < 2: - splitaddress() - else: - final.append('ADDRESS--' + Address) - - # star_location = location[0].replace(":LOC", "").replace('"', '').replace('[', '') - # end_location = location[-1].replace(":LOC", "").replace('"', '').replace(']', '') - # d1 = star_location.split() - # d2 = end_location.split() - # d3 = d1[0] - # d4 = d2[0] - # start = d3 - # end = d4 - # s = horizontaltext - # middle_address = ((s.split(start))[1].split(end)[0]) - # Address = d3 + middle_address + d4 - # final.append('ADDRESS--' + Address) - # addrespinlst.append(Address) - - - except IndexError: - splitaddress() - - ########################################## Designation ########################################### - import re - new = [] - with open('test.txt', 'r') as f: - flag = False - for line in f: - line1 = line - line = line.upper() - matches = re.findall( - r'''\bAPPRENTICE\b|\bEXECUTIVE\b|\bPROPRIETOR\b|\bPARTNER\b|\bMD\b|\bANALYST\b|\bPRACTITIONER\b|\bCUSTOMER\b|\bCOO\b|\bCOACH\b|\bADMINISTRATIVE\b|\bADMINISTRATOR\b|\bAGENT\b|\bHEAD\b|\bCHIEF\b|\bDIRECTOR\b|\bVICE\b|\bPRESIDENT\b|\bMANAGER\b|\bCOORDINATOR\b|\bCOUNSELOR\b|\bSUPERVISOR\b|\bASSISTANT\b|\bSPECIALIST\b|\bARTIST\b|\bWORKER\b|\bCONSULTANT\b|\bREPRESENTATIVE\b|\bARCHITECT\b|\bSTAFF\b|\bMEMBER\b|\bDEVELOPER\b|\bENGINEER\b|\bEXAMINOR\b|\bDOCTOR\b|\bPROFESSOR\b|\bTEACHER\b|\bLEAD\b|\bOFFICER\b|\bCEO\b|\bC.E.O\b|\bJUNIOR\b|\bSENIOR\b|\bPROFESSOR\b|\bSALES\b''', - line) - for match in matches: - line = line.replace('-', '') - # print(line) - o = "Designation--" + line - new.append(o) - remove_list.append(str(line1).replace('\n', '')) - - try: - a = new[0].replace('\n', '') - final.append(a) - - except IndexError: - final.append("Designation--") - - ###################################################Phone number################################################# - num = [] - import phonenumbers - - # print(verticaltext) - numbers = phonenumbers.PhoneNumberMatcher( - verticaltext.replace('+91', '').replace('(0)', '').replace('(', '').replace(')', ''), "IN") - - for number in numbers: - number = str(number).split(")") - num.append(number[1]) - # num.append(number[-1]) - if len(num) == 0: - final.append("ContactNumber--") - final.append("OrganizationNumber--") - elif len(num) > 1: - final.append("ContactNumber--" + num[0].replace(' ', '')) - final.append("OrganizationNumber--" + num[-1].replace(' ', '')) - elif len(num) == 1: - try: - final.append("ContactNumber--" + num[0].replace(' ', '')) - final.append("OrganizationNumber--") - except IndexError: - final.append("ContactNumber--") - final.append("OrganizationNumber--") - print( - '############################################################# num #############################################################') - print(num) - # try: - # final.append("PhoneNumber--" + num[0].replace(' ', '')) - # remove_list.append(num[0]) - # except IndexError: - # pass - # try: - # final.append("PhoneNumber1--" + num[1].replace(' ', '')) - # remove_list.append(num[1]) - # except IndexError: - # pass - # try: - # final.append("PhoneNumber2--" + num[2].replace(' ', '')) - # remove_list.append(num[2]) - # except IndexError: - # pass - - ################################################### Email###################################################### - import re - from email_scraper import scrape_emails - s = list(scrape_emails(horizontaltext)) - email_id = s - - # email_id = [] - # matches = re.findall(r'[\w\.-]+@[\w\.-]+', verticaltext) - # for match in matches: - # email_id.append(match) - - # # final.append('Email--' + match) - # email_ = str(email_id).replace("[", "").replace("]", "").replace("'", "") - # # final.append(email_) - - # # final.append('Email--' + email_) - # # remove_list.append(email_) - if len(email_id) > 1: - final.append( - 'ContactEmail--' + str(email_id[0]).replace("[", "").replace("]", "").replace("\\n", "").replace("'", - "")) - final.append( - 'OrganizationEmail--' + str(email_id[-1]).replace("[", "").replace("]", "").replace("\\n", "").replace( - "'", - "")) - else: - try: - final.append( - 'ContactEmail--' + str(email_id[0]).replace("[", "").replace("]", "").replace("\\n", "").replace( - "'", - "")) - final.append('OrganizationEmail--') - except IndexError: - final.append('ContactEmail--') - final.append('OrganizationEmail--') - - ###############PINCODE############ - - pinlst = [] - print(addrespinlst) - import pgeocode - - # try: - # matche1 = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3} \d{3}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', addrespinlst[0]) - # for i in matche1: - # address3 = i.replace(' ', '').replace('-', '') - # pinlst.append(address3) - # except IndexError: - - lst = [] - for i in num: - i = i[1:] - lst.append(i) - - infile = r"vtext.txt" - outfile = r"cleaned_file.txt" - import glob - delete_list = lst - # delete_list = ["firstname1 lastname1","firstname2 lastname2","firstnamen lastnamen",'Director - Sales & Business Development'] - fin = open(infile, "r+") - fout = open(outfile, "w+") - for line12 in fin: - for word in delete_list: - line12 = line12.replace(word, "") - - fout.write(line12) - fin.close() - # print(line) - - # print(addrespinlst) - import pgeocode - print(line12) - import re - matche1 = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3} \d{3}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', line12) - for i in matche1: - address3 = i.replace(' ', '').replace('-', '') - pinlst.append(address3) - - nomi = pgeocode.Nominatim('IN') - try: - a = nomi.query_postal_code(str(pinlst[-1])) - # print(a) - b = a.keys() - c = b.values.tolist() - d = a.tolist() - postal_code = "PinCode1" + "--" + d[0] - final.append(postal_code) - country_code = c[1] + "--" + str(d[1]) - final.append(country_code) - place_name = 'LandMark1' + "--" + str(d[2]) - final.append(place_name) - state_name = c[3] + "--" + str(d[3]) - final.append(state_name) - state_code = c[4] + "--" + str(d[4]) - final.append(state_code) - county_name = 'CityName1' + "--" + str(d[5]) - final.append(county_name) - - except (IndexError, NameError): - final.append("PinCode1--") - final.append("country_code--") - final.append("LandMark1--") - final.append("state_name--") - final.append("state_code--") - final.append("CityName1--") - - ######################################################## json ##################################################################### - - import pandas as pd - df = pd.DataFrame(final) - df1 = df[0].str.split('--', expand=True) - # print(df1) - df1.rename({df1.columns[-2]: 'Keys'}, axis=1, inplace=True) - df1.rename({df1.columns[-1]: 'Values'}, axis=1, inplace=True) - df1['Keys'] = df1['Keys'].str.strip() - df1.to_csv('path123.csv', index=False) - df2 = pd.read_csv('path123.csv') - print(df2) - df2 = df2.T - df2.to_csv('path1.csv', index=False, header=False) - df1 = pd.read_csv('path1.csv') - df1.to_json('firstjson1.json', orient="index") - import json - with open('firstjson1.json', 'r') as json_file: - json_load = json.load(json_file) - # # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/businesscards/create" - nothing = json.dumps(json_load).replace("]", "").replace("[", "").replace('{"0":', '').replace('}}', '}') - # # print('--------------------------------------------------------------------------') - # # print(nothing) - empty = [] - import base64 - name = found - image = open(name, 'rb') - image_read = image.read() - image_64_encode = base64.b64encode(image_read) - NULL = 'null' - empty.append("ByteData--" + (NULL).strip('""')) - image_64_encode = image_64_encode.decode('utf-8') - empty.append("FileData--" + str(image_64_encode)) - imagedata = name.split("/") - imagename = str(imagedata[-1]).replace('"', '').replace("[", "").replace("]", "") - imagename1 = str(imagename).split('.') - imagename = str(imagename1[-2]).replace("[", "]") - empty.append("FileName--" + imagename) - empty.append("FilePath--"+ "") - imageExtension = str(imagename1[-1]).replace("[", "]") - empty.append("FileType--" + imageExtension) - image.close() - import pandas as pd - df = pd.DataFrame(empty) - df = df[0].str.split("--", expand=True) - data1 = pd.DataFrame(df[0]) - data2 = pd.DataFrame(df[1]) - dt = data2.set_index(data1[0]) - dt4 = dt.T - dictionary = dt4.to_dict(orient="index") - list1 = [] - # list.append(a) - list1.append(dictionary[1]) - # # final.append("image--"+str(dictionary[1]).replace("\'",'"')) - print('--------------------') - # print(namelist) - import json - # JSON data: - x = nothing - # python object to be appended - y = {"image": dictionary[1]} - # parsing JSON string: - z = json.loads(x) - # appending the data - z.update(y) - # the result is a JSON string: - # print(json.dumps(z)) - zlist=[] - zlist.append(z) - #############################################creating csv##################################### - print(final) - print(imagelist) - final.append('image--' + str(imagelist)) - import requests - import json - url = "https://anwi.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create/list" #dev - # url = "https://qa.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create" #testing - # url = "https://test.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create" # test - # url='http://localhost:3088/apis/v4/bizgaze/integrations/businesscards/create' - # url = 'https://c01.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create' # C01 - payload1 = json.dumps(zlist) - # print('--------------------------------------------------------------------------') - #print(payload1) - headers = { - #'Authorization': 'stat 1a936137490040c997928f485e3cdd7a', #dev - 'Authorization': 'stat 16516391d0074f4c8a15ea16fb49470b',#testing - # 'Authorization': 'stat 08e55fcfbaa940c8ab8145a074c444d1', - # 'Authorization': 'stat f7cdb402e01e44e5842878653946168f', # c01 - # 'Authorization': 'Stat c3e11b2fcbfe455b86a1fe6efde02a69',#demo - 'Content-Type': 'application/json' - } - response = requests.request("POST", url, headers=headers, data=payload1) - # print("##############################################################") - - #print(payload1) - print(response.text) - import os - if 'BusinessCards Created Successfully' in response.text: - print('present') - os.remove(found) - else: - print('not present') - - df1.to_json('visitingcard.json') - data = df1.to_json('visiting.json', orient='records') - print(data) - - #return render_template('index.html') - - - return response.text - # return 'done' - - -if __name__ == "__main__": - app.run(host='0.0.0.0', port=1112) \ No newline at end of file