# Business-card upload endpoint: OCR each uploaded card, pull contact fields
# out of the text and forward one record per card to the Bizgaze API.
#
# NOTE(review): `app`, `request`, `Sentence`, `tagger`, `ocr`, `pipeline`,
# `model` and `tokenizer` are used below but are not defined in this part of
# the file; presumably the application bootstrap provides them -- confirm.
#
# NOTE(review): this code was reconstructed from a copy whose indentation and
# runs of whitespace had been collapsed. Two things should be checked against
# version control:
#   * the whole per-card pipeline is assumed to sit inside the loop over the
#     uploaded cards, with the final `return` after the loop;
#   * whitespace inside string / regex literals (e.g. `\d{3} \d{3}` and the
#     `' '` removed from the regex address) is reproduced as it appeared.
#     A `replace(' ', ' ')` on the comma-joined text was a no-op in that
#     copy and is therefore omitted.
import base64
import json
import os
import re
from difflib import SequenceMatcher
from io import StringIO

import cv2
import pandas as pd
import pgeocode
import phonenumbers
import requests
from email_scraper import scrape_emails
from pdfminer.high_level import extract_text
from urlextract import URLExtract

UPLOAD_DIR = './multicards/'
IMAGE_EXTENSIONS = ('JPEG', 'jpg', 'png', 'JPG', 'PNG', 'jpeg')

# SECURITY: the endpoint and token used to be literals inside the handler.
# They can now be supplied through the environment; the defaults are the
# previous literals so existing deployments keep working unchanged.
BIZGAZE_URL = os.environ.get(
    'BIZGAZE_CARDS_URL',
    "https://anwi.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create/list",
)
BIZGAZE_AUTH = os.environ.get('BIZGAZE_CARDS_AUTH', 'stat 16516391d0074f4c8a15ea16fb49470b')
# Upper bound (seconds) on the outbound call so a stalled API cannot pin a worker.
BIZGAZE_TIMEOUT = 120

ADDRESS_KEYWORDS = ('address', 'factory', 'd.no', 'h.no', 'h. no', 'plot', 'flat', 'plat')
URL_TOKENS = ('com', 'www', 'in', 'co', "WWW", "COM", "CO", "IN")


def _word_pattern(words):
    """Compile ``\\bWORD\\b|\\bWORD\\b|...`` for *words* (inserted verbatim, unescaped)."""
    return re.compile('|'.join(r'\b' + word + r'\b' for word in words))


COMPANY_WORDS = _word_pattern((
    'ENTERPRISE', 'TRADE', 'EMPIRE', 'STORES', 'MACHINERY', 'INDUSTRIES', 'TECHNOLOGY',
    'COMPANY', 'DESIGNERS', 'POLYMER', 'BELT', 'AGRO', 'PLASTIC', 'GROUP', 'TOOLS',
    'ENGG.', 'SOLUTION', 'CONSTRUCTION', 'PACK', 'ELECT', 'STEEL', 'IRON', 'DIES',
    'MOULD', 'CORPORATION', 'SEEDS', 'POWER', 'CONSULTANT', 'MFG.', 'PRINT', 'FOOD',
    'SOLAR', 'INDUSTRY', 'LIMITED', 'PRIVATE', 'PVT', 'LTD', 'OUTSOURCING', 'CNC',
    'MACHINERIES', 'SOLUTIONS', 'ENGINEERS', 'WORKS', 'PRODUCTS', 'ENTERPRISES',
    'COMPANIES', 'POLYMERS', 'TRADING',
))
DESIGNATION_WORDS = _word_pattern((
    'APPRENTICE', 'EXECUTIVE', 'PROPRIETOR', 'PARTNER', 'MD', 'ANALYST', 'PRACTITIONER',
    'CUSTOMER', 'COO', 'COACH', 'ADMINISTRATIVE', 'ADMINISTRATOR', 'AGENT', 'HEAD',
    'CHIEF', 'DIRECTOR', 'VICE', 'PRESIDENT', 'MANAGER', 'COORDINATOR', 'COUNSELOR',
    'SUPERVISOR', 'ASSISTANT', 'SPECIALIST', 'ARTIST', 'WORKER', 'CONSULTANT',
    'REPRESENTATIVE', 'ARCHITECT', 'STAFF', 'MEMBER', 'DEVELOPER', 'ENGINEER',
    'EXAMINOR', 'DOCTOR', 'PROFESSOR', 'TEACHER', 'LEAD', 'OFFICER', 'CEO', 'C.E.O',
    'JUNIOR', 'SENIOR', 'PROFESSOR', 'SALES',
))
PIN_RE = re.compile(r'-\d{6}|\b\d{6}\b|\b\d{3} \d{3}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b')
ADDRESS_PIN_RE = re.compile(
    r'-\d{6}|\b\d{6}\b|\b\d{3} \d{3}\b|\b-\d{2}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b')
ADDRESS_PREFIX_RE = re.compile(
    r'(.*?)-\d{3} \d{3}|(.*?)\b-\d{6}\b|(.*?)\b\d{6}\b|(.*?)\b\d{3} \d{3}\b'
    r'|\b(.*?)-\d{2}\b|(.*?)\b\d{3} \d{3}\b')


def _strip_all(text, *needles):
    """Return *text* with each needle removed, applied strictly left to right."""
    for needle in needles:
        text = text.replace(needle, '')
    return text


def _save_upload(card):
    """Decode the card's base64 ``FileData`` into UPLOAD_DIR and return the path written."""
    requested = '{}.{}'.format(card['FileName'], card['FileType'])
    # The name comes from the client: keep only its last path component so a
    # value such as "../../app" cannot write outside UPLOAD_DIR.
    safe_name = os.path.basename(requested.replace('\\', '/'))
    path = UPLOAD_DIR + safe_name
    with open(path, "wb") as fh:
        fh.write(base64.decodebytes(card['FileData'].encode()))
    return path


def _image_to_text(path):
    """OCR the image at *path*; return recognised strings of 4+ chars, one per line."""
    gray = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2GRAY)
    # The upload is overwritten with its grayscale version; that file is also
    # what gets base64-encoded into the outgoing record later on.
    cv2.imwrite(str(path), gray)
    pages = ocr.ocr(path, cls=True)
    # Presumably empty/None when nothing is detected; treat that as "no text"
    # instead of failing while iterating.
    detections = (pages[0] if pages else None) or []
    texts = [item[1][0] for item in detections]
    return ''.join(str(text) + "\n" for text in texts if len(text) >= 4)


def _find_urls(text):
    """Return URL candidates: ``www...`` matches first, URLExtract as a fallback."""
    urls = [
        hit.replace("www", "www.") if hit.count('.') == 1 else hit
        for hit in re.findall(r'www.*', text)
    ]
    if not urls:
        urls = URLExtract().find_urls(text)[:1]
    return urls


def _url_field(urls):
    """Return ``(field, accepted)``: the ``urls--`` entry and the URLs trusted for naming."""
    if urls and any(token in urls[0] for token in URL_TOKENS):
        return 'urls--' + urls[0], [urls[0]]
    return 'urls--', []


def _ner_chunks(text):
    """Run the flair tagger on *text*; return its comma-split span list, or None.

    None means the rendered sentence has no ``→`` section, i.e. there is
    nothing usable to extract from this card.
    """
    sentence = Sentence(text)
    tagger.predict(sentence)
    rendered = str(sentence).split("→")
    if len(rendered) < 2:
        return None
    return rendered[1].replace("/", ":").split(",")


def _url_to_name(url, *extra):
    """Reduce a URL to an upper-cased bare name by deleting common URL fragments."""
    return _strip_all(str(url).lower(), '.com', 'www.', '.in', '.co', *extra).upper()


def _company_from_keywords(card_lines):
    """Return the first card line containing a company keyword (upper-cased)."""
    for line in card_lines:
        upper = line.upper()
        if COMPANY_WORDS.search(upper):
            return ("OrganizationName--" + upper).replace('\n', '')
    return "OrganizationName--"


def _organisation_field(orgs, accepted_urls, card_lines):
    """Choose the organisation name from NER spans, the URL, or keyword lines.

    This is the flattened form of the original nested try/except ladders:
      * no accepted URL      -> keyword search over the card lines;
      * URL but no ORG span  -> name derived from the URL;
      * URL and ORG span(s)  -> the span text when it resembles the URL name
                                (similarity >= 0.10), otherwise the URL name.
    The original also re-OCR'd the card with Tesseract before the fallbacks,
    but stored the result in variables local to that helper, so it never
    influenced the output; that pass is omitted.
    """
    if not accepted_urls:
        return _company_from_keywords(card_lines)
    if not orgs:
        return "OrganizationName--" + _url_to_name(accepted_urls[0])

    domain = _url_to_name(accepted_urls[0], 'https', 'http', ':', '/')
    first = orgs[0]
    if len(orgs) >= 2:
        second = orgs[1]
        # `probe` is only compared; `label` is what gets emitted. Their
        # clean-up lists differ slightly and are kept exactly as they were.
        probe = (_strip_all(first, ':ORG', '"', '[', ']', '.com') + " /"
                 + _strip_all(second, ':ORG', '"', '.com'))
        label = (_strip_all(first, ':ORG', '"', '[', '.com', ']') + " /"
                 + _strip_all(second, ':ORG', '"', '.com', ']'))
    else:
        probe = _strip_all(first, ':ORG', '"', '[', ']', '.com')
        label = _strip_all(first, ':ORG', '"', '[', ']', '.com', ']')

    if SequenceMatcher(None, probe.upper(), domain).ratio() >= 0.10:
        return "OrganizationName--" + label
    return "OrganizationName--" + domain


def _contact_field(persons):
    """Build the contact-person entry from the first one or two PER spans."""
    if len(persons) >= 2:
        # The two spans are concatenated without a separator, as before.
        return ("CONTACTPERSONNAME--" + _strip_all(persons[0], ':PER', '[', '"', ']')
                + _strip_all(persons[1], ':PER', '"'))
    if persons:
        return "CONTACTPERSONNAME--" + _strip_all(persons[0], ':PER', '[', ']', '"')
    return "CONTACTPERSONNAME--"


def _split_address(htext):
    """Regex fallback for the address; return an ``ADDRESS--`` entry or None.

    The address is: last word before the first comma + text leading up to a
    PIN-like number + that number (last match of each). None is returned when
    any part is missing, in which case no ADDRESS entry is emitted at all.
    NOTE(review): every other field emits a blank entry on failure; confirm
    whether ADDRESS should do the same.
    """
    lead_words = htext.replace('\n', ' ').partition(",")[0].split()
    remainder = htext.partition(",")[2].replace('\n', ' ').replace('\x0c', '')
    prefixes = ADDRESS_PREFIX_RE.findall(remainder)
    pins = ADDRESS_PIN_RE.findall(remainder)
    # Previously an empty `lead_words` raised IndexError, even from inside the
    # caller's `except IndexError` handler, turning the request into a 500.
    if not (lead_words and prefixes and pins):
        return None
    # findall() yields one tuple of groups per match; the tuple's repr is
    # flattened to text by deleting its punctuation.
    body = _strip_all(str(prefixes[-1]), "'", "(", ")", ', ,', ' ')
    return 'ADDRESS--' + lead_words[-1] + "," + body + "," + pins[-1]


def _address_field(htext, address_ner):
    """Return an ``ADDRESS--`` entry (or None) for the one-line card text.

    The span runs from a start marker (first address keyword present, else the
    last word before the first comma) to the last entity reported by
    *address_ner*, provided that entity is a CITY or COUNTRY. Whenever a piece
    is missing, or the span has fewer than two commas, the regex fallback is
    used instead.
    """
    text = htext.replace("|", ",").lower()
    entities = address_ner(text)
    if not entities or entities[-1]['entity_group'] not in ("COUNTRY", "CITY"):
        return _split_address(htext)

    keywords_present = [word for word in ADDRESS_KEYWORDS if word in htext.lower()]
    if keywords_present:
        start = keywords_present[0]
    else:
        lead_words = text.partition(",")[0].split()
        if not lead_words:
            return _split_address(htext)
        start = lead_words[-1]

    end = entities[-1]["word"].replace("#", "")
    if not end:
        # str.split('') raises ValueError; treat as "no usable end marker".
        return _split_address(htext)

    middle = text.split(start)[-1].split(end)[0]
    address = (start + middle + end).replace('--', '').title()
    if address.count(',') < 2:
        return _split_address(htext)
    return 'ADDRESS--' + address


def _designation_field(card_lines):
    """Return the first card line containing a job-title keyword (upper-cased)."""
    for line in card_lines:
        upper = line.upper()
        if DESIGNATION_WORDS.search(upper):
            # Hyphens are dropped so the value cannot contain the "--" separator.
            return ("Designation--" + upper.replace('-', '')).replace('\n', '')
    return "Designation--"


def _phone_fields(text):
    """Return ``(raw_numbers, entries)`` for Indian phone numbers found in *text*."""
    searchable = _strip_all(text, '+91', '(0)', '(', ')')
    raw_numbers = [match.raw_string
                   for match in phonenumbers.PhoneNumberMatcher(searchable, "IN")]
    contact = raw_numbers[0].replace(' ', '') if raw_numbers else ''
    office = raw_numbers[-1].replace(' ', '') if len(raw_numbers) > 1 else ''
    return raw_numbers, ["ContactNumber--" + contact, "OrganizationNumber--" + office]


def _email_fields(text):
    """Return the contact / organisation e-mail entries scraped from *text*."""
    # NOTE(review): the scraper's result is turned into a list without sorting,
    # so which address counts as "first" may not be stable -- confirm.
    emails = [_strip_all(str(address), "[", "]", "\\n", "'")
              for address in scrape_emails(text)]
    contact = emails[0] if emails else ''
    office = emails[-1] if len(emails) > 1 else ''
    return ['ContactEmail--' + contact, 'OrganizationEmail--' + office]


def _postal_fields(text, phone_numbers, geocoder):
    """Return the six postal entries for the last PIN-like number in *text*."""
    # Delete the phone numbers first so their digits are not taken for a PIN.
    for number in phone_numbers:
        text = text.replace(number, "")
    pins = [hit.replace(' ', '').replace('-', '') for hit in PIN_RE.findall(text)]
    if not pins:
        return ["PinCode1--", "country_code--", "LandMark1--",
                "state_name--", "state_code--", "CityName1--"]
    row = geocoder.query_postal_code(str(pins[-1]))
    names = row.keys().values.tolist()
    values = row.tolist()
    return [
        "PinCode1--" + str(values[0]),
        names[1] + "--" + str(values[1]),
        "LandMark1--" + str(values[2]),
        names[3] + "--" + str(values[3]),
        names[4] + "--" + str(values[4]),
        "CityName1--" + str(values[5]),
    ]


def _fields_to_record(fields):
    """Turn ``Key--Value`` strings into ``(one_row_frame, record_dict)``.

    The two CSV round-trips are intentional: they are what make pandas infer
    a type per field, so the record's value types stay what the receiving API
    has always been sent. They now go through memory instead of shared files
    in the working directory, which concurrent requests could overwrite.
    """
    # n=1: a value that itself contains "--" used to create extra columns and
    # break the later JSON step.
    table = pd.Series(fields).str.split('--', n=1, expand=True)
    table.columns = ['Keys', 'Values']
    table['Keys'] = table['Keys'].str.strip()
    tall = pd.read_csv(StringIO(table.to_csv(index=False)))
    wide = pd.read_csv(StringIO(tall.T.to_csv(index=False, header=False)))
    row = json.loads(wide.to_json(orient="index"))["0"]
    # Square brackets were stripped from the serialised JSON before; do the
    # same on keys and string values without string-editing the JSON itself.
    record = {
        _strip_all(key, '[', ']'): _strip_all(value, '[', ']') if isinstance(value, str) else value
        for key, value in row.items()
    }
    return wide, record


def _image_payload(path):
    """Return the ``image`` sub-record: the file at *path* base64-encoded plus its name parts."""
    with open(path, 'rb') as fh:
        encoded = base64.b64encode(fh.read()).decode('utf-8')
    filename = _strip_all(path.split("/")[-1], '"', "[", "]")
    # Split on the LAST dot so "john.doe.png" is reported as "john.doe", not "doe".
    stem, _, suffix = filename.rpartition('.')
    return {
        "ByteData": 'null',
        "FileData": encoded,
        "FileName": stem,
        "FilePath": "",
        "FileType": suffix,
    }


def _process_card(card, address_ner, geocoder):
    """Extract one card and POST it; return the API response text, or None.

    None means no entities could be read from the card; the stored upload has
    already been removed in that case.
    """
    found = _save_upload(card)
    extension = found.split('.')[-1]
    text = _image_to_text(found) if extension in IMAGE_EXTENSIONS else extract_text(found)

    htext = text.replace('\n', ' ')
    horizontaltext = text.replace('\n', ',')
    # Lines that still contain something once whitespace and '|' are ignored.
    card_lines = [line for line in StringIO(text) if line.strip().replace('|', '')]

    url_entry, accepted_urls = _url_field(_find_urls(text))
    fields = [url_entry]

    chunks = _ner_chunks(text)
    if chunks is None:
        os.remove(found)
        return None
    orgs = [chunk for chunk in chunks if "ORG" in chunk]
    persons = [chunk for chunk in chunks if "PER" in chunk]

    fields.append(_organisation_field(orgs, accepted_urls, card_lines))
    fields.append(_contact_field(persons))
    address_entry = _address_field(htext, address_ner)
    if address_entry is not None:
        fields.append(address_entry)
    fields.append(_designation_field(card_lines))
    phone_numbers, phone_entries = _phone_fields(text)
    fields.extend(phone_entries)
    fields.extend(_email_fields(horizontaltext))
    fields.extend(_postal_fields(horizontaltext, phone_numbers, geocoder))

    frame, record = _fields_to_record(fields)
    record.update({"image": _image_payload(found)})

    response = requests.post(
        BIZGAZE_URL,
        headers={'Authorization': BIZGAZE_AUTH, 'Content-Type': 'application/json'},
        data=json.dumps([record]),
        timeout=BIZGAZE_TIMEOUT,
    )
    print(response.text)
    # The stored upload is deleted only once the API confirms it was created.
    if 'BusinessCards Created Successfully' in response.text:
        os.remove(found)

    frame.to_json('visitingcard.json')
    frame.to_json('visiting.json', orient='records')
    return response.text


@app.route('/upload_BusinessCards', methods=["POST"])
def multiplecards():
    """Handle POST /upload_BusinessCards.

    The JSON body is a list of objects with ``FileData`` (base64 text),
    ``FileName`` and ``FileType``. Each card is processed and forwarded in
    turn.

    Returns:
        The API response text for the last card; ``'Invalid image'`` as soon
        as a card yields no entities (remaining cards are not processed); or a
        400 response when the body contains no cards (this used to surface as
        an unhandled error).
    """
    cards = request.get_json()
    if not cards:
        return 'No cards supplied', 400

    # Both objects are the same for every card, so build them once per request.
    address_ner = pipeline('ner', model=model, tokenizer=tokenizer, aggregation_strategy="simple")
    geocoder = pgeocode.Nominatim('IN')

    response_text = None
    for card in cards:
        response_text = _process_card(card, address_ner, geocoder)
        if response_text is None:
            return 'Invalid image'
    return response_text


if __name__ == "__main__":
    app.run(host='0.0.0.0', port=1112)