12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119 |
- from flask import Flask, render_template, request, redirect, Response, send_file
- import os
- # import openai
- import requests
- import pandas as pd
- import pgeocode
- from email_scraper import scrape_emails
- import phonenumbers
- from pdfminer.high_level import extract_text
- import pytesseract
- import time
- import multiprocessing
- from PIL import Image
- from functools import partial
- from urlextract import URLExtract
- import pytesseract as tess
- from PIL import Image
- import os
- import glob
-
- from pytesseract import *
- import shutil
- import cv2
- import matplotlib
- from werkzeug.utils import secure_filename
- import requests
- #import spacy
- import time
- import multiprocessing
- from PIL import Image
- from functools import partial
-
- import pandas as pd
################################################################
# Resolve the process working directory once at import time; backslashes are
# normalized to forward slashes so paths built from it behave the same on
# Windows and POSIX.
Current_Working_Directory=os.getcwd()
Current_Working_Directory=Current_Working_Directory.replace("\\","/")
# nlp_model1 = spacy.load(Current_Working_Directory + "/Invoice_parser/p")

################################################################
# import spacy

# nlp_model1 = spacy.load('./ADD3001.2')
from flair.data import Sentence
from flair.models import SequenceTagger
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

# Token-classification model used later (see the address-parsing section of the
# upload route) to tag CITY/COUNTRY entities in OCR'd card text.
# NOTE(review): downloaded from the HuggingFace hub at import time — first
# startup requires network access.
tokenizer = AutoTokenizer.from_pretrained("ml6team/bert-base-uncased-city-country-ner")
model = AutoModelForTokenClassification.from_pretrained("ml6team/bert-base-uncased-city-country-ner")

from paddleocr import PaddleOCR, draw_ocr

# OCR engine for image uploads: angle classification handles rotated text,
# use_space_char preserves spacing between recognized words.
ocr = PaddleOCR(use_angle_cls=True, lang='en', use_space_char=True, show_log=True)
# Flair large English NER tagger; the upload route uses its PER/ORG/LOC tags to
# split card text into person, organization and location candidates.
tagger = SequenceTagger.load("flair/ner-english-large")

import datetime

# Flask application object; routes below are registered against it.
app = Flask(__name__)


# app.config["IMAGE_UPLOADS"] = "C:/inetpub/wwwroot/FlaskApplication/Flask_Demo/upload/"
-
@app.route('/', methods=['GET'])
def card():
    """Serve the landing page with the business-card upload form."""
    template_name = 'card.html'
    return render_template(template_name)
-
-
- @app.route('/upload_BusinessCards', methods=["POST"])
- # @app.route('/multiplecards', methods=["POST"])
- def multiplecards():
- # print('################## multiple card detection #######################')
- # print(Dataset)
- from pathlib import Path
- Path("multicards").mkdir(exist_ok=True)
- datalist=[]
- zlist=[]
- Dataset = request.get_json()
- # print(data)
- #datalist.append(Dataset)
- data = {'visiting': Dataset}
- for i in data['visiting']:
- import time
- # time.sleep(1)
- a = i
- x = a['FileData']
- # print(x)
- y = a['FileName']
- z = a['FileType']
- # CreatedBy=a['CreatedBy']
-
- name = y + '.' + z
- # print(name)
- # print(y)
- # image = y.split("/")
- # filename=image[-1]
-
- # print(x)
- img_data = x.encode()
-
- import base64
- with open('./multicards/' + name, "wb") as fh:
- fh.write(base64.decodebytes(img_data))
- # print(i)
-
- # import os
- # import glob
- # for i in glob.glob('./multipleupload/*'):
-
- found = './multicards/' + name
- print(found)
- extension = found.split('.')[-1]
-
- # for root, dirs, fils in os.glob('./multipleupload'):
- # for name in files:
- # foundfile= os.path.join(root, name)
- # print(foundfile)
-
- import re
- import csv
- import glob
- import os
- # import pytesseract
- # import cv2
- import numpy as np
- import glob
- import os
- import cv2
- import requests
- final = []
- # final.append('assignto--'+CreatedBy)
- imagelist = []
- # print(found)
- remove_list = []
- import os
- import glob
- import pdfminer
-
- # import os
- # ts = 0
- # for file_name in glob.glob('./upload/*'):
- # fts = os.path.getmtime(file_name)
- # if fts > ts:
- # ts = fts
- # found = file_name
- # print(found)
-
- # print(extension)
-
- def org_name():
- print('org_name is working')
- import pytesseract
- fname = found
- if extension != 'pdf':
-
- img = cv2.imread(fname)
- img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-
- cv2.imwrite(str(found), img)
- from PIL import Image
- im = Image.open(found)
- im.save("images1.png", dpi=(1200, 1200))
- # import pytesseract
- fname = "images1.png"
- import pytesseract as tess
- from PIL import Image
-
- tess.pytesseract.tesseract_cmd = r"C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe"
- pdf = tess.image_to_pdf_or_hocr(fname, extension="pdf")
- with open("demo.pdf", "w+b", ) as f:
- f.write(pdf)
-
- from pdfminer.high_level import extract_text
- text = extract_text('demo.pdf')
- # doc = DocumentFile.from_images(found)
- # result = model(doc)
- # text = result.render()
-
- # from pdfminer.high_level import extract_text
- # txt = extract_text('demo.pdf')
- else:
- from pdfminer.high_level import extract_text
- text = extract_text(fname)
-
- sentence = Sentence(text)
-
- # predict NER tags
- tagger.predict(sentence)
-
- # print sentence
- ko = (sentence)
-
- ko1 = str(ko).split("→")
- import pandas as pd
-
- dfg = []
- try:
- s = ko1[1].replace("", "").replace("", "").replace("/", ":")
-
- # os.remove(found)
- # return 'Invalid image'
- dfg.append(s)
- df = pd.DataFrame(dfg)
- df = df[0]
-
- df.to_csv("df.csv", index=False)
-
- df1 = pd.read_csv("df.csv")
- ve = df1["0"].str.split(",")
- fgf = ve.to_list()
- dfgh = pd.DataFrame(fgf[0])
- maindf = dfgh[0] # .str.split(":")
- # maindf.to_csv("main.csv")
-
- main1 = maindf.to_list()
- main1
- # cv=pd.DataFrame(ve)
- # cv
- per = ["PER"]
- org = ["ORG"]
- loc = ["LOC"]
- organizations = [i for i in main1 for j in org if j in i]
- PErsons = [i for i in main1 for j in per if j in i]
- location = [i for i in main1 for j in loc if j in i]
- except IndexError:
- pass
-
- # ************************************* ORGANIZATION ********************************************************************
-
- def organisation():
- print('organisation working ')
- try:
- if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
- '').replace(
- '.com', '').replace('.in', '') + " /" + organizations[1].replace(":ORG", "").replace('"',
- '').replace(
- '.com', ''))) < 4:
- pass
-
-
- else:
-
- match = str(urlfinal[0]).lower()
- match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace(
- 'https',
- '').replace(
- 'http', '').replace(":", "").replace("/", "").upper()
- print(match)
-
- s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace('.com',
- '') + " /" + \
- organizations[1].replace(":ORG", "").replace('"', '').replace('.com', '')
- s1 = s1g.upper()
- s2 = match.upper()
- from difflib import SequenceMatcher
- print(s1)
- print(s2)
- print(SequenceMatcher(None, s1, s2).ratio())
- if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
- # and SequenceMatcher(None, s1, s2).ratio()<0.50:
- final.append(
- "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
- '').replace(
- '.com', '').replace(']', '') + " /" + organizations[1].replace(":ORG", "").replace('"',
- '').replace(
- '.com',
- '').replace(']', ''))
- else:
- final.append("OrganizationName--" + s2)
-
- except IndexError:
- try:
- if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('[', '').replace(']',
- '').replace(
- '"',
- '').replace(
- '.com', '').replace('.in', ''))) < 4:
- pass
-
- else:
- match = str(urlfinal[0]).lower()
- match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co',
- '').replace(
- 'https', '').replace('http', '').replace(":", "").replace("/", "").upper()
-
- s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace('.com', '')
- s1 = s1g.upper()
- s2 = match.upper()
- from difflib import SequenceMatcher
- print(s1)
- print(s2)
- print(SequenceMatcher(None, s1, s2).ratio())
- if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
- # and SequenceMatcher(None, s1, s2).ratio()<0.50:
- final.append(
- "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace(
- '[',
- '').replace(
- ']', '').replace(
- '.com', ''))
- else:
- final.append("OrganizationName--" + s2)
-
- except IndexError:
- try:
- match = str(urlfinal[0]).lower()
- match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co',
- '').upper()
- final.append("OrganizationName--" + match)
- # remove_list.append(match)
- except IndexError:
- company()
-
- #################################################company Name########################################
-
- def company():
- print('company list working')
- import re
-
- new = []
- with open('test.txt', 'r+') as f:
- flag = False
- for line in f:
- line = line.upper()
- matches = re.findall(
- r'''\bENTERPRISE\b|\bTRADE\b|\bEMPIRE\b|\bSTORES\b|\bMACHINERY\b|\bINDUSTRIES\b|\bTECHNOLOGY\b|\bCOMPANY\b|\bDESIGNERS\b|\bPOLYMER\b|\bBELT\b|\bAGRO\b|\bPLASTIC\b|\bGROUP\b|\bTOOLS\b|\bENGG.\b|\bSOLUTION\b|\bCONSTRUCTION\b|\bPACK\b|\bELECT\b|\bSTEEL\b|\bIRON\b|\bDIES\b|\bMOULD\b|\bCORPORATION\b|\bSEEDS\b|\bPOWER\b|\bCONSULTANT\b|\bMFG.\b|\bPRINT\b|\bFOOD\b|\bSOLAR\b|\bINDUSTRY\b|\bLIMITED\b|\bPRIVATE\b|\bPVT\b|\bLTD\b|\bOUTSOURCING\b|\bCNC\b|\bMACHINERIES\b|\bSOLUTIONS\b|\bENGINEERS\b|\bWORKS\b|\bPRODUCTS\b|\bENTERPRISES\b|\bCOMPANIES\b|\bPOLYMERS\b|\bTRADING\b''',
- line)
-
- for i in matches:
- if i in line:
- flag = True
- if flag:
- o = "OrganizationName--" + line
- new.append(o)
- # if line.startswith('\n'):
- # flag = False
- try:
- a = new[0].replace('\n', '')
- final.append(a)
- except IndexError:
- final.append("OrganizationName--")
-
- # ************************************* CONTACT PERSON *******************************************************************
- def contactpersonname():
- print('contactpersonname working')
- try:
- final.append(
- "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace('"', '').replace(
- "]",
- "") + '/' +
- PErsons[
- 1].replace(":PER", "").replace('"', ''))
- except IndexError:
- try:
- final.append(
- "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace("]",
- "").replace(
- '"', ''))
- except IndexError:
- final.append("CONTACTPERSONNAME--")
-
- def image_to_text():
-
- # doc = DocumentFile.from_images(found)
- # result = model(doc)
- # image_to_text.txt = result.render()
-
- # tess.pytesseract.tesseract_cmd = r"C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe"
- # img = Image.open(found)
- # text = tess.image_to_string(img)
- # image_to_text.txt = text
- # print(text)
- import cv2
- img_path = found
- img = cv2.imread(img_path)
- img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
- cv2.imwrite(str(found), img)
-
- result = ocr.ocr(img_path, cls=True)
- result = result[0]
-
- txts = [line[1][0] for line in result]
-
- image_to_text.txt = ""
- for i in txts:
- if len(i) < 4:
- continue
- # print(i+"\n")
- image_to_text.txt = image_to_text.txt + str(i) + "\n"
- # print(image_to_text.txt)
-
- def pdf_to_text():
-
- from pdfminer.high_level import extract_text
- pdf_to_text.txt = extract_text(found)
- # pdf_to_text.txt= text.replace('\n', ' ')
-
- extensionlist = ['JPEG', 'jpg', 'png', 'JPG', 'PNG', 'jpeg']
-
- if extension in extensionlist:
- print('image' + extension)
- image_to_text()
- x = image_to_text.txt
-
- else:
- print('pdf' + extension)
- pdf_to_text()
- x = pdf_to_text.txt
-
- verticaltext = x
- htext = x
- # print('------------------------------------------------')
- #print('############################################################# this is verticaltext #################################################################')
- # print(verticaltext)
- htext = htext.replace('\n', ' ')
- # print('############################################################# this is htext #############################################################')
- #print(htext)
- y = x.replace('\n', ',')
- y = y.replace(' ', ' ')
- # y = y.replace(".", " .")
- horizontaltext = y
- # print('------------------------------------------------')
- #print('############################################################# this is horizontaltext #############################################################')
- #print(horizontaltext)
-
- textfile = open("test123456.txt", "w")
- a = textfile.write(verticaltext)
- textfile.close()
- textfile = open("vtext.txt", "w")
- a = textfile.write(horizontaltext)
- textfile.close()
- with open('test123456.txt', 'r') as f:
- with open('test.txt', 'w') as w:
- for line in f:
- if line.strip().replace('|', ''):
- w.write(line)
-
- ###########################ADDRESS##################################
- addrespinlst = []
-
- def splitaddress():
- import re
- textaddress = htext.replace('\n', ' ')
- # print(textaddress)
-
- address1 = (textaddress.partition(",")[0])
- words = address1.split()
- address1 = words[-1]
- addre = (htext.partition(",")[2])
- a = addre.replace('\n', ' ').replace('\x0c', '')
- addre = (a.partition(",")[2])
- matches = re.findall(
- r'(.*?)-\d{3} \d{3}|(.*?)\b-\d{6}\b|(.*?)\b\d{6}\b|(.*?)\b\d{3} \d{3}\b|\b(.*?)-\d{2}\b|(.*?)\b\d{3} \d{3}\b',
- a)
- for match in matches:
- address2 = match
- address2 = str(address2)
- address2 = address2.replace("'", "").replace("(", "").replace(")", "").replace(', ,', '').replace(' ',
- '')
-
- matches = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3} \d{3}\b|\b-\d{2}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', a)
- for address3 in matches:
- pass
- try:
- Address = address1 + "," + address2 + "," + address3
- final.append('ADDRESS--' + Address)
- addrespinlst.append(Address)
-
- except NameError:
-
- print(
- '############################################################ Addressmodelworking #############################################################')
-
- # doc = nlp_model1(textaddress)
- # addlist = []
- # for ent in doc.ents:
- # name = (f'{ent.label_.upper():{10}}--{ent.text}')
- # addlist.append(name)
- # try:
- # Address = addlist[0]
- # final.append(Address)
- # addrespinlst.append(Address)
- # remove_list.append(
- # str(Address).replace("[", "").replace("]", "").replace("\\n", "").replace("'", "").replace(
- # "ADDRESS--",
- # ""))
- # except IndexError:
- # final.append("ADDRESS--")
- pass
-
- ################################################## website#######################################################
-
- # import re
-
- # url = []
- # matches = re.findall(r'www.*', verticaltext)
- # for match in matches:
- # if (match.count('.')) == 1:
- # a_string1 = match.replace("www", "www.")
-
- # final.append("Urls--" + a_string1)
- # url.append(a_string1)
- # else:
-
- # final.append("Urls--" + match)
-
- # if len(url)==0:
-
- # from urlextract import URLExtract
-
- # extractor = URLExtract()
- # urls = extractor.find_urls(verticaltext)
- # try:
- # urllist = urls[0]
- # final.append("Urls--"+urllist)
- # url.append(urllist)
- # except IndexError:
- # final.append("Urls--")
-
- # for match in matches:
- # if (match.count('.')) == 1:
- # a_string1 = match.replace("www", "www.")
-
- # final.append("Urls--" + a_string1)
- # url.append(a_string1)
- # else:
-
- # final.append("Urls--" + match)
- # url.append(match)
- # remove_list.append(match)
- # else:
- # final.append("Urls--" )
-
- ################################################## website#######################################################
-
- import re
- # final=[]
- url = []
- urlfinal = []
- matches = re.findall(r'www.*', verticaltext)
- for match in matches:
-
- if (match.count('.')) == 1:
- a_string1 = match.replace("www", "www.")
-
- # final.append("Urls--" + a_string1)
- url.append(a_string1)
- else:
-
- url.append(match)
-
- if len(url) == 0:
-
- from urlextract import URLExtract
-
- extractor = URLExtract()
- urls = extractor.find_urls(verticaltext)
- try:
- urllist = urls[0]
- url.append(urllist)
- url.append(urllist)
- except IndexError:
- pass
-
- for match in matches:
- if (match.count('.')) == 1:
- a_string1 = match.replace("www", "www.")
-
- url.append(a_string1)
- # url.append(a_string1)
- else:
-
- url.append(match)
- url.append(match)
-
- else:
- pass
- try:
- test_string = url[0]
-
- test_list = ['com', 'www', 'in', 'co', "WWW", "COM", "CO", "IN"]
-
- res = [ele for ele in test_list if (ele in test_string)]
-
- if len(res) == 0:
- print('no match')
-
- final.append('urls--')
-
-
- else:
- print('matched')
- final.append('urls--' + url[0])
- urlfinal.append(url[0])
-
-
- except IndexError:
- final.append('urls--')
-
- print(
- '############################################################# url #############################################################')
- print(url)
- #######organisation and contact################
-
- # def company_url():
- # # print('--url--')
- # # print(url)
-
- # try:
- # match = str(url[0]).lower()
- # match =match.replace('.com','').replace('www.','').replace('.in','').replace('.co','').upper()
- # final.append("OrganizationName--" + match)
- # # remove_list.append(match)
- # except IndexError:
- # org_name()
- # organisation()
- # final.append("OrganizationName--")
-
- # make example sentence
-
- # print(horizontaltext)
- sentence = Sentence(verticaltext)
-
- # predict NER tags
- tagger.predict(sentence)
-
- # print sentence
- ko = (sentence)
-
- ko1 = str(ko).split("→")
- import pandas as pd
-
- dfg = []
- try:
- s = ko1[1].replace("", "").replace("", "").replace("/", ":")
- except IndexError:
- os.remove(found)
- return 'Invalid image'
- dfg.append(s)
- df = pd.DataFrame(dfg)
- df = df[0]
-
- df.to_csv("df.csv", index=False)
-
- df1 = pd.read_csv("df.csv")
- ve = df1["0"].str.split(",")
- fgf = ve.to_list()
- dfgh = pd.DataFrame(fgf[0])
- maindf = dfgh[0] # .str.split(":")
- # maindf.to_csv("main.csv")
-
- main1 = maindf.to_list()
- main1
- # cv=pd.DataFrame(ve)
- # cv
- per = ["PER"]
- org = ["ORG"]
- loc = ["LOC"]
- organizations = [i for i in main1 for j in org if j in i]
- PErsons = [i for i in main1 for j in per if j in i]
- location = [i for i in main1 for j in loc if j in i]
-
- # ************************************* ORGANIZATION ********************************************************************
- try:
- if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
- '').replace(
- ']', '').replace(
- '.com', '') + " /" + organizations[1].replace(":ORG", "").replace('"', '').replace('.com', ''))) < 4:
- pass
- # company_url()
- else:
-
- match = str(urlfinal[0]).lower()
- match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace(
- 'https',
- '').replace(
- 'http', '').replace(":", "").replace("/", "").upper()
- print(match)
-
- s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace(']', '').replace(
- '.com', '') + " /" + \
- organizations[1].replace(":ORG", "").replace('"', '').replace('.com', '')
- s1 = s1g.upper()
- s2 = match.upper()
- from difflib import SequenceMatcher
- print(s1)
- print(s2)
- print(SequenceMatcher(None, s1, s2).ratio())
- if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
- # and SequenceMatcher(None, s1, s2).ratio()<0.50:
- final.append(
- "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
- '').replace(
- '.com', '').replace(']', '') + " /" + organizations[1].replace(":ORG", "").replace('"',
- '').replace(
- '.com', '').replace(']', ''))
- else:
- final.append("OrganizationName--" + s2)
-
-
-
- except IndexError:
- try:
- if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('[', '').replace(']',
- '').replace(
- '"',
- '').replace(
- '.com', ''))) < 4:
- pass
- # company_url()
- else:
-
- match = str(urlfinal[0]).lower()
- match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace(
- 'https', '').replace('http', '').replace(":", "").replace("/", "").upper()
-
- s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace(']',
- '').replace(
- '.com', '')
- s1 = s1g.upper()
- s2 = match.upper()
- from difflib import SequenceMatcher
- print(s1)
- print(s2)
- print(SequenceMatcher(None, s1, s2).ratio())
- if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
- # and SequenceMatcher(None, s1, s2).ratio()<0.50:
- final.append(
- "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
- '').replace(
- ']', '').replace(
- '.com', '').replace(']', ''))
- else:
- final.append("OrganizationName--" + s2)
-
- except IndexError:
- org_name()
- organisation()
-
- # final.append("OrganizationName--")
-
- # ************************************* CONTACT PERSON *******************************************************************
- try:
- final.append(
- "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace('"', '').replace("]",
- "") +
- PErsons[
- 1].replace(":PER", "").replace('"', ''))
- except IndexError:
- try:
- final.append(
- "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace("]", "").replace(
- '"',
- ''))
- except IndexError:
- org_name()
- contactpersonname()
- # final.append("CONTACTPERSONNAME--")
- ###############address flair#####################
-
- try:
- print(
- '############################################################# address new code #############################################################')
- loactionlst = ['address', 'factory', 'd.no', 'h.no', 'h. no', 'plot', 'flat', 'plat']
- loclst = [i for i in loactionlst if i in htext.lower()]
-
- textaddress = htext
- textaddress = textaddress.replace("|", ",")
- textaddress = textaddress.lower()
-
- nlp = pipeline('ner', model=model, tokenizer=tokenizer, aggregation_strategy="simple")
- grop = nlp(textaddress)
-
- citycountry = []
- print('########################### city or country name ###########################')
- d = grop[-1]
-
- if d['entity_group'] == "COUNTRY":
- print(d["word"])
- citycountry.append(d["word"])
- elif d['entity_group'] == "CITY":
- print(d["word"])
- citycountry.append(d["word"])
-
- try:
- address1 = loclst[0]
- except IndexError:
- address1 = (textaddress.partition(",")[0])
- words = address1.split()
- address1 = words[-1]
-
- star_location = address1.lower()
- end_location = citycountry[0].replace("#", "")
- start = star_location
- end = end_location
- s = textaddress.lower()
- middle_address = (s.split(start))[-1].split(end)[0]
- Address = start + middle_address + end
- Address = Address.replace('--', '').title()
- print(Address)
- if Address.count(',') < 2:
- splitaddress()
- else:
- final.append('ADDRESS--' + Address)
-
- # star_location = location[0].replace(":LOC", "").replace('"', '').replace('[', '')
- # end_location = location[-1].replace(":LOC", "").replace('"', '').replace(']', '')
- # d1 = star_location.split()
- # d2 = end_location.split()
- # d3 = d1[0]
- # d4 = d2[0]
- # start = d3
- # end = d4
- # s = horizontaltext
- # middle_address = ((s.split(start))[1].split(end)[0])
- # Address = d3 + middle_address + d4
- # final.append('ADDRESS--' + Address)
- # addrespinlst.append(Address)
-
-
- except IndexError:
- splitaddress()
-
- ########################################## Designation ###########################################
- import re
- new = []
- with open('test.txt', 'r') as f:
- flag = False
- for line in f:
- line1 = line
- line = line.upper()
- matches = re.findall(
- r'''\bAPPRENTICE\b|\bEXECUTIVE\b|\bPROPRIETOR\b|\bPARTNER\b|\bMD\b|\bANALYST\b|\bPRACTITIONER\b|\bCUSTOMER\b|\bCOO\b|\bCOACH\b|\bADMINISTRATIVE\b|\bADMINISTRATOR\b|\bAGENT\b|\bHEAD\b|\bCHIEF\b|\bDIRECTOR\b|\bVICE\b|\bPRESIDENT\b|\bMANAGER\b|\bCOORDINATOR\b|\bCOUNSELOR\b|\bSUPERVISOR\b|\bASSISTANT\b|\bSPECIALIST\b|\bARTIST\b|\bWORKER\b|\bCONSULTANT\b|\bREPRESENTATIVE\b|\bARCHITECT\b|\bSTAFF\b|\bMEMBER\b|\bDEVELOPER\b|\bENGINEER\b|\bEXAMINOR\b|\bDOCTOR\b|\bPROFESSOR\b|\bTEACHER\b|\bLEAD\b|\bOFFICER\b|\bCEO\b|\bC.E.O\b|\bJUNIOR\b|\bSENIOR\b|\bPROFESSOR\b|\bSALES\b''',
- line)
- for match in matches:
- line = line.replace('-', '')
- # print(line)
- o = "Designation--" + line
- new.append(o)
- remove_list.append(str(line1).replace('\n', ''))
-
- try:
- a = new[0].replace('\n', '')
- final.append(a)
-
- except IndexError:
- final.append("Designation--")
-
- ###################################################Phone number#################################################
- num = []
- import phonenumbers
-
- # print(verticaltext)
- numbers = phonenumbers.PhoneNumberMatcher(
- verticaltext.replace('+91', '').replace('(0)', '').replace('(', '').replace(')', ''), "IN")
-
- for number in numbers:
- number = str(number).split(")")
- num.append(number[1])
- # num.append(number[-1])
- if len(num) == 0:
- final.append("ContactNumber--")
- final.append("OrganizationNumber--")
- elif len(num) > 1:
- final.append("ContactNumber--" + num[0].replace(' ', ''))
- final.append("OrganizationNumber--" + num[-1].replace(' ', ''))
- elif len(num) == 1:
- try:
- final.append("ContactNumber--" + num[0].replace(' ', ''))
- final.append("OrganizationNumber--")
- except IndexError:
- final.append("ContactNumber--")
- final.append("OrganizationNumber--")
- print(
- '############################################################# num #############################################################')
- print(num)
- # try:
- # final.append("PhoneNumber--" + num[0].replace(' ', ''))
- # remove_list.append(num[0])
- # except IndexError:
- # pass
- # try:
- # final.append("PhoneNumber1--" + num[1].replace(' ', ''))
- # remove_list.append(num[1])
- # except IndexError:
- # pass
- # try:
- # final.append("PhoneNumber2--" + num[2].replace(' ', ''))
- # remove_list.append(num[2])
- # except IndexError:
- # pass
-
- ################################################### Email######################################################
- import re
- from email_scraper import scrape_emails
- s = list(scrape_emails(horizontaltext))
- email_id = s
-
- # email_id = []
- # matches = re.findall(r'[\w\.-]+@[\w\.-]+', verticaltext)
- # for match in matches:
- # email_id.append(match)
-
- # # final.append('Email--' + match)
- # email_ = str(email_id).replace("[", "").replace("]", "").replace("'", "")
- # # final.append(email_)
-
- # # final.append('Email--' + email_)
- # # remove_list.append(email_)
- if len(email_id) > 1:
- final.append(
- 'ContactEmail--' + str(email_id[0]).replace("[", "").replace("]", "").replace("\\n", "").replace("'",
- ""))
- final.append(
- 'OrganizationEmail--' + str(email_id[-1]).replace("[", "").replace("]", "").replace("\\n", "").replace(
- "'",
- ""))
- else:
- try:
- final.append(
- 'ContactEmail--' + str(email_id[0]).replace("[", "").replace("]", "").replace("\\n", "").replace(
- "'",
- ""))
- final.append('OrganizationEmail--')
- except IndexError:
- final.append('ContactEmail--')
- final.append('OrganizationEmail--')
-
- ###############PINCODE############
-
- pinlst = []
- print(addrespinlst)
- import pgeocode
-
- # try:
- # matche1 = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3} \d{3}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', addrespinlst[0])
- # for i in matche1:
- # address3 = i.replace(' ', '').replace('-', '')
- # pinlst.append(address3)
- # except IndexError:
-
- lst = []
- for i in num:
- i = i[1:]
- lst.append(i)
-
- infile = r"vtext.txt"
- outfile = r"cleaned_file.txt"
- import glob
- delete_list = lst
- # delete_list = ["firstname1 lastname1","firstname2 lastname2","firstnamen lastnamen",'Director - Sales & Business Development']
- fin = open(infile, "r+")
- fout = open(outfile, "w+")
- for line12 in fin:
- for word in delete_list:
- line12 = line12.replace(word, "")
-
- fout.write(line12)
- fin.close()
- # print(line)
-
- # print(addrespinlst)
- import pgeocode
- #print(line12)
- import re
- matche1 = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3} \d{3}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', line12)
- for i in matche1:
- address3 = i.replace(' ', '').replace('-', '')
- pinlst.append(address3)
-
- nomi = pgeocode.Nominatim('IN')
- try:
- a = nomi.query_postal_code(str(pinlst[-1]))
- # print(a)
- b = a.keys()
- c = b.values.tolist()
- d = a.tolist()
- postal_code = "PinCode1" + "--" + d[0]
- final.append(postal_code)
- country_code = c[1] + "--" + str(d[1])
- final.append(country_code)
- place_name = 'LandMark1' + "--" + str(d[2])
- final.append(place_name)
- state_name = c[3] + "--" + str(d[3])
- final.append(state_name)
- state_code = c[4] + "--" + str(d[4])
- final.append(state_code)
- county_name = 'CityName1' + "--" + str(d[5])
- final.append(county_name)
-
- except (IndexError, NameError):
- final.append("PinCode1--")
- final.append("country_code--")
- final.append("LandMark1--")
- final.append("state_name--")
- final.append("state_code--")
- final.append("CityName1--")
-
- ######################################################## json #####################################################################
-
- import pandas as pd
- df = pd.DataFrame(final)
- df1 = df[0].str.split('--', expand=True)
- # print(df1)
- df1.rename({df1.columns[-2]: 'Keys'}, axis=1, inplace=True)
- df1.rename({df1.columns[-1]: 'Values'}, axis=1, inplace=True)
- df1['Keys'] = df1['Keys'].str.strip()
- df1.to_csv('path123.csv', index=False)
- df2 = pd.read_csv('path123.csv')
- print(df2)
- df2 = df2.T
- df2.to_csv('path1.csv', index=False, header=False)
- df1 = pd.read_csv('path1.csv')
- df1.to_json('firstjson1.json', orient="index")
- import json
- with open('firstjson1.json', 'r') as json_file:
- json_load = json.load(json_file)
- # # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/businesscards/create"
- nothing = json.dumps(json_load).replace("]", "").replace("[", "").replace('{"0":', '').replace('}}', '}')
- # # print('--------------------------------------------------------------------------')
- # # print(nothing)
- empty = []
- import base64
- name = found
- image = open(name, 'rb')
- image_read = image.read()
- image_64_encode = base64.b64encode(image_read)
- NULL = 'null'
- empty.append("ByteData--" + (NULL).strip('""'))
- image_64_encode = image_64_encode.decode('utf-8')
- empty.append("FileData--" + str(image_64_encode))
- imagedata = name.split("/")
- imagename = str(imagedata[-1]).replace('"', '').replace("[", "").replace("]", "")
- imagename1 = str(imagename).split('.')
- imagename = str(imagename1[-2]).replace("[", "]")
- empty.append("FileName--" + imagename)
- empty.append("FilePath--"+ "")
- imageExtension = str(imagename1[-1]).replace("[", "]")
- empty.append("FileType--" + imageExtension)
- image.close()
- import pandas as pd
- df = pd.DataFrame(empty)
- df = df[0].str.split("--", expand=True)
- data1 = pd.DataFrame(df[0])
- data2 = pd.DataFrame(df[1])
- dt = data2.set_index(data1[0])
- dt4 = dt.T
- dictionary = dt4.to_dict(orient="index")
- list1 = []
- # list.append(a)
- list1.append(dictionary[1])
- # # final.append("image--"+str(dictionary[1]).replace("\'",'"'))
- print('--------------------')
- # print(namelist)
- import json
- # JSON data:
- x = nothing
- # python object to be appended
- y = {"image": dictionary[1]}
- # parsing JSON string:
- z = json.loads(x)
- # appending the data
- z.update(y)
- # the result is a JSON string:
- # print(json.dumps(z))
-
- zlist.append(z)
- #############################################creating csv#####################################
- #print(final)
- #print(imagelist)
- #final.append('image--' + str(imagelist))
- # import requests
- # import json
-
- # # url = "https://anwi.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create/list" #dev
- # url = "https://qa.bizgaze.com/apis/v4/bizgaze/integrations/businesscards/create/list" #testing
- # # url = "https://test.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create" # test
- # # url='http://localhost:3088/apis/v4/bizgaze/integrations/businesscards/create'
- # # url = 'https://c01.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create' # C01
- # payload1 = json.dumps(zlist)
- # # print('--------------------------------------------------------------------------')
- # #print(payload1)
- # headers = {
- # #'Authorization': 'stat 1a936137490040c997928f485e3cdd7a', #dev
- # # 'Authorization': 'stat 16516391d0074f4c8a15ea16fb49470b',#testing
- # # 'Authorization': 'stat 08e55fcfbaa940c8ab8145a074c444d1',
- # # 'Authorization': 'stat f7cdb402e01e44e5842878653946168f', # c01
- # #'Authorization': 'Stat c3e11b2fcbfe455b86a1fe6efde02a69',#demo
- # 'Authorization':'Stat e5bc6ad08f2c42feb5f98a2a521d00af',
-
-
- # 'Content-Type': 'application/json'
- # }
- # response = requests.request("POST", url, headers=headers, data=payload1)
- # # print("##############################################################")
-
- # print(payload1)
- # #print(zlist)
- # # import os
- # # if 'BusinessCards Created Successfully' in response.text:
- # # print('present')
- # # os.remove(found)
- # # else:
- # # print('not present')
-
- # df1.to_json('visitingcard.json')
- # data = df1.to_json('visiting.json', orient='records')
- # print(data)
-
- #return render_template('index.html')
-
-
- #return response.text
- #return z
- return zlist
-
-
-
-
- # Script entry point: start the Flask development server for this app.
- # NOTE(review): host='0.0.0.0' binds on all network interfaces, exposing the
- # dev server externally on port 1112 — confirm this is intended (Flask's dev
- # server is not meant for production use).
- # presumably `app` is a Flask instance created near the top of this file — verify.
- if __name__ == "__main__":
- app.run(host='0.0.0.0', port=1112)
|