from flask import Flask, render_template, request, redirect, Response, send_file
import os
import requests
import pandas as pd
import pgeocode
from email_scraper import scrape_emails
import phonenumbers
from pdfminer.high_level import extract_text
import pytesseract
import time
import multiprocessing
from PIL import Image
from functools import partial
from urlextract import URLExtract
import pytesseract as tess
from PIL import Image
# from doctr.io import DocumentFile
# from doctr.models import ocr_predictor
# model = ocr_predictor(pretrained=True)
# load tagger

# import spacy

# nlp_model1 = spacy.load('./ADD300_new3.0')
from flair.data import Sentence
from flair.models import SequenceTagger
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

tokenizer = AutoTokenizer.from_pretrained("ml6team/bert-base-uncased-city-country-ner")
model = AutoModelForTokenClassification.from_pretrained("ml6team/bert-base-uncased-city-country-ner")

from paddleocr import PaddleOCR, draw_ocr

ocr = PaddleOCR(use_angle_cls=True, lang='en', use_space_char=True, show_log=True)
tagger = SequenceTagger.load("flair/ner-english-large")

import datetime

app = Flask(__name__)


# app.config["IMAGE_UPLOADS"] = "C:/inetpub/wwwroot/FlaskApplication/Flask_Demo/upload/"


@app.route('/', methods=['GET'])
def resume():
    return render_template('index.html')


#@app.route('/upload_BusinessCards', methods=["POST"])
def predict(Dataset):
    print('################## single card detection #######################')
    starttime = datetime.datetime.now()
    print('Execution Started at:', starttime)

    # print(Dataset)
    import os

    # if request.method == "POST":

    #     if request.files:

    #         image = request.files["image"]
    #         try:
    #             image.save(os.path.join(
    #                 app.config["IMAGE_UPLOADS"], image.filename))
    #         except IsADirectoryError:
    #             return render_template('card.html')
    #         # image.save(os.path.join(
    #         #     app.config["IMAGE_UPLOADS"], image.filename))

    #         print("Image saved")

    #         return redirect(request.url)
    #url_list = request.get_json()
    # print(Dataset)
    # print(url_list)
        #dataset = request.get_json()
    # print(data)
   # data = {'visiting': Dataset}
    a=Dataset[0]
    #a = url_list
    # print(a)
    x = a['FileData']
    # print(x)
    y = a['FileName']
    z = a['FileType']
    # CreatedBy=a['CreatedBy']

    name = y + '.' + z
    # print(name)
    # print(y)
    # image = y.split("/")
    # filename=image[-1]

    # print(x)
    img_data = x.encode()

    import base64
    with open('./upload/' + name, "wb") as fh:
        fh.write(base64.decodebytes(img_data))
    import re
    import csv
    import glob
    import os
    # import pytesseract
    # import cv2
    import numpy as np

    import glob
    import os
    import cv2
    import requests

    final = []
    # final.append('assignto--'+CreatedBy)
    imagelist = []

    # print(found)

    remove_list = []
    import os
    import glob
    import pdfminer

    # import os
    # ts = 0
    # for file_name in glob.glob('./upload/*'):
    #     fts = os.path.getmtime(file_name)
    #     if fts > ts:
    #         ts = fts
    #         found = file_name
    found = './upload/' + name
    print(found)
    extension = found.split('.')[-1]

    # print(extension)

    def org_name():
        print('org_name is working')
        import pytesseract
        fname = found
        if extension != 'pdf':

            img = cv2.imread(fname)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            cv2.imwrite(str(found), img)
            from PIL import Image
            im = Image.open(found)
            im.save("images1.png", dpi=(1200, 1200))
            # import pytesseract
            fname = "images1.png"
            import pytesseract as tess
            from PIL import Image 

            tess.pytesseract.tesseract_cmd=r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe"
            pdf = tess.image_to_pdf_or_hocr(fname, extension="pdf")
            with open("demo.pdf","w+b",) as f:
                f.write(pdf)

            from pdfminer.high_level import extract_text
            text = extract_text('demo.pdf')

            # doc = DocumentFile.from_images(found)
            # result = model(doc)
            # text = result.render()

            # from pdfminer.high_level import extract_text
            # txt  = extract_text('demo.pdf')
        else:
            from pdfminer.high_level import extract_text
            text = extract_text(fname)

        sentence = Sentence(text)

        # predict NER tags
        tagger.predict(sentence)

        # print sentence
        ko = (sentence)

        ko1 = str(ko).split("→")
        import pandas as pd

        dfg = []
        try:
            s = ko1[1].replace("", "").replace("", "").replace("/", ":")

            # os.remove(found)
            # return 'Invalid image'
            dfg.append(s)
            df = pd.DataFrame(dfg)
            df = df[0]

            df.to_csv("df.csv", index=False)

            df1 = pd.read_csv("df.csv")
            ve = df1["0"].str.split(",")
            fgf = ve.to_list()
            dfgh = pd.DataFrame(fgf[0])
            maindf = dfgh[0]  # .str.split(":")
            # maindf.to_csv("main.csv")

            main1 = maindf.to_list()
            main1
            # cv=pd.DataFrame(ve)
            # cv
            per = ["PER"]
            org = ["ORG"]
            loc = ["LOC"]
            organizations = [i for i in main1 for j in org if j in i]
            PErsons = [i for i in main1 for j in per if j in i]
            location = [i for i in main1 for j in loc if j in i]
        except IndexError:
            pass

            # ************************************* ORGANIZATION ********************************************************************

    def organisation():
        print('organisation working ')
        try:
            if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
                                                                                                         '').replace(
                '.com', '').replace('.in', '') + " /" + organizations[1].replace(":ORG", "").replace('"',
                                                                                                     '').replace(
                '.com', ''))) < 4:
                pass


            else:

                match = str(urlfinal[0]).lower()
                match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace(
                    'https',
                    '').replace(
                    'http', '').replace(":", "").replace("/", "").upper()
                print(match)

                s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace('.com',
                                                                                                     '') + " /" + \
                      organizations[1].replace(":ORG", "").replace('"', '').replace('.com', '')
                s1 = s1g.upper()
                s2 = match.upper()
                from difflib import SequenceMatcher
                print(s1)
                print(s2)
                print(SequenceMatcher(None, s1, s2).ratio())
                if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
                    # and SequenceMatcher(None, s1, s2).ratio()<0.50:
                    final.append(
                        "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
                                                                                                             '').replace(
                            '.com', '').replace(']', '') + " /" + organizations[1].replace(":ORG", "").replace('"',
                                                                                                               '').replace(
                            '.com',
                            '').replace(']', ''))
                else:
                    final.append("OrganizationName--" + s2)

        except IndexError:
            try:
                if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('[', '').replace(']',
                                                                                                             '').replace(
                        '"',
                        '').replace(
                    '.com', '').replace('.in', ''))) < 4:
                    pass

                else:
                    match = str(urlfinal[0]).lower()
                    match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace(
                        'https', '').replace('http', '').replace(":", "").replace("/", "").upper()

                    s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace('.com', '')
                    s1 = s1g.upper()
                    s2 = match.upper()
                    from difflib import SequenceMatcher
                    print(s1)
                    print(s2)
                    print(SequenceMatcher(None, s1, s2).ratio())
                    if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
                        # and SequenceMatcher(None, s1, s2).ratio()<0.50:
                        final.append(
                            "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
                                                                                                                 '').replace(
                                ']', '').replace(
                                '.com', ''))
                    else:
                        final.append("OrganizationName--" + s2)

            except IndexError:
                try:
                    match = str(urlfinal[0]).lower()
                    match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').upper()
                    final.append("OrganizationName--" + match)
                    # remove_list.append(match)
                except IndexError:
                    company()

    #################################################company Name########################################

    def company():
        print('company list working')
        import re

        new = []
        with open('test.txt', 'r+') as f:
            flag = False
            for line in f:
                line = line.upper()
                matches = re.findall(
                    r'''\bENTERPRISE\b|\bTRADE\b|\bEMPIRE\b|\bSTORES\b|\bMACHINERY\b|\bINDUSTRIES\b|\bTECHNOLOGY\b|\bCOMPANY\b|\bDESIGNERS\b|\bPOLYMER\b|\bBELT\b|\bAGRO\b|\bPLASTIC\b|\bGROUP\b|\bTOOLS\b|\bENGG.\b|\bSOLUTION\b|\bCONSTRUCTION\b|\bPACK\b|\bELECT\b|\bSTEEL\b|\bIRON\b|\bDIES\b|\bMOULD\b|\bCORPORATION\b|\bSEEDS\b|\bPOWER\b|\bCONSULTANT\b|\bMFG.\b|\bPRINT\b|\bFOOD\b|\bSOLAR\b|\bINDUSTRY\b|\bLIMITED\b|\bPRIVATE\b|\bPVT\b|\bLTD\b|\bOUTSOURCING\b|\bCNC\b|\bMACHINERIES\b|\bSOLUTIONS\b|\bENGINEERS\b|\bWORKS\b|\bPRODUCTS\b|\bENTERPRISES\b|\bCOMPANIES\b|\bPOLYMERS\b|\bTRADING\b''',
                    line)

                for i in matches:
                    if i in line:
                        flag = True
                        if flag:
                            o = "OrganizationName--" + line
                            new.append(o)
        #                       if line.startswith('\n'):
        #                           flag = False
        try:
            a = new[0].replace('\n', '')
            final.append(a)
        except IndexError:
            final.append("OrganizationName--")

    # ************************************* CONTACT PERSON *******************************************************************
    def contactpersonname():
        print('contactpersonname working')
        try:
            final.append(
                "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace('"', '').replace("]",
                                                                                                                 "") + '/' +
                PErsons[
                    1].replace(":PER", "").replace('"', ''))
        except IndexError:
            try:
                final.append(
                    "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace("]", "").replace(
                        '"', ''))
            except IndexError:
                final.append("CONTACTPERSONNAME--")

    def image_to_text():

        # doc = DocumentFile.from_images(found)
        # result = model(doc)
        # image_to_text.txt = result.render()

        # tess.pytesseract.tesseract_cmd = r"C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe"
        # img = Image.open(found)
        # text = tess.image_to_string(img)
        # image_to_text.txt = text
        # print(text)
        import cv2
        img_path = found
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        cv2.imwrite(str(found), img)

        result = ocr.ocr(img_path, cls=True)
        result = result[0]

        txts = [line[1][0] for line in result]

        image_to_text.txt = ""
        for i in txts:
            if len(i) < 4:
                continue
                # print(i+"\n")
            image_to_text.txt = image_to_text.txt + str(i) + "\n"
            # print(image_to_text.txt)

    def pdf_to_text():

        from pdfminer.high_level import extract_text
        pdf_to_text.txt = extract_text(found)
        # pdf_to_text.txt= text.replace('\n', ' ')

    extensionlist = ['JPEG', 'jpg', 'png', 'JPG', 'PNG', 'jpeg']

    if extension in extensionlist:
        print('image' + extension)
        image_to_text()
        x = image_to_text.txt

    else:
        print('pdf' + extension)
        pdf_to_text()
        x = pdf_to_text.txt

    verticaltext = x
    htext = x
    # print('------------------------------------------------')
    print(
        '############################################################# this is verticaltext #################################################################')
    print(verticaltext)
    htext = htext.replace('\n', ' ')
    print(
        '############################################################# this is htext #############################################################')
    print(htext)
    y = x.replace('\n', ',')
    y = y.replace('  ', ' ')
    # y = y.replace(".", " .")
    horizontaltext = y
    # print('------------------------------------------------')
    print(
        '############################################################# this is horizontaltext #############################################################')
    print(horizontaltext)

    textfile = open("test123456.txt", "w")
    a = textfile.write(verticaltext)
    textfile.close()
    textfile = open("vtext.txt", "w")
    a = textfile.write(horizontaltext)
    textfile.close()
    with open('test123456.txt', 'r') as f:
        with open('test.txt', 'w') as w:
            for line in f:
                if line.strip().replace('|', ''):
                    w.write(line)

    ###########################ADDRESS##################################
    addrespinlst = []

    def splitaddress():
        import re
        textaddress = htext.replace('\n', ' ')
        # print(textaddress)

        address1 = (textaddress.partition(",")[0])
        words = address1.split()
        address1 = words[-1]
        addre = (htext.partition(",")[2])
        a = addre.replace('\n', ' ').replace('\x0c', '')
        addre = (a.partition(",")[2])
        matches = re.findall(
            r'(.*?)-\d{3} \d{3}|(.*?)\b-\d{6}\b|(.*?)\b\d{6}\b|(.*?)\b\d{3}  \d{3}\b|\b(.*?)-\d{2}\b|(.*?)\b\d{3} \d{3}\b',
            a)
        for match in matches:
            address2 = match
            address2 = str(address2)
            address2 = address2.replace("'", "").replace("(", "").replace(")", "").replace(', ,', '').replace('  ', '')

        matches = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3}  \d{3}\b|\b-\d{2}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', a)
        for address3 in matches:
            pass
        try:
            Address = address1 + "," + address2 + "," + address3
            final.append('ADDRESS--' + Address)
            addrespinlst.append(Address)

        except NameError:

            print(
                '############################################################ Addressmodelworking #############################################################')

            # doc = nlp_model1(textaddress)
            # addlist = []
            # for ent in doc.ents:
            #     name = (f'{ent.label_.upper():{10}}--{ent.text}')
            #     addlist.append(name)
            # try:
            #     Address = addlist[0]
            #     final.append(Address)
            #     addrespinlst.append(Address)
            #     remove_list.append(
            #         str(Address).replace("[", "").replace("]", "").replace("\\n", "").replace("'", "").replace(
            #             "ADDRESS--",
            #             ""))
            # except IndexError:
            #     final.append("ADDRESS--")
            pass
    ################################################## website#######################################################

    # import re

    # url = []
    # matches = re.findall(r'www.*', verticaltext)
    # for match in matches:
    #     if (match.count('.')) == 1:
    #         a_string1 = match.replace("www", "www.")

    #         final.append("Urls--" + a_string1)
    #         url.append(a_string1)
    #     else:

    #         final.append("Urls--" + match)

    # if len(url)==0:

    #     from urlextract import URLExtract

    #     extractor = URLExtract()
    #     urls = extractor.find_urls(verticaltext)
    #     try:
    #         urllist = urls[0]
    #         final.append("Urls--"+urllist)
    #         url.append(urllist)
    #     except IndexError:
    #         final.append("Urls--")

    #     for match in matches:
    #         if (match.count('.')) == 1:
    #             a_string1 = match.replace("www", "www.")

    #             final.append("Urls--" + a_string1)
    #             url.append(a_string1)
    #         else:

    #             final.append("Urls--" + match)
    #             url.append(match)
    #             remove_list.append(match)
    # else:
    #     final.append("Urls--" )

    ################################################## website#######################################################

    import re
    # final=[]
    url = []
    urlfinal = []
    matches = re.findall(r'www.*', verticaltext)
    for match in matches:

        if (match.count('.')) == 1:
            a_string1 = match.replace("www", "www.")

            # final.append("Urls--" + a_string1)
            url.append(a_string1)
        else:

            url.append(match)

    if len(url) == 0:

        from urlextract import URLExtract

        extractor = URLExtract()
        urls = extractor.find_urls(verticaltext)
        try:
            urllist = urls[0]
            url.append(urllist)
            url.append(urllist)
        except IndexError:
            pass

        for match in matches:
            if (match.count('.')) == 1:
                a_string1 = match.replace("www", "www.")

                url.append(a_string1)
                # url.append(a_string1)
            else:

                url.append(match)
                url.append(match)

    else:
        pass
    try:
        test_string = url[0]

        test_list = ['com', 'www', 'in', 'co', "WWW", "COM", "CO", "IN"]

        res = [ele for ele in test_list if (ele in test_string)]

        if len(res) == 0:
            print('no match')

            final.append('urls--')


        else:
            print('matched')
            final.append('urls--' + url[0])
            urlfinal.append(url[0])


    except IndexError:
        final.append('urls--')

    print(
        '############################################################# url #############################################################')
    print(url)
    #######organisation and contact################

    # def company_url():
    #     # print('--url--')
    #     # print(url)

    #     try:
    #         match = str(url[0]).lower()
    #         match =match.replace('.com','').replace('www.','').replace('.in','').replace('.co','').upper()
    #         final.append("OrganizationName--" + match)
    #         # remove_list.append(match)
    #     except IndexError:
    #         org_name()
    #         organisation()
    # final.append("OrganizationName--")

    # make example sentence

    # print(horizontaltext)
    sentence = Sentence(verticaltext)

    # predict NER tags
    tagger.predict(sentence)

    # print sentence
    ko = (sentence)

    ko1 = str(ko).split("→")
    import pandas as pd

    dfg = []
    try:
        s = ko1[1].replace("", "").replace("", "").replace("/", ":")
    except IndexError:
        os.remove(found)
        return 'Invalid image'
    dfg.append(s)
    df = pd.DataFrame(dfg)
    df = df[0]

    df.to_csv("df.csv", index=False)

    df1 = pd.read_csv("df.csv")
    ve = df1["0"].str.split(",")
    fgf = ve.to_list()
    dfgh = pd.DataFrame(fgf[0])
    maindf = dfgh[0]  # .str.split(":")
    # maindf.to_csv("main.csv")

    main1 = maindf.to_list()
    main1
    # cv=pd.DataFrame(ve)
    # cv
    per = ["PER"]
    org = ["ORG"]
    loc = ["LOC"]
    organizations = [i for i in main1 for j in org if j in i]
    PErsons = [i for i in main1 for j in per if j in i]
    location = [i for i in main1 for j in loc if j in i]

    # ************************************* ORGANIZATION ********************************************************************
    try:
        if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace(
                ']', '').replace(
                '.com', '') + " /" + organizations[1].replace(":ORG", "").replace('"', '').replace('.com', ''))) < 4:
            pass
            # company_url()
        else:

            match = str(urlfinal[0]).lower()
            match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace('https',
                                                                                                                '').replace(
                'http', '').replace(":", "").replace("/", "").upper()
            print(match)

            s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace(']', '').replace(
                '.com', '') + " /" + \
                  organizations[1].replace(":ORG", "").replace('"', '').replace('.com', '')
            s1 = s1g.upper()
            s2 = match.upper()
            from difflib import SequenceMatcher
            print(s1)
            print(s2)
            print(SequenceMatcher(None, s1, s2).ratio())
            if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
                # and SequenceMatcher(None, s1, s2).ratio()<0.50:
                final.append("OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
                                                                                                                  '').replace(
                    '.com', '').replace(']', '') + " /" + organizations[1].replace(":ORG", "").replace('"', '').replace(
                    '.com', '').replace(']', ''))
            else:
                final.append("OrganizationName--" + s2)


    except IndexError:
        try:
            if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('[', '').replace(']',
                                                                                                         '').replace(
                    '"',
                    '').replace(
                '.com', ''))) < 4:
                pass
                # company_url()
            else:

                match = str(urlfinal[0]).lower()
                match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace(
                    'https', '').replace('http', '').replace(":", "").replace("/", "").upper()

                s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace(']', '').replace(
                    '.com', '')
                s1 = s1g.upper()
                s2 = match.upper()
                from difflib import SequenceMatcher
                print(s1)
                print(s2)
                print(SequenceMatcher(None, s1, s2).ratio())
                if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
                    # and SequenceMatcher(None, s1, s2).ratio()<0.50:
                    final.append(
                        "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
                                                                                                             '').replace(
                            ']', '').replace(
                            '.com', '').replace(']', ''))
                else:
                    final.append("OrganizationName--" + s2)

        except IndexError:
            org_name()
            organisation()

            # final.append("OrganizationName--")

    # ************************************* CONTACT PERSON *******************************************************************
    try:
        final.append(
            "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace('"', '').replace("]", "") +
            PErsons[
                1].replace(":PER", "").replace('"', ''))
    except IndexError:
        try:
            final.append(
                "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace("]", "").replace('"',
                                                                                                                 ''))
        except IndexError:
            org_name()
            contactpersonname()
            # final.append("CONTACTPERSONNAME--")
    ###############address flair#####################

    try:
        print(
            '############################################################# address new code #############################################################')
        loactionlst = ['address', 'factory', 'd.no', 'h.no', 'h. no', 'plot', 'flat', 'plat']
        loclst = [i for i in loactionlst if i in htext.lower()]

        textaddress = htext
        textaddress = textaddress.replace("|", ",")
        textaddress = textaddress.lower()

        nlp = pipeline('ner', model=model, tokenizer=tokenizer, aggregation_strategy="simple")
        grop = nlp(textaddress)

        citycountry = []
        print('########################### city or country name ###########################')
        d = grop[-1]

        if d['entity_group'] == "COUNTRY":
            print(d["word"])
            citycountry.append(d["word"])
        elif d['entity_group'] == "CITY":
            print(d["word"])
            citycountry.append(d["word"])

        try:
            address1 = loclst[0]
        except IndexError:
            address1 = (textaddress.partition(",")[0])
            words = address1.split()
            address1 = words[-1]

        star_location = address1.lower()
        end_location = citycountry[0].replace("#", "")
        start = star_location
        end = end_location
        s = textaddress.lower()
        middle_address = (s.split(start))[-1].split(end)[0]
        Address = start + middle_address + end
        Address = Address.replace('--', '').title()
        print(Address)
        if Address.count(',') < 2:
            splitaddress()
        else:
            final.append('ADDRESS--' + Address)

        # star_location = location[0].replace(":LOC", "").replace('"', '').replace('[', '')
        # end_location = location[-1].replace(":LOC", "").replace('"', '').replace(']', '')
        # d1 = star_location.split()
        # d2 = end_location.split()
        # d3 = d1[0]
        # d4 = d2[0]
        # start = d3
        # end = d4
        # s = horizontaltext
        # middle_address = ((s.split(start))[1].split(end)[0])
        # Address = d3 + middle_address + d4
        # final.append('ADDRESS--' + Address)
        # addrespinlst.append(Address)


    except IndexError:
        splitaddress()

    ########################################## Designation ###########################################
    import re
    new = []
    with open('test.txt', 'r') as f:
        flag = False
        for line in f:
            line1 = line
            line = line.upper()
            matches = re.findall(
                r'''\bAPPRENTICE\b|\bEXECUTIVE\b|\bPROPRIETOR\b|\bPARTNER\b|\bMD\b|\bANALYST\b|\bPRACTITIONER\b|\bCUSTOMER\b|\bCOO\b|\bCOACH\b|\bADMINISTRATIVE\b|\bADMINISTRATOR\b|\bAGENT\b|\bHEAD\b|\bCHIEF\b|\bDIRECTOR\b|\bVICE\b|\bPRESIDENT\b|\bMANAGER\b|\bCOORDINATOR\b|\bCOUNSELOR\b|\bSUPERVISOR\b|\bASSISTANT\b|\bSPECIALIST\b|\bARTIST\b|\bWORKER\b|\bCONSULTANT\b|\bREPRESENTATIVE\b|\bARCHITECT\b|\bSTAFF\b|\bMEMBER\b|\bDEVELOPER\b|\bENGINEER\b|\bEXAMINOR\b|\bDOCTOR\b|\bPROFESSOR\b|\bTEACHER\b|\bLEAD\b|\bOFFICER\b|\bCEO\b|\bC.E.O\b|\bJUNIOR\b|\bSENIOR\b|\bPROFESSOR\b|\bSALES\b''',
                line)
            for match in matches:
                line = line.replace('-', '')
                # print(line)
                o = "Designation--" + line
                new.append(o)
                remove_list.append(str(line1).replace('\n', ''))

    try:
        a = new[0].replace('\n', '')
        final.append(a)

    except IndexError:
        final.append("Designation--")

    ###################################################Phone number#################################################
    num = []
    import phonenumbers

    # print(verticaltext)
    numbers = phonenumbers.PhoneNumberMatcher(
        verticaltext.replace('+91', '').replace('(0)', '').replace('(', '').replace(')', ''), "IN")

    for number in numbers:
        number = str(number).split(")")
        num.append(number[1])
        # num.append(number[-1])
    if len(num) == 0:
        final.append("ContactNumber--")
        final.append("OrganizationNumber--")
    elif len(num) > 1:
        final.append("ContactNumber--" + num[0].replace(' ', ''))
        final.append("OrganizationNumber--" + num[-1].replace(' ', ''))
    elif len(num) == 1:
        try:
            final.append("ContactNumber--" + num[0].replace(' ', ''))
            final.append("OrganizationNumber--")
        except IndexError:
            final.append("ContactNumber--")
            final.append("OrganizationNumber--")
    print(
        '#############################################################  num #############################################################')
    print(num)
    # try:
    #     final.append("PhoneNumber--" + num[0].replace(' ', ''))
    #     remove_list.append(num[0])
    # except IndexError:
    #     pass
    # try:
    #     final.append("PhoneNumber1--" + num[1].replace(' ', ''))
    #     remove_list.append(num[1])
    # except IndexError:
    #     pass
    # try:
    #     final.append("PhoneNumber2--" + num[2].replace(' ', ''))
    #     remove_list.append(num[2])
    # except IndexError:
    #     pass

    ################################################### Email######################################################
    import re
    from email_scraper import scrape_emails
    s = list(scrape_emails(horizontaltext))
    email_id = s

    # email_id = []
    # matches = re.findall(r'[\w\.-]+@[\w\.-]+', verticaltext)
    # for match in matches:
    #     email_id.append(match)

    #     # final.append('Email--' + match)
    #     email_ = str(email_id).replace("[", "").replace("]", "").replace("'", "")
    #     # final.append(email_)

    #     # final.append('Email--' + email_)
    #     # remove_list.append(email_)
    if len(email_id) > 1:
        final.append(
            'ContactEmail--' + str(email_id[0]).replace("[", "").replace("]", "").replace("\\n", "").replace("'", ""))
        final.append(
            'OrganizationEmail--' + str(email_id[-1]).replace("[", "").replace("]", "").replace("\\n", "").replace("'",
                                                                                                                   ""))
    else:
        try:
            final.append(
                'ContactEmail--' + str(email_id[0]).replace("[", "").replace("]", "").replace("\\n", "").replace("'",
                                                                                                                 ""))
            final.append('OrganizationEmail--')
        except IndexError:
            final.append('ContactEmail--')
            final.append('OrganizationEmail--')

    ###############PINCODE############

    pinlst = []
    print(addrespinlst)
    import pgeocode

    # try:
    #     matche1 = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3}  \d{3}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', addrespinlst[0])
    #     for i in matche1:
    #         address3 = i.replace(' ', '').replace('-', '')
    #         pinlst.append(address3)
    # except IndexError:

    lst = []
    for i in num:
        i = i[1:]
        lst.append(i)

    infile = r"vtext.txt"
    outfile = r"cleaned_file.txt"
    import glob
    delete_list = lst
    # delete_list = ["firstname1 lastname1","firstname2 lastname2","firstnamen lastnamen",'Director -  Sales  &  Business  Development']
    fin = open(infile, "r+")
    fout = open(outfile, "w+")
    for line12 in fin:
        for word in delete_list:
            line12 = line12.replace(word, "")

        fout.write(line12)
    fin.close()
    # print(line)

    # print(addrespinlst)
    import pgeocode
    print(line12)
    import re
    matche1 = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3}  \d{3}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', line12)
    for i in matche1:
        address3 = i.replace(' ', '').replace('-', '')
        pinlst.append(address3)

    nomi = pgeocode.Nominatim('IN')
    try:
        a = nomi.query_postal_code(str(pinlst[-1]))
        # print(a)
        b = a.keys()
        c = b.values.tolist()
        d = a.tolist()
        postal_code = "PinCode1" + "--" + d[0]
        final.append(postal_code)
        country_code = c[1] + "--" + str(d[1])
        final.append(country_code)
        place_name = 'LandMark1' + "--" + str(d[2])
        final.append(place_name)
        state_name = c[3] + "--" + str(d[3])
        final.append(state_name)
        state_code = c[4] + "--" + str(d[4])
        final.append(state_code)
        county_name = 'CityName1' + "--" + str(d[5])
        final.append(county_name)

    except (IndexError, NameError):
        final.append("PinCode1--")
        final.append("country_code--")
        final.append("LandMark1--")
        final.append("state_name--")
        final.append("state_code--")
        final.append("CityName1--")

    ########################################################   json     #####################################################################
    import pandas as pd

    df = pd.DataFrame(final)

    df1 = df[0].str.split('--', expand=True)
    # print(df1)
    df1.rename({df1.columns[-2]: 'Keys'}, axis=1, inplace=True)
    df1.rename({df1.columns[-1]: 'Values'}, axis=1, inplace=True)
    df1['Keys']=df1['Keys'].str.strip()
    df1.to_csv('path12.csv', index=False)

    df2 = pd.read_csv('path12.csv')
    print(final)
    print(df2)
    df2 = df2.T

    df2.to_csv('path.csv', index=False, header=False)
    df1 = pd.read_csv('path.csv')
    df1.to_json('firstjson.json', orient="index")

    import json

    with open('firstjson.json', 'r') as json_file:
        json_load = json.load(json_file)

        # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/businesscards/create"

    nothing = json.dumps(json_load).replace("]", "").replace("[", "").replace('{"0":', '').replace('}}', '}')
    # print('--------------------------------------------------------------------------')
    # print(nothing)

    empty = []
    import base64

    name = found
    image = open(name, 'rb')
    image_read = image.read()
    image_64_encode = base64.b64encode(image_read)
    NULL = 'null'
    empty.append("ByteData--" + (NULL).strip('""'))
    image_64_encode = image_64_encode.decode('utf-8')
    empty.append("FileData--" + str(image_64_encode))
    imagedata = name.split("/")
    imagename = str(imagedata[-1]).replace('"', '').replace("[", "").replace("]", "")
    imagename1 = str(imagename).split('.')

    imagename = str(imagename1[-2]).replace("[", "]")
    empty.append("FileName--" + imagename)
    empty.append("FilePath--" + found)
    imageExtension = str(imagename1[-1]).replace("[", "]")
    empty.append("FileType--" + imageExtension)
    image.close()
    import pandas as pd
    df = pd.DataFrame(empty)
    df = df[0].str.split("--", expand=True)
    data1 = pd.DataFrame(df[0])
    data2 = pd.DataFrame(df[1])
    dt = data2.set_index(data1[0])

    dt4 = dt.T

    dictionary = dt4.to_dict(orient="index")
    list1 = []

    # list.append(a)
    list1.append(dictionary[1])
    # # final.append("image--"+str(dictionary[1]).replace("\'",'"'))
    print('--------------------')
    # print(namelist)
    import json

    # JSON data:
    x = nothing

    # python object to be appended
    y = {"image": dictionary[1]}

    # parsing JSON string:
    z = json.loads(x)

    # appending the data
    z.update(y)

    # the result is a JSON string:
    # print(json.dumps(z))
    # print('##########################')
    # #print(z)
    # print('##########################')

    # #############################################creating csv#####################################
    # # print(final)
    # # print(imagelist)
    # final.append('image--' + str(imagelist))
    #
    # import requests
    # import json

    # with open('visitingcard1.json', 'r') as json_file:
    #     json_load = json.load(json_file)
    # url = "https://demo.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create"
    url = "https://test.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create"
    # url = 'https://c01.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create'  # C01

    payload1 = json.dumps(z)
    # print('--------------------------------------------------------------------------')
    # print(payload1)
    headers = {
        'Authorization': 'stat db226c95fae04943aa3e3c03a4381b2a',
        # 'Authorization': 'stat f7cdb402e01e44e5842878653946168f',  # c01
        # 'Authorization': 'Stat c3e11b2fcbfe455b86a1fe6efde02a69',#demosss
        'Content-Type': 'application/json'
    }
    response = requests.request("POST", url, headers=headers, data=payload1)
    print("##############################################################")

    # print(payload1)
    print(response.text)
    if 'BusinessCards Created Successfully' in response.text:
        print('present')
        os.remove(found) 
    else:
        print('not present')

    # df1.to_json('visitingcard.json')
    # data = df1.to_json('visiting.json', orient='records')
    # print(data)

    # return render_template('index.html')

  
    # print('Time Taken:',total)
    endtime = datetime.datetime.now()
    print('Completed at:', endtime)
    print(starttime)
    print(endtime)
    print('--------------------------')
    
    # z=end-start
    # print('Time Taken:',z)
    # return response.text
    # return 'done'
    return response.text

#@app.route('/upload_BusinessCards', methods=["POST"])
#@app.route('/multiplecards', methods=["POST"])
def multiplecards(Dataset):
    print('################## multiple card detection #######################')
    #print(Dataset)
    #dataset = request.get_json()
    # print(data)
    data = {'visiting': Dataset}
    for i in data['visiting']:
        import time
        #time.sleep(1)
        a = i
        x = a['FileData']
        # print(x)
        y = a['FileName']
        z = a['FileType']
        # CreatedBy=a['CreatedBy']

        name = y + '.' + z
        # print(name)
        # print(y)
        # image = y.split("/")
        # filename=image[-1]

        # print(x)
        img_data = x.encode()

        import base64
        with open('./multicards/' + name, "wb") as fh:
            fh.write(base64.decodebytes(img_data))
        # print(i)

        # import os
        # import glob
        # for i in glob.glob('./multipleupload/*'):

        found = './multicards/' + name
        print(found)
        extension = found.split('.')[-1]

        # for root, dirs, fils in os.glob('./multipleupload'):
        #     for name in files:
        #         foundfile= os.path.join(root, name)
        #         print(foundfile)

        import re
        import csv
        import glob
        import os
        # import pytesseract
        # import cv2
        import numpy as np
        import glob
        import os
        import cv2
        import requests
        final = []
        # final.append('assignto--'+CreatedBy)
        imagelist = []
        # print(found)
        remove_list = []
        import os
        import glob
        import pdfminer

        # import os
        # ts = 0
        # for file_name in glob.glob('./upload/*'):
        #     fts = os.path.getmtime(file_name)
        #     if fts > ts:
        #         ts = fts
        #         found = file_name
        # print(found)


        # print(extension)

        def org_name():
            print('org_name is working')
            import pytesseract
            fname = found
            if extension != 'pdf':

                img = cv2.imread(fname)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

                cv2.imwrite(str(found), img)
                from PIL import Image
                im = Image.open(found)
                im.save("images1.png", dpi=(1200, 1200))
                # import pytesseract
                fname = "images1.png"
                import pytesseract as tess
                from PIL import Image 

                tess.pytesseract.tesseract_cmd=r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe"
                pdf = tess.image_to_pdf_or_hocr(fname, extension="pdf")
                with open("demo.pdf","w+b",) as f:
                    f.write(pdf)

                from pdfminer.high_level import extract_text
                text = extract_text('demo.pdf')
                # doc = DocumentFile.from_images(found)
                # result = model(doc)
                # text = result.render()

                # from pdfminer.high_level import extract_text
                # txt  = extract_text('demo.pdf')
            else:
                from pdfminer.high_level import extract_text
                text = extract_text(fname)

            sentence = Sentence(text)

            # predict NER tags
            tagger.predict(sentence)

            # print sentence
            ko = (sentence)

            ko1 = str(ko).split("→")
            import pandas as pd

            dfg = []
            try:
                s = ko1[1].replace("", "").replace("", "").replace("/", ":")

                # os.remove(found)
                # return 'Invalid image'
                dfg.append(s)
                df = pd.DataFrame(dfg)
                df = df[0]

                df.to_csv("df.csv", index=False)

                df1 = pd.read_csv("df.csv")
                ve = df1["0"].str.split(",")
                fgf = ve.to_list()
                dfgh = pd.DataFrame(fgf[0])
                maindf = dfgh[0]  # .str.split(":")
                # maindf.to_csv("main.csv")

                main1 = maindf.to_list()
                main1
                # cv=pd.DataFrame(ve)
                # cv
                per = ["PER"]
                org = ["ORG"]
                loc = ["LOC"]
                organizations = [i for i in main1 for j in org if j in i]
                PErsons = [i for i in main1 for j in per if j in i]
                location = [i for i in main1 for j in loc if j in i]
            except IndexError:
                pass

                # ************************************* ORGANIZATION ********************************************************************

        def organisation():
            print('organisation working ')
            try:
                if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
                                                                                                             '').replace(
                    '.com', '').replace('.in', '') + " /" + organizations[1].replace(":ORG", "").replace('"',
                                                                                                         '').replace(
                    '.com', ''))) < 4:
                    pass


                else:

                    match = str(urlfinal[0]).lower()
                    match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace(
                        'https',
                        '').replace(
                        'http', '').replace(":", "").replace("/", "").upper()
                    print(match)

                    s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace('.com',
                                                                                                         '') + " /" + \
                          organizations[1].replace(":ORG", "").replace('"', '').replace('.com', '')
                    s1 = s1g.upper()
                    s2 = match.upper()
                    from difflib import SequenceMatcher
                    print(s1)
                    print(s2)
                    print(SequenceMatcher(None, s1, s2).ratio())
                    if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
                        # and SequenceMatcher(None, s1, s2).ratio()<0.50:
                        final.append(
                            "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
                                                                                                                 '').replace(
                                '.com', '').replace(']', '') + " /" + organizations[1].replace(":ORG", "").replace('"',
                                                                                                                   '').replace(
                                '.com',
                                '').replace(']', ''))
                    else:
                        final.append("OrganizationName--" + s2)

            except IndexError:
                try:
                    if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('[', '').replace(']',
                                                                                                                 '').replace(
                        '"',
                        '').replace(
                        '.com', '').replace('.in', ''))) < 4:
                        pass

                    else:
                        match = str(urlfinal[0]).lower()
                        match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co',
                                                                                                         '').replace(
                            'https', '').replace('http', '').replace(":", "").replace("/", "").upper()

                        s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace('.com', '')
                        s1 = s1g.upper()
                        s2 = match.upper()
                        from difflib import SequenceMatcher
                        print(s1)
                        print(s2)
                        print(SequenceMatcher(None, s1, s2).ratio())
                        if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
                            # and SequenceMatcher(None, s1, s2).ratio()<0.50:
                            final.append(
                                "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace(
                                    '[',
                                    '').replace(
                                    ']', '').replace(
                                    '.com', ''))
                        else:
                            final.append("OrganizationName--" + s2)

                except IndexError:
                    try:
                        match = str(urlfinal[0]).lower()
                        match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co',
                                                                                                         '').upper()
                        final.append("OrganizationName--" + match)
                        # remove_list.append(match)
                    except IndexError:
                        company()

        #################################################company Name########################################

        def company():
            print('company list working')
            import re

            new = []
            with open('test.txt', 'r+') as f:
                flag = False
                for line in f:
                    line = line.upper()
                    matches = re.findall(
                        r'''\bENTERPRISE\b|\bTRADE\b|\bEMPIRE\b|\bSTORES\b|\bMACHINERY\b|\bINDUSTRIES\b|\bTECHNOLOGY\b|\bCOMPANY\b|\bDESIGNERS\b|\bPOLYMER\b|\bBELT\b|\bAGRO\b|\bPLASTIC\b|\bGROUP\b|\bTOOLS\b|\bENGG.\b|\bSOLUTION\b|\bCONSTRUCTION\b|\bPACK\b|\bELECT\b|\bSTEEL\b|\bIRON\b|\bDIES\b|\bMOULD\b|\bCORPORATION\b|\bSEEDS\b|\bPOWER\b|\bCONSULTANT\b|\bMFG.\b|\bPRINT\b|\bFOOD\b|\bSOLAR\b|\bINDUSTRY\b|\bLIMITED\b|\bPRIVATE\b|\bPVT\b|\bLTD\b|\bOUTSOURCING\b|\bCNC\b|\bMACHINERIES\b|\bSOLUTIONS\b|\bENGINEERS\b|\bWORKS\b|\bPRODUCTS\b|\bENTERPRISES\b|\bCOMPANIES\b|\bPOLYMERS\b|\bTRADING\b''',
                        line)

                    for i in matches:
                        if i in line:
                            flag = True
                            if flag:
                                o = "OrganizationName--" + line
                                new.append(o)
            #                       if line.startswith('\n'):
            #                           flag = False
            try:
                a = new[0].replace('\n', '')
                final.append(a)
            except IndexError:
                final.append("OrganizationName--")

        # ************************************* CONTACT PERSON *******************************************************************
        def contactpersonname():
            print('contactpersonname working')
            try:
                final.append(
                    "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace('"', '').replace(
                        "]",
                        "") + '/' +
                    PErsons[
                        1].replace(":PER", "").replace('"', ''))
            except IndexError:
                try:
                    final.append(
                        "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace("]",
                                                                                                        "").replace(
                            '"', ''))
                except IndexError:
                    final.append("CONTACTPERSONNAME--")

        def image_to_text():

            # doc = DocumentFile.from_images(found)
            # result = model(doc)
            # image_to_text.txt = result.render()

            # tess.pytesseract.tesseract_cmd = r"C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe"
            # img = Image.open(found)
            # text = tess.image_to_string(img)
            # image_to_text.txt = text
            # print(text)
            import cv2
            img_path = found
            img = cv2.imread(img_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            cv2.imwrite(str(found), img)

            result = ocr.ocr(img_path, cls=True)
            result = result[0]

            txts = [line[1][0] for line in result]

            image_to_text.txt = ""
            for i in txts:
                if len(i) < 4:
                    continue
                    # print(i+"\n")
                image_to_text.txt = image_to_text.txt + str(i) + "\n"
                # print(image_to_text.txt)

        def pdf_to_text():

            from pdfminer.high_level import extract_text
            pdf_to_text.txt = extract_text(found)
            # pdf_to_text.txt= text.replace('\n', ' ')

        extensionlist = ['JPEG', 'jpg', 'png', 'JPG', 'PNG', 'jpeg']

        if extension in extensionlist:
            print('image' + extension)
            image_to_text()
            x = image_to_text.txt

        else:
            print('pdf' + extension)
            pdf_to_text()
            x = pdf_to_text.txt

        verticaltext = x
        htext = x
        # print('------------------------------------------------')
        print(
            '############################################################# this is verticaltext #################################################################')
        print(verticaltext)
        htext = htext.replace('\n', ' ')
        print(
            '############################################################# this is htext #############################################################')
        print(htext)
        y = x.replace('\n', ',')
        y = y.replace('  ', ' ')
        # y = y.replace(".", " .")
        horizontaltext = y
        # print('------------------------------------------------')
        print(
            '############################################################# this is horizontaltext #############################################################')
        print(horizontaltext)

        textfile = open("test123456.txt", "w")
        a = textfile.write(verticaltext)
        textfile.close()
        textfile = open("vtext.txt", "w")
        a = textfile.write(horizontaltext)
        textfile.close()
        with open('test123456.txt', 'r') as f:
            with open('test.txt', 'w') as w:
                for line in f:
                    if line.strip().replace('|', ''):
                        w.write(line)

        ###########################ADDRESS##################################
        addrespinlst = []

        def splitaddress():
            import re
            textaddress = htext.replace('\n', ' ')
            # print(textaddress)

            address1 = (textaddress.partition(",")[0])
            words = address1.split()
            address1 = words[-1]
            addre = (htext.partition(",")[2])
            a = addre.replace('\n', ' ').replace('\x0c', '')
            addre = (a.partition(",")[2])
            matches = re.findall(
                r'(.*?)-\d{3} \d{3}|(.*?)\b-\d{6}\b|(.*?)\b\d{6}\b|(.*?)\b\d{3}  \d{3}\b|\b(.*?)-\d{2}\b|(.*?)\b\d{3} \d{3}\b',
                a)
            for match in matches:
                address2 = match
                address2 = str(address2)
                address2 = address2.replace("'", "").replace("(", "").replace(")", "").replace(', ,', '').replace('  ',
                                                                                                                  '')

            matches = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3}  \d{3}\b|\b-\d{2}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', a)
            for address3 in matches:
                pass
            try:
                Address = address1 + "," + address2 + "," + address3
                final.append('ADDRESS--' + Address)
                addrespinlst.append(Address)

            except NameError:

                print(
                    '############################################################ Addressmodelworking #############################################################')

                # doc = nlp_model1(textaddress)
                # addlist = []
                # for ent in doc.ents:
                #     name = (f'{ent.label_.upper():{10}}--{ent.text}')
                #     addlist.append(name)
                # try:
                #     Address = addlist[0]
                #     final.append(Address)
                #     addrespinlst.append(Address)
                #     remove_list.append(
                #         str(Address).replace("[", "").replace("]", "").replace("\\n", "").replace("'", "").replace(
                #             "ADDRESS--",
                #             ""))
                # except IndexError:
                #     final.append("ADDRESS--")
                pass
        ################################################## website#######################################################

        # import re

        # url = []
        # matches = re.findall(r'www.*', verticaltext)
        # for match in matches:
        #     if (match.count('.')) == 1:
        #         a_string1 = match.replace("www", "www.")

        #         final.append("Urls--" + a_string1)
        #         url.append(a_string1)
        #     else:

        #         final.append("Urls--" + match)

        # if len(url)==0:

        #     from urlextract import URLExtract

        #     extractor = URLExtract()
        #     urls = extractor.find_urls(verticaltext)
        #     try:
        #         urllist = urls[0]
        #         final.append("Urls--"+urllist)
        #         url.append(urllist)
        #     except IndexError:
        #         final.append("Urls--")

        #     for match in matches:
        #         if (match.count('.')) == 1:
        #             a_string1 = match.replace("www", "www.")

        #             final.append("Urls--" + a_string1)
        #             url.append(a_string1)
        #         else:

        #             final.append("Urls--" + match)
        #             url.append(match)
        #             remove_list.append(match)
        # else:
        #     final.append("Urls--" )

        ################################################## website#######################################################

        import re
        # final=[]
        url = []
        urlfinal = []
        matches = re.findall(r'www.*', verticaltext)
        for match in matches:

            if (match.count('.')) == 1:
                a_string1 = match.replace("www", "www.")

                # final.append("Urls--" + a_string1)
                url.append(a_string1)
            else:

                url.append(match)

        if len(url) == 0:

            from urlextract import URLExtract

            extractor = URLExtract()
            urls = extractor.find_urls(verticaltext)
            try:
                urllist = urls[0]
                url.append(urllist)
                url.append(urllist)
            except IndexError:
                pass

            for match in matches:
                if (match.count('.')) == 1:
                    a_string1 = match.replace("www", "www.")

                    url.append(a_string1)
                    # url.append(a_string1)
                else:

                    url.append(match)
                    url.append(match)

        else:
            pass
        try:
            test_string = url[0]

            test_list = ['com', 'www', 'in', 'co', "WWW", "COM", "CO", "IN"]

            res = [ele for ele in test_list if (ele in test_string)]

            if len(res) == 0:
                print('no match')

                final.append('urls--')


            else:
                print('matched')
                final.append('urls--' + url[0])
                urlfinal.append(url[0])


        except IndexError:
            final.append('urls--')

        print(
            '############################################################# url #############################################################')
        print(url)
        #######organisation and contact################

        # def company_url():
        #     # print('--url--')
        #     # print(url)

        #     try:
        #         match = str(url[0]).lower()
        #         match =match.replace('.com','').replace('www.','').replace('.in','').replace('.co','').upper()
        #         final.append("OrganizationName--" + match)
        #         # remove_list.append(match)
        #     except IndexError:
        #         org_name()
        #         organisation()
        # final.append("OrganizationName--")

        # make example sentence

        # print(horizontaltext)
        sentence = Sentence(verticaltext)

        # predict NER tags
        tagger.predict(sentence)

        # print sentence
        ko = (sentence)

        ko1 = str(ko).split("→")
        import pandas as pd

        dfg = []
        try:
            s = ko1[1].replace("", "").replace("", "").replace("/", ":")
        except IndexError:
            os.remove(found)
            return 'Invalid image'
        dfg.append(s)
        df = pd.DataFrame(dfg)
        df = df[0]

        df.to_csv("df.csv", index=False)

        df1 = pd.read_csv("df.csv")
        ve = df1["0"].str.split(",")
        fgf = ve.to_list()
        dfgh = pd.DataFrame(fgf[0])
        maindf = dfgh[0]  # .str.split(":")
        # maindf.to_csv("main.csv")

        main1 = maindf.to_list()
        main1
        # cv=pd.DataFrame(ve)
        # cv
        per = ["PER"]
        org = ["ORG"]
        loc = ["LOC"]
        organizations = [i for i in main1 for j in org if j in i]
        PErsons = [i for i in main1 for j in per if j in i]
        location = [i for i in main1 for j in loc if j in i]

        # ************************************* ORGANIZATION ********************************************************************
        try:
            if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
                                                                                                         '').replace(
                    ']', '').replace(
                '.com', '') + " /" + organizations[1].replace(":ORG", "").replace('"', '').replace('.com', ''))) < 4:
                pass
                # company_url()
            else:

                match = str(urlfinal[0]).lower()
                match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace(
                    'https',
                    '').replace(
                    'http', '').replace(":", "").replace("/", "").upper()
                print(match)

                s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace(']', '').replace(
                    '.com', '') + " /" + \
                      organizations[1].replace(":ORG", "").replace('"', '').replace('.com', '')
                s1 = s1g.upper()
                s2 = match.upper()
                from difflib import SequenceMatcher
                print(s1)
                print(s2)
                print(SequenceMatcher(None, s1, s2).ratio())
                if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
                    # and SequenceMatcher(None, s1, s2).ratio()<0.50:
                    final.append(
                        "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
                                                                                                             '').replace(
                            '.com', '').replace(']', '') + " /" + organizations[1].replace(":ORG", "").replace('"',
                                                                                                               '').replace(
                            '.com', '').replace(']', ''))
                else:
                    final.append("OrganizationName--" + s2)


        except IndexError:
            try:
                if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('[', '').replace(']',
                                                                                                             '').replace(
                    '"',
                    '').replace(
                    '.com', ''))) < 4:
                    pass
                    # company_url()
                else:

                    match = str(urlfinal[0]).lower()
                    match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace(
                        'https', '').replace('http', '').replace(":", "").replace("/", "").upper()

                    s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace(']',
                                                                                                         '').replace(
                        '.com', '')
                    s1 = s1g.upper()
                    s2 = match.upper()
                    from difflib import SequenceMatcher
                    print(s1)
                    print(s2)
                    print(SequenceMatcher(None, s1, s2).ratio())
                    if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
                        # and SequenceMatcher(None, s1, s2).ratio()<0.50:
                        final.append(
                            "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
                                                                                                                 '').replace(
                                ']', '').replace(
                                '.com', '').replace(']', ''))
                    else:
                        final.append("OrganizationName--" + s2)

            except IndexError:
                org_name()
                organisation()

                # final.append("OrganizationName--")

        # ************************************* CONTACT PERSON *******************************************************************
        try:
            final.append(
                "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace('"', '').replace("]",
                                                                                                                 "") +
                PErsons[
                    1].replace(":PER", "").replace('"', ''))
        except IndexError:
            try:
                final.append(
                    "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace("]", "").replace(
                        '"',
                        ''))
            except IndexError:
                org_name()
                contactpersonname()
                # final.append("CONTACTPERSONNAME--")
        ###############address flair#####################

        try:
            print(
                '############################################################# address new code #############################################################')
            loactionlst = ['address', 'factory', 'd.no', 'h.no', 'h. no', 'plot', 'flat', 'plat']
            loclst = [i for i in loactionlst if i in htext.lower()]

            textaddress = htext
            textaddress = textaddress.replace("|", ",")
            textaddress = textaddress.lower()

            nlp = pipeline('ner', model=model, tokenizer=tokenizer, aggregation_strategy="simple")
            grop = nlp(textaddress)

            citycountry = []
            print('########################### city or country name ###########################')
            d = grop[-1]

            if d['entity_group'] == "COUNTRY":
                print(d["word"])
                citycountry.append(d["word"])
            elif d['entity_group'] == "CITY":
                print(d["word"])
                citycountry.append(d["word"])

            try:
                address1 = loclst[0]
            except IndexError:
                address1 = (textaddress.partition(",")[0])
                words = address1.split()
                address1 = words[-1]

            star_location = address1.lower()
            end_location = citycountry[0].replace("#", "")
            start = star_location
            end = end_location
            s = textaddress.lower()
            middle_address = (s.split(start))[-1].split(end)[0]
            Address = start + middle_address + end
            Address = Address.replace('--', '').title()
            print(Address)
            if Address.count(',') < 2:
                splitaddress()
            else:
                final.append('ADDRESS--' + Address)

            # star_location = location[0].replace(":LOC", "").replace('"', '').replace('[', '')
            # end_location = location[-1].replace(":LOC", "").replace('"', '').replace(']', '')
            # d1 = star_location.split()
            # d2 = end_location.split()
            # d3 = d1[0]
            # d4 = d2[0]
            # start = d3
            # end = d4
            # s = horizontaltext
            # middle_address = ((s.split(start))[1].split(end)[0])
            # Address = d3 + middle_address + d4
            # final.append('ADDRESS--' + Address)
            # addrespinlst.append(Address)


        except IndexError:
            splitaddress()

        ########################################## Designation ###########################################
        import re
        new = []
        with open('test.txt', 'r') as f:
            flag = False
            for line in f:
                line1 = line
                line = line.upper()
                matches = re.findall(
                    r'''\bAPPRENTICE\b|\bEXECUTIVE\b|\bPROPRIETOR\b|\bPARTNER\b|\bMD\b|\bANALYST\b|\bPRACTITIONER\b|\bCUSTOMER\b|\bCOO\b|\bCOACH\b|\bADMINISTRATIVE\b|\bADMINISTRATOR\b|\bAGENT\b|\bHEAD\b|\bCHIEF\b|\bDIRECTOR\b|\bVICE\b|\bPRESIDENT\b|\bMANAGER\b|\bCOORDINATOR\b|\bCOUNSELOR\b|\bSUPERVISOR\b|\bASSISTANT\b|\bSPECIALIST\b|\bARTIST\b|\bWORKER\b|\bCONSULTANT\b|\bREPRESENTATIVE\b|\bARCHITECT\b|\bSTAFF\b|\bMEMBER\b|\bDEVELOPER\b|\bENGINEER\b|\bEXAMINOR\b|\bDOCTOR\b|\bPROFESSOR\b|\bTEACHER\b|\bLEAD\b|\bOFFICER\b|\bCEO\b|\bC.E.O\b|\bJUNIOR\b|\bSENIOR\b|\bPROFESSOR\b|\bSALES\b''',
                    line)
                for match in matches:
                    line = line.replace('-', '')
                    # print(line)
                    o = "Designation--" + line
                    new.append(o)
                    remove_list.append(str(line1).replace('\n', ''))

        try:
            a = new[0].replace('\n', '')
            final.append(a)

        except IndexError:
            final.append("Designation--")

        ###################################################Phone number#################################################
        num = []
        import phonenumbers

        # print(verticaltext)
        numbers = phonenumbers.PhoneNumberMatcher(
            verticaltext.replace('+91', '').replace('(0)', '').replace('(', '').replace(')', ''), "IN")

        for number in numbers:
            number = str(number).split(")")
            num.append(number[1])
            # num.append(number[-1])
        if len(num) == 0:
            final.append("ContactNumber--")
            final.append("OrganizationNumber--")
        elif len(num) > 1:
            final.append("ContactNumber--" + num[0].replace(' ', ''))
            final.append("OrganizationNumber--" + num[-1].replace(' ', ''))
        elif len(num) == 1:
            try:
                final.append("ContactNumber--" + num[0].replace(' ', ''))
                final.append("OrganizationNumber--")
            except IndexError:
                final.append("ContactNumber--")
                final.append("OrganizationNumber--")
        print(
            '#############################################################  num #############################################################')
        print(num)
        # try:
        #     final.append("PhoneNumber--" + num[0].replace(' ', ''))
        #     remove_list.append(num[0])
        # except IndexError:
        #     pass
        # try:
        #     final.append("PhoneNumber1--" + num[1].replace(' ', ''))
        #     remove_list.append(num[1])
        # except IndexError:
        #     pass
        # try:
        #     final.append("PhoneNumber2--" + num[2].replace(' ', ''))
        #     remove_list.append(num[2])
        # except IndexError:
        #     pass

        ################################################### Email######################################################
        import re
        from email_scraper import scrape_emails
        s = list(scrape_emails(horizontaltext))
        email_id = s

        # email_id = []
        # matches = re.findall(r'[\w\.-]+@[\w\.-]+', verticaltext)
        # for match in matches:
        #     email_id.append(match)

        #     # final.append('Email--' + match)
        #     email_ = str(email_id).replace("[", "").replace("]", "").replace("'", "")
        #     # final.append(email_)

        #     # final.append('Email--' + email_)
        #     # remove_list.append(email_)
        if len(email_id) > 1:
            final.append(
                'ContactEmail--' + str(email_id[0]).replace("[", "").replace("]", "").replace("\\n", "").replace("'",
                                                                                                                 ""))
            final.append(
                'OrganizationEmail--' + str(email_id[-1]).replace("[", "").replace("]", "").replace("\\n", "").replace(
                    "'",
                    ""))
        else:
            try:
                final.append(
                    'ContactEmail--' + str(email_id[0]).replace("[", "").replace("]", "").replace("\\n", "").replace(
                        "'",
                        ""))
                final.append('OrganizationEmail--')
            except IndexError:
                final.append('ContactEmail--')
                final.append('OrganizationEmail--')

        ###############PINCODE############

        pinlst = []
        print(addrespinlst)
        import pgeocode

        # try:
        #     matche1 = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3}  \d{3}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', addrespinlst[0])
        #     for i in matche1:
        #         address3 = i.replace(' ', '').replace('-', '')
        #         pinlst.append(address3)
        # except IndexError:

        lst = []
        for i in num:
            i = i[1:]
            lst.append(i)

        infile = r"vtext.txt"
        outfile = r"cleaned_file.txt"
        import glob
        delete_list = lst
        # delete_list = ["firstname1 lastname1","firstname2 lastname2","firstnamen lastnamen",'Director -  Sales  &  Business  Development']
        fin = open(infile, "r+")
        fout = open(outfile, "w+")
        for line12 in fin:
            for word in delete_list:
                line12 = line12.replace(word, "")

            fout.write(line12)
        fin.close()
        # print(line)

        # print(addrespinlst)
        import pgeocode
        print(line12)
        import re
        matche1 = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3}  \d{3}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', line12)
        for i in matche1:
            address3 = i.replace(' ', '').replace('-', '')
            pinlst.append(address3)

        nomi = pgeocode.Nominatim('IN')
        try:
            a = nomi.query_postal_code(str(pinlst[-1]))
            # print(a)
            b = a.keys()
            c = b.values.tolist()
            d = a.tolist()
            postal_code = "PinCode1" + "--" + d[0]
            final.append(postal_code)
            country_code = c[1] + "--" + str(d[1])
            final.append(country_code)
            place_name = 'LandMark1' + "--" + str(d[2])
            final.append(place_name)
            state_name = c[3] + "--" + str(d[3])
            final.append(state_name)
            state_code = c[4] + "--" + str(d[4])
            final.append(state_code)
            county_name = 'CityName1' + "--" + str(d[5])
            final.append(county_name)

        except (IndexError, NameError):
            final.append("PinCode1--")
            final.append("country_code--")
            final.append("LandMark1--")
            final.append("state_name--")
            final.append("state_code--")
            final.append("CityName1--")


        ########################################################   json     #####################################################################

        import pandas as pd
        df = pd.DataFrame(final)
        df1 = df[0].str.split('--', expand=True)
        # print(df1)
        df1.rename({df1.columns[-2]: 'Keys'}, axis=1, inplace=True)
        df1.rename({df1.columns[-1]: 'Values'}, axis=1, inplace=True)
        df1['Keys']=df1['Keys'].str.strip()
        df1.to_csv('path123.csv', index=False)
        df2 = pd.read_csv('path123.csv')
        print(df2)
        df2 = df2.T
        df2.to_csv('path1.csv', index=False, header=False)
        df1 = pd.read_csv('path1.csv')
        df1.to_json('firstjson1.json', orient="index")
        import json
        with open('firstjson1.json', 'r') as json_file:
            json_load = json.load(json_file)
        #     # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/businesscards/create"
        nothing = json.dumps(json_load).replace("]", "").replace("[", "").replace('{"0":', '').replace('}}', '}')
        # # print('--------------------------------------------------------------------------')
        # # print(nothing)
        empty = []
        import base64
        name = found
        image = open(name, 'rb')
        image_read = image.read()
        image_64_encode = base64.b64encode(image_read)
        NULL = 'null'
        empty.append("ByteData--" + (NULL).strip('""'))
        image_64_encode = image_64_encode.decode('utf-8')
        empty.append("FileData--" + str(image_64_encode))
        imagedata = name.split("/")
        imagename = str(imagedata[-1]).replace('"', '').replace("[", "").replace("]", "")
        imagename1 = str(imagename).split('.')
        imagename = str(imagename1[-2]).replace("[", "]")
        empty.append("FileName--" + imagename)
        empty.append("FilePath--" + found)
        imageExtension = str(imagename1[-1]).replace("[", "]")
        empty.append("FileType--" + imageExtension)
        image.close()
        import pandas as pd
        df = pd.DataFrame(empty)
        df = df[0].str.split("--", expand=True)
        data1 = pd.DataFrame(df[0])
        data2 = pd.DataFrame(df[1])
        dt = data2.set_index(data1[0])
        dt4 = dt.T
        dictionary = dt4.to_dict(orient="index")
        list1 = []
        # list.append(a)
        list1.append(dictionary[1])
        # # final.append("image--"+str(dictionary[1]).replace("\'",'"'))
        print('--------------------')
        # print(namelist)
        import json
        # JSON data:
        x = nothing
        # python object to be appended
        y = {"image": dictionary[1]}
        # parsing JSON string:
        z = json.loads(x)
        # appending the data
        z.update(y)
        # the result is a JSON string:
        # print(json.dumps(z))

        #############################################creating csv#####################################
        # print(final)
        # print(imagelist)
        # final.append('image--'+str(imagelist))
        # import requests
        # import json
        # # with open('visitingcard1.json', 'r') as json_file:
        # #     json_load = json.load(json_file)
        # url = "https://demo.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create"
        url = "https://test.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create"
        # url = 'https://c01.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create'  # C01
        payload1 = json.dumps(z)
        # print('--------------------------------------------------------------------------')
        # print(payload1)
        headers = {
            'Authorization': 'stat db226c95fae04943aa3e3c03a4381b2a',
            # 'Authorization': 'stat f7cdb402e01e44e5842878653946168f',  # c01
            # 'Authorization': 'Stat c3e11b2fcbfe455b86a1fe6efde02a69',#demo
            'Content-Type': 'application/json'
        }
        response = requests.request("POST", url, headers=headers, data=payload1)
        # print("##############################################################")

        # #print(payload1)
        print(response.text)
        import os
        if 'BusinessCards Created Successfully' in response.text:
            print('present')
            os.remove(found) 
        else:
            print('not present')

        # df1.to_json('visitingcard.json')
        # data = df1.to_json('visiting.json', orient='records')
        # print(data)

        # return render_template('index.html')
        # files = glob.glob('./upload/*')
        # for f in files:
        #     os.remove(f)

        # print('Time Taken:',total)

   
    return response.text

    # return 'done'


# # return send_file(p,as_attachment=True)
# @app.route('/upload_BusinessCards', methods=["POST"])
# def upload_BusinessCards():
#     if __name__ == "__main__":
#         url_list = []
#         Dataset = request.get_json()
#         print("8888888888888888888888888888888888888888888888888888888888888888888888888888888888")
#         #print(Dataset)
#         # id = "100013660000125"
#         url_list.append(Dataset)
#         # multiprocessing

#         with multiprocessing.Pool(processes=1) as pool:
#             # try:
#             results = pool.map(predict, url_list)
#             # except IndexError:
#             #     return 'Invalid image'
#             # results.clear()
#             # a=results[0]

#         pool.close()

#         return results[0]

@app.route('/upload_BusinessCards', methods=["POST"])
def mainfunction():
    Dataset = request.get_json()
    if len(Dataset)==1:
        # predict(Dataset)
        return predict(Dataset)
    else:
        # multiplecards(Dataset)
        return multiplecards(Dataset)


if __name__ == "__main__":
    app.run(host='0.0.0.0',port=1112)