From 8b1964e0b61d2be96579b19f76d7138c90248f49 Mon Sep 17 00:00:00 2001
From: SadhulaSaiKumar <sagar96767@gmail.com>
Date: Tue, 2 May 2023 05:32:46 +0000
Subject: [PATCH] Update 'Business_cards/Business_cards.py'

---
 Business_cards/Business_cards.py | 2232 +++++++++++++++---------------
 1 file changed, 1112 insertions(+), 1120 deletions(-)

diff --git a/Business_cards/Business_cards.py b/Business_cards/Business_cards.py
index fad586e..178f35e 100644
--- a/Business_cards/Business_cards.py
+++ b/Business_cards/Business_cards.py
@@ -1,1121 +1,1113 @@
-from flask import Flask, render_template, request, redirect, Response, send_file
-import os
-import openai
-import requests
-import pandas as pd
-import pgeocode
-from email_scraper import scrape_emails
-import phonenumbers
-from pdfminer.high_level import extract_text
-import pytesseract
-import time
-import multiprocessing
-from PIL import Image
-from functools import partial
-from urlextract import URLExtract
-import pytesseract as tess
-from PIL import Image
-# from doctr.io import DocumentFile
-# from doctr.models import ocr_predictor
-# model = ocr_predictor(pretrained=True)
-# load tagger
-######################################################
-import os
-import glob
-
-from pytesseract import *
-import shutil
-import cv2
-import matplotlib
-from werkzeug.utils import secure_filename
-import requests
-import spacy
-import time
-import multiprocessing
-from PIL import Image
-from functools import partial
-nlp_model = spacy.load("D:/projects/C01app/Resume_parser/ME")
-nlp_model1 = spacy.load("D:/projects/C01app/Resume_parser/bdeeducation_50_0.2")
-from flask import Flask, render_template, request, redirect, Response, send_file
-
-import pandas as pd
-################################################################
-Current_Working_Directory=os.getcwd()
-Current_Working_Directory=Current_Working_Directory.replace("\\","/")
-nlp_model1 = spacy.load(Current_Working_Directory + "/Invoice_parser/p")
-
-################################################################
-# import spacy
-
-# nlp_model1 = spacy.load('./ADD3001.2')
-from flair.data import Sentence
-from flair.models import SequenceTagger
-from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
-
-tokenizer = AutoTokenizer.from_pretrained("ml6team/bert-base-uncased-city-country-ner")
-model = AutoModelForTokenClassification.from_pretrained("ml6team/bert-base-uncased-city-country-ner")
-
-from paddleocr import PaddleOCR, draw_ocr
-
-ocr = PaddleOCR(use_angle_cls=True, lang='en', use_space_char=True, show_log=True)
-tagger = SequenceTagger.load("flair/ner-english-large")
-
-import datetime
-
-app = Flask(__name__)
-
-
-# app.config["IMAGE_UPLOADS"] = "C:/inetpub/wwwroot/FlaskApplication/Flask_Demo/upload/"
-
-@app.route('/', methods=['GET'])
-def card():
-    return render_template('card.html')
-
-
-
-@app.route('/upload_BusinessCards', methods=["POST"])
-# @app.route('/multiplecards', methods=["POST"])
-def multiplecards():
-    # print('################## multiple card detection #######################')
-    # print(Dataset)
-    datalist=[]
-    Dataset = request.get_json()
-    # print(data)
-    #datalist.append(Dataset)
-    data = {'visiting': Dataset}
-    for i in data['visiting']:
-        import time
-        # time.sleep(1)
-        a = i
-        x = a['FileData']
-        # print(x)
-        y = a['FileName']
-        z = a['FileType']
-        # CreatedBy=a['CreatedBy']
-
-        name = y + '.' + z
-        # print(name)
-        # print(y)
-        # image = y.split("/")
-        # filename=image[-1]
-
-        # print(x)
-        img_data = x.encode()
-
-        import base64
-        with open('./multicards/' + name, "wb") as fh:
-            fh.write(base64.decodebytes(img_data))
-        # print(i)
-
-        # import os
-        # import glob
-        # for i in glob.glob('./multipleupload/*'):
-
-        found = './multicards/' + name
-        print(found)
-        extension = found.split('.')[-1]
-
-        # for root, dirs, fils in os.glob('./multipleupload'):
-        #     for name in files:
-        #         foundfile= os.path.join(root, name)
-        #         print(foundfile)
-
-        import re
-        import csv
-        import glob
-        import os
-        # import pytesseract
-        # import cv2
-        import numpy as np
-        import glob
-        import os
-        import cv2
-        import requests
-        final = []
-        # final.append('assignto--'+CreatedBy)
-        imagelist = []
-        # print(found)
-        remove_list = []
-        import os
-        import glob
-        import pdfminer
-
-        # import os
-        # ts = 0
-        # for file_name in glob.glob('./upload/*'):
-        #     fts = os.path.getmtime(file_name)
-        #     if fts > ts:
-        #         ts = fts
-        #         found = file_name
-        # print(found)
-
-        # print(extension)
-
-        def org_name():
-            print('org_name is working')
-            import pytesseract
-            fname = found
-            if extension != 'pdf':
-
-                img = cv2.imread(fname)
-                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-
-                cv2.imwrite(str(found), img)
-                from PIL import Image
-                im = Image.open(found)
-                im.save("images1.png", dpi=(1200, 1200))
-                # import pytesseract
-                fname = "images1.png"
-                import pytesseract as tess
-                from PIL import Image
-
-                tess.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
-                pdf = tess.image_to_pdf_or_hocr(fname, extension="pdf")
-                with open("demo.pdf", "w+b", ) as f:
-                    f.write(pdf)
-
-                from pdfminer.high_level import extract_text
-                text = extract_text('demo.pdf')
-                # doc = DocumentFile.from_images(found)
-                # result = model(doc)
-                # text = result.render()
-
-                # from pdfminer.high_level import extract_text
-                # txt  = extract_text('demo.pdf')
-            else:
-                from pdfminer.high_level import extract_text
-                text = extract_text(fname)
-
-            sentence = Sentence(text)
-
-            # predict NER tags
-            tagger.predict(sentence)
-
-            # print sentence
-            ko = (sentence)
-
-            ko1 = str(ko).split("→")
-            import pandas as pd
-
-            dfg = []
-            try:
-                s = ko1[1].replace("", "").replace("", "").replace("/", ":")
-
-                # os.remove(found)
-                # return 'Invalid image'
-                dfg.append(s)
-                df = pd.DataFrame(dfg)
-                df = df[0]
-
-                df.to_csv("df.csv", index=False)
-
-                df1 = pd.read_csv("df.csv")
-                ve = df1["0"].str.split(",")
-                fgf = ve.to_list()
-                dfgh = pd.DataFrame(fgf[0])
-                maindf = dfgh[0]  # .str.split(":")
-                # maindf.to_csv("main.csv")
-
-                main1 = maindf.to_list()
-                main1
-                # cv=pd.DataFrame(ve)
-                # cv
-                per = ["PER"]
-                org = ["ORG"]
-                loc = ["LOC"]
-                organizations = [i for i in main1 for j in org if j in i]
-                PErsons = [i for i in main1 for j in per if j in i]
-                location = [i for i in main1 for j in loc if j in i]
-            except IndexError:
-                pass
-
-                # ************************************* ORGANIZATION ********************************************************************
-
-        def organisation():
-            print('organisation working ')
-            try:
-                if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
-                                                                                                             '').replace(
-                    '.com', '').replace('.in', '') + " /" + organizations[1].replace(":ORG", "").replace('"',
-                                                                                                         '').replace(
-                    '.com', ''))) < 4:
-                    pass
-
-
-                else:
-
-                    match = str(urlfinal[0]).lower()
-                    match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace(
-                        'https',
-                        '').replace(
-                        'http', '').replace(":", "").replace("/", "").upper()
-                    print(match)
-
-                    s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace('.com',
-                                                                                                         '') + " /" + \
-                          organizations[1].replace(":ORG", "").replace('"', '').replace('.com', '')
-                    s1 = s1g.upper()
-                    s2 = match.upper()
-                    from difflib import SequenceMatcher
-                    print(s1)
-                    print(s2)
-                    print(SequenceMatcher(None, s1, s2).ratio())
-                    if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
-                        # and SequenceMatcher(None, s1, s2).ratio()<0.50:
-                        final.append(
-                            "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
-                                                                                                                 '').replace(
-                                '.com', '').replace(']', '') + " /" + organizations[1].replace(":ORG", "").replace('"',
-                                                                                                                   '').replace(
-                                '.com',
-                                '').replace(']', ''))
-                    else:
-                        final.append("OrganizationName--" + s2)
-
-            except IndexError:
-                try:
-                    if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('[', '').replace(']',
-                                                                                                                 '').replace(
-                        '"',
-                        '').replace(
-                        '.com', '').replace('.in', ''))) < 4:
-                        pass
-
-                    else:
-                        match = str(urlfinal[0]).lower()
-                        match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co',
-                                                                                                         '').replace(
-                            'https', '').replace('http', '').replace(":", "").replace("/", "").upper()
-
-                        s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace('.com', '')
-                        s1 = s1g.upper()
-                        s2 = match.upper()
-                        from difflib import SequenceMatcher
-                        print(s1)
-                        print(s2)
-                        print(SequenceMatcher(None, s1, s2).ratio())
-                        if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
-                            # and SequenceMatcher(None, s1, s2).ratio()<0.50:
-                            final.append(
-                                "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace(
-                                    '[',
-                                    '').replace(
-                                    ']', '').replace(
-                                    '.com', ''))
-                        else:
-                            final.append("OrganizationName--" + s2)
-
-                except IndexError:
-                    try:
-                        match = str(urlfinal[0]).lower()
-                        match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co',
-                                                                                                         '').upper()
-                        final.append("OrganizationName--" + match)
-                        # remove_list.append(match)
-                    except IndexError:
-                        company()
-
-        #################################################company Name########################################
-
-        def company():
-            print('company list working')
-            import re
-
-            new = []
-            with open('test.txt', 'r+') as f:
-                flag = False
-                for line in f:
-                    line = line.upper()
-                    matches = re.findall(
-                        r'''\bENTERPRISE\b|\bTRADE\b|\bEMPIRE\b|\bSTORES\b|\bMACHINERY\b|\bINDUSTRIES\b|\bTECHNOLOGY\b|\bCOMPANY\b|\bDESIGNERS\b|\bPOLYMER\b|\bBELT\b|\bAGRO\b|\bPLASTIC\b|\bGROUP\b|\bTOOLS\b|\bENGG.\b|\bSOLUTION\b|\bCONSTRUCTION\b|\bPACK\b|\bELECT\b|\bSTEEL\b|\bIRON\b|\bDIES\b|\bMOULD\b|\bCORPORATION\b|\bSEEDS\b|\bPOWER\b|\bCONSULTANT\b|\bMFG.\b|\bPRINT\b|\bFOOD\b|\bSOLAR\b|\bINDUSTRY\b|\bLIMITED\b|\bPRIVATE\b|\bPVT\b|\bLTD\b|\bOUTSOURCING\b|\bCNC\b|\bMACHINERIES\b|\bSOLUTIONS\b|\bENGINEERS\b|\bWORKS\b|\bPRODUCTS\b|\bENTERPRISES\b|\bCOMPANIES\b|\bPOLYMERS\b|\bTRADING\b''',
-                        line)
-
-                    for i in matches:
-                        if i in line:
-                            flag = True
-                            if flag:
-                                o = "OrganizationName--" + line
-                                new.append(o)
-            #                       if line.startswith('\n'):
-            #                           flag = False
-            try:
-                a = new[0].replace('\n', '')
-                final.append(a)
-            except IndexError:
-                final.append("OrganizationName--")
-
-        # ************************************* CONTACT PERSON *******************************************************************
-        def contactpersonname():
-            print('contactpersonname working')
-            try:
-                final.append(
-                    "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace('"', '').replace(
-                        "]",
-                        "") + '/' +
-                    PErsons[
-                        1].replace(":PER", "").replace('"', ''))
-            except IndexError:
-                try:
-                    final.append(
-                        "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace("]",
-                                                                                                        "").replace(
-                            '"', ''))
-                except IndexError:
-                    final.append("CONTACTPERSONNAME--")
-
-        def image_to_text():
-
-            # doc = DocumentFile.from_images(found)
-            # result = model(doc)
-            # image_to_text.txt = result.render()
-
-            # tess.pytesseract.tesseract_cmd = r"C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe"
-            # img = Image.open(found)
-            # text = tess.image_to_string(img)
-            # image_to_text.txt = text
-            # print(text)
-            import cv2
-            img_path = found
-            img = cv2.imread(img_path)
-            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-            cv2.imwrite(str(found), img)
-
-            result = ocr.ocr(img_path, cls=True)
-            result = result[0]
-
-            txts = [line[1][0] for line in result]
-
-            image_to_text.txt = ""
-            for i in txts:
-                if len(i) < 4:
-                    continue
-                    # print(i+"\n")
-                image_to_text.txt = image_to_text.txt + str(i) + "\n"
-                # print(image_to_text.txt)
-
-        def pdf_to_text():
-
-            from pdfminer.high_level import extract_text
-            pdf_to_text.txt = extract_text(found)
-            # pdf_to_text.txt= text.replace('\n', ' ')
-
-        extensionlist = ['JPEG', 'jpg', 'png', 'JPG', 'PNG', 'jpeg']
-
-        if extension in extensionlist:
-            print('image' + extension)
-            image_to_text()
-            x = image_to_text.txt
-
-        else:
-            print('pdf' + extension)
-            pdf_to_text()
-            x = pdf_to_text.txt
-
-        verticaltext = x
-        htext = x
-        # print('------------------------------------------------')
-        print(
-            '############################################################# this is verticaltext #################################################################')
-        print(verticaltext)
-        htext = htext.replace('\n', ' ')
-        print(
-            '############################################################# this is htext #############################################################')
-        print(htext)
-        y = x.replace('\n', ',')
-        y = y.replace('  ', ' ')
-        # y = y.replace(".", " .")
-        horizontaltext = y
-        # print('------------------------------------------------')
-        print(
-            '############################################################# this is horizontaltext #############################################################')
-        print(horizontaltext)
-
-        textfile = open("test123456.txt", "w")
-        a = textfile.write(verticaltext)
-        textfile.close()
-        textfile = open("vtext.txt", "w")
-        a = textfile.write(horizontaltext)
-        textfile.close()
-        with open('test123456.txt', 'r') as f:
-            with open('test.txt', 'w') as w:
-                for line in f:
-                    if line.strip().replace('|', ''):
-                        w.write(line)
-
-        ###########################ADDRESS##################################
-        addrespinlst = []
-
-        def splitaddress():
-            import re
-            textaddress = htext.replace('\n', ' ')
-            # print(textaddress)
-
-            address1 = (textaddress.partition(",")[0])
-            words = address1.split()
-            address1 = words[-1]
-            addre = (htext.partition(",")[2])
-            a = addre.replace('\n', ' ').replace('\x0c', '')
-            addre = (a.partition(",")[2])
-            matches = re.findall(
-                r'(.*?)-\d{3} \d{3}|(.*?)\b-\d{6}\b|(.*?)\b\d{6}\b|(.*?)\b\d{3}  \d{3}\b|\b(.*?)-\d{2}\b|(.*?)\b\d{3} \d{3}\b',
-                a)
-            for match in matches:
-                address2 = match
-                address2 = str(address2)
-                address2 = address2.replace("'", "").replace("(", "").replace(")", "").replace(', ,', '').replace('  ',
-                                                                                                                  '')
-
-            matches = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3}  \d{3}\b|\b-\d{2}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', a)
-            for address3 in matches:
-                pass
-            try:
-                Address = address1 + "," + address2 + "," + address3
-                final.append('ADDRESS--' + Address)
-                addrespinlst.append(Address)
-
-            except NameError:
-
-                print(
-                    '############################################################ Addressmodelworking #############################################################')
-
-                # doc = nlp_model1(textaddress)
-                # addlist = []
-                # for ent in doc.ents:
-                #     name = (f'{ent.label_.upper():{10}}--{ent.text}')
-                #     addlist.append(name)
-                # try:
-                #     Address = addlist[0]
-                #     final.append(Address)
-                #     addrespinlst.append(Address)
-                #     remove_list.append(
-                #         str(Address).replace("[", "").replace("]", "").replace("\\n", "").replace("'", "").replace(
-                #             "ADDRESS--",
-                #             ""))
-                # except IndexError:
-                #     final.append("ADDRESS--")
-                pass
-
-        ################################################## website#######################################################
-
-        # import re
-
-        # url = []
-        # matches = re.findall(r'www.*', verticaltext)
-        # for match in matches:
-        #     if (match.count('.')) == 1:
-        #         a_string1 = match.replace("www", "www.")
-
-        #         final.append("Urls--" + a_string1)
-        #         url.append(a_string1)
-        #     else:
-
-        #         final.append("Urls--" + match)
-
-        # if len(url)==0:
-
-        #     from urlextract import URLExtract
-
-        #     extractor = URLExtract()
-        #     urls = extractor.find_urls(verticaltext)
-        #     try:
-        #         urllist = urls[0]
-        #         final.append("Urls--"+urllist)
-        #         url.append(urllist)
-        #     except IndexError:
-        #         final.append("Urls--")
-
-        #     for match in matches:
-        #         if (match.count('.')) == 1:
-        #             a_string1 = match.replace("www", "www.")
-
-        #             final.append("Urls--" + a_string1)
-        #             url.append(a_string1)
-        #         else:
-
-        #             final.append("Urls--" + match)
-        #             url.append(match)
-        #             remove_list.append(match)
-        # else:
-        #     final.append("Urls--" )
-
-        ################################################## website#######################################################
-
-        import re
-        # final=[]
-        url = []
-        urlfinal = []
-        matches = re.findall(r'www.*', verticaltext)
-        for match in matches:
-
-            if (match.count('.')) == 1:
-                a_string1 = match.replace("www", "www.")
-
-                # final.append("Urls--" + a_string1)
-                url.append(a_string1)
-            else:
-
-                url.append(match)
-
-        if len(url) == 0:
-
-            from urlextract import URLExtract
-
-            extractor = URLExtract()
-            urls = extractor.find_urls(verticaltext)
-            try:
-                urllist = urls[0]
-                url.append(urllist)
-                url.append(urllist)
-            except IndexError:
-                pass
-
-            for match in matches:
-                if (match.count('.')) == 1:
-                    a_string1 = match.replace("www", "www.")
-
-                    url.append(a_string1)
-                    # url.append(a_string1)
-                else:
-
-                    url.append(match)
-                    url.append(match)
-
-        else:
-            pass
-        try:
-            test_string = url[0]
-
-            test_list = ['com', 'www', 'in', 'co', "WWW", "COM", "CO", "IN"]
-
-            res = [ele for ele in test_list if (ele in test_string)]
-
-            if len(res) == 0:
-                print('no match')
-
-                final.append('urls--')
-
-
-            else:
-                print('matched')
-                final.append('urls--' + url[0])
-                urlfinal.append(url[0])
-
-
-        except IndexError:
-            final.append('urls--')
-
-        print(
-            '############################################################# url #############################################################')
-        print(url)
-        #######organisation and contact################
-
-        # def company_url():
-        #     # print('--url--')
-        #     # print(url)
-
-        #     try:
-        #         match = str(url[0]).lower()
-        #         match =match.replace('.com','').replace('www.','').replace('.in','').replace('.co','').upper()
-        #         final.append("OrganizationName--" + match)
-        #         # remove_list.append(match)
-        #     except IndexError:
-        #         org_name()
-        #         organisation()
-        # final.append("OrganizationName--")
-
-        # make example sentence
-
-        # print(horizontaltext)
-        sentence = Sentence(verticaltext)
-
-        # predict NER tags
-        tagger.predict(sentence)
-
-        # print sentence
-        ko = (sentence)
-
-        ko1 = str(ko).split("→")
-        import pandas as pd
-
-        dfg = []
-        try:
-            s = ko1[1].replace("", "").replace("", "").replace("/", ":")
-        except IndexError:
-            os.remove(found)
-            return 'Invalid image'
-        dfg.append(s)
-        df = pd.DataFrame(dfg)
-        df = df[0]
-
-        df.to_csv("df.csv", index=False)
-
-        df1 = pd.read_csv("df.csv")
-        ve = df1["0"].str.split(",")
-        fgf = ve.to_list()
-        dfgh = pd.DataFrame(fgf[0])
-        maindf = dfgh[0]  # .str.split(":")
-        # maindf.to_csv("main.csv")
-
-        main1 = maindf.to_list()
-        main1
-        # cv=pd.DataFrame(ve)
-        # cv
-        per = ["PER"]
-        org = ["ORG"]
-        loc = ["LOC"]
-        organizations = [i for i in main1 for j in org if j in i]
-        PErsons = [i for i in main1 for j in per if j in i]
-        location = [i for i in main1 for j in loc if j in i]
-
-        # ************************************* ORGANIZATION ********************************************************************
-        try:
-            if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
-                                                                                                         '').replace(
-                ']', '').replace(
-                '.com', '') + " /" + organizations[1].replace(":ORG", "").replace('"', '').replace('.com', ''))) < 4:
-                pass
-                # company_url()
-            else:
-
-                match = str(urlfinal[0]).lower()
-                match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace(
-                    'https',
-                    '').replace(
-                    'http', '').replace(":", "").replace("/", "").upper()
-                print(match)
-
-                s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace(']', '').replace(
-                    '.com', '') + " /" + \
-                      organizations[1].replace(":ORG", "").replace('"', '').replace('.com', '')
-                s1 = s1g.upper()
-                s2 = match.upper()
-                from difflib import SequenceMatcher
-                print(s1)
-                print(s2)
-                print(SequenceMatcher(None, s1, s2).ratio())
-                if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
-                    # and SequenceMatcher(None, s1, s2).ratio()<0.50:
-                    final.append(
-                        "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
-                                                                                                             '').replace(
-                            '.com', '').replace(']', '') + " /" + organizations[1].replace(":ORG", "").replace('"',
-                                                                                                               '').replace(
-                            '.com', '').replace(']', ''))
-                else:
-                    final.append("OrganizationName--" + s2)
-
-
-
-        except IndexError:
-            try:
-                if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('[', '').replace(']',
-                                                                                                             '').replace(
-                    '"',
-                    '').replace(
-                    '.com', ''))) < 4:
-                    pass
-                    # company_url()
-                else:
-
-                    match = str(urlfinal[0]).lower()
-                    match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace(
-                        'https', '').replace('http', '').replace(":", "").replace("/", "").upper()
-
-                    s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace(']',
-                                                                                                         '').replace(
-                        '.com', '')
-                    s1 = s1g.upper()
-                    s2 = match.upper()
-                    from difflib import SequenceMatcher
-                    print(s1)
-                    print(s2)
-                    print(SequenceMatcher(None, s1, s2).ratio())
-                    if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
-                        # and SequenceMatcher(None, s1, s2).ratio()<0.50:
-                        final.append(
-                            "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
-                                                                                                                 '').replace(
-                                ']', '').replace(
-                                '.com', '').replace(']', ''))
-                    else:
-                        final.append("OrganizationName--" + s2)
-
-            except IndexError:
-                org_name()
-                organisation()
-
-                # final.append("OrganizationName--")
-
-        # ************************************* CONTACT PERSON *******************************************************************
-        try:
-            final.append(
-                "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace('"', '').replace("]",
-                                                                                                                 "") +
-                PErsons[
-                    1].replace(":PER", "").replace('"', ''))
-        except IndexError:
-            try:
-                final.append(
-                    "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace("]", "").replace(
-                        '"',
-                        ''))
-            except IndexError:
-                org_name()
-                contactpersonname()
-                # final.append("CONTACTPERSONNAME--")
-        ###############address flair#####################
-
-        try:
-            print(
-                '############################################################# address new code #############################################################')
-            loactionlst = ['address', 'factory', 'd.no', 'h.no', 'h. no', 'plot', 'flat', 'plat']
-            loclst = [i for i in loactionlst if i in htext.lower()]
-
-            textaddress = htext
-            textaddress = textaddress.replace("|", ",")
-            textaddress = textaddress.lower()
-
-            nlp = pipeline('ner', model=model, tokenizer=tokenizer, aggregation_strategy="simple")
-            grop = nlp(textaddress)
-
-            citycountry = []
-            print('########################### city or country name ###########################')
-            d = grop[-1]
-
-            if d['entity_group'] == "COUNTRY":
-                print(d["word"])
-                citycountry.append(d["word"])
-            elif d['entity_group'] == "CITY":
-                print(d["word"])
-                citycountry.append(d["word"])
-
-            try:
-                address1 = loclst[0]
-            except IndexError:
-                address1 = (textaddress.partition(",")[0])
-                words = address1.split()
-                address1 = words[-1]
-
-            star_location = address1.lower()
-            end_location = citycountry[0].replace("#", "")
-            start = star_location
-            end = end_location
-            s = textaddress.lower()
-            middle_address = (s.split(start))[-1].split(end)[0]
-            Address = start + middle_address + end
-            Address = Address.replace('--', '').title()
-            print(Address)
-            if Address.count(',') < 2:
-                splitaddress()
-            else:
-                final.append('ADDRESS--' + Address)
-
-            # star_location = location[0].replace(":LOC", "").replace('"', '').replace('[', '')
-            # end_location = location[-1].replace(":LOC", "").replace('"', '').replace(']', '')
-            # d1 = star_location.split()
-            # d2 = end_location.split()
-            # d3 = d1[0]
-            # d4 = d2[0]
-            # start = d3
-            # end = d4
-            # s = horizontaltext
-            # middle_address = ((s.split(start))[1].split(end)[0])
-            # Address = d3 + middle_address + d4
-            # final.append('ADDRESS--' + Address)
-            # addrespinlst.append(Address)
-
-
-        except IndexError:
-            splitaddress()
-
-        ########################################## Designation ###########################################
-        import re
-        new = []
-        with open('test.txt', 'r') as f:
-            flag = False
-            for line in f:
-                line1 = line
-                line = line.upper()
-                matches = re.findall(
-                    r'''\bAPPRENTICE\b|\bEXECUTIVE\b|\bPROPRIETOR\b|\bPARTNER\b|\bMD\b|\bANALYST\b|\bPRACTITIONER\b|\bCUSTOMER\b|\bCOO\b|\bCOACH\b|\bADMINISTRATIVE\b|\bADMINISTRATOR\b|\bAGENT\b|\bHEAD\b|\bCHIEF\b|\bDIRECTOR\b|\bVICE\b|\bPRESIDENT\b|\bMANAGER\b|\bCOORDINATOR\b|\bCOUNSELOR\b|\bSUPERVISOR\b|\bASSISTANT\b|\bSPECIALIST\b|\bARTIST\b|\bWORKER\b|\bCONSULTANT\b|\bREPRESENTATIVE\b|\bARCHITECT\b|\bSTAFF\b|\bMEMBER\b|\bDEVELOPER\b|\bENGINEER\b|\bEXAMINOR\b|\bDOCTOR\b|\bPROFESSOR\b|\bTEACHER\b|\bLEAD\b|\bOFFICER\b|\bCEO\b|\bC.E.O\b|\bJUNIOR\b|\bSENIOR\b|\bPROFESSOR\b|\bSALES\b''',
-                    line)
-                for match in matches:
-                    line = line.replace('-', '')
-                    # print(line)
-                    o = "Designation--" + line
-                    new.append(o)
-                    remove_list.append(str(line1).replace('\n', ''))
-
-        try:
-            a = new[0].replace('\n', '')
-            final.append(a)
-
-        except IndexError:
-            final.append("Designation--")
-
-        ###################################################Phone number#################################################
-        num = []
-        import phonenumbers
-
-        # print(verticaltext)
-        numbers = phonenumbers.PhoneNumberMatcher(
-            verticaltext.replace('+91', '').replace('(0)', '').replace('(', '').replace(')', ''), "IN")
-
-        for number in numbers:
-            number = str(number).split(")")
-            num.append(number[1])
-            # num.append(number[-1])
-        if len(num) == 0:
-            final.append("ContactNumber--")
-            final.append("OrganizationNumber--")
-        elif len(num) > 1:
-            final.append("ContactNumber--" + num[0].replace(' ', ''))
-            final.append("OrganizationNumber--" + num[-1].replace(' ', ''))
-        elif len(num) == 1:
-            try:
-                final.append("ContactNumber--" + num[0].replace(' ', ''))
-                final.append("OrganizationNumber--")
-            except IndexError:
-                final.append("ContactNumber--")
-                final.append("OrganizationNumber--")
-        print(
-            '#############################################################  num #############################################################')
-        print(num)
-        # try:
-        #     final.append("PhoneNumber--" + num[0].replace(' ', ''))
-        #     remove_list.append(num[0])
-        # except IndexError:
-        #     pass
-        # try:
-        #     final.append("PhoneNumber1--" + num[1].replace(' ', ''))
-        #     remove_list.append(num[1])
-        # except IndexError:
-        #     pass
-        # try:
-        #     final.append("PhoneNumber2--" + num[2].replace(' ', ''))
-        #     remove_list.append(num[2])
-        # except IndexError:
-        #     pass
-
-        ################################################### Email######################################################
-        import re
-        from email_scraper import scrape_emails
-        s = list(scrape_emails(horizontaltext))
-        email_id = s
-
-        # email_id = []
-        # matches = re.findall(r'[\w\.-]+@[\w\.-]+', verticaltext)
-        # for match in matches:
-        #     email_id.append(match)
-
-        #     # final.append('Email--' + match)
-        #     email_ = str(email_id).replace("[", "").replace("]", "").replace("'", "")
-        #     # final.append(email_)
-
-        #     # final.append('Email--' + email_)
-        #     # remove_list.append(email_)
-        if len(email_id) > 1:
-            final.append(
-                'ContactEmail--' + str(email_id[0]).replace("[", "").replace("]", "").replace("\\n", "").replace("'",
-                                                                                                                 ""))
-            final.append(
-                'OrganizationEmail--' + str(email_id[-1]).replace("[", "").replace("]", "").replace("\\n", "").replace(
-                    "'",
-                    ""))
-        else:
-            try:
-                final.append(
-                    'ContactEmail--' + str(email_id[0]).replace("[", "").replace("]", "").replace("\\n", "").replace(
-                        "'",
-                        ""))
-                final.append('OrganizationEmail--')
-            except IndexError:
-                final.append('ContactEmail--')
-                final.append('OrganizationEmail--')
-
-        ###############PINCODE############
-
-        pinlst = []
-        print(addrespinlst)
-        import pgeocode
-
-        # try:
-        #     matche1 = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3}  \d{3}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', addrespinlst[0])
-        #     for i in matche1:
-        #         address3 = i.replace(' ', '').replace('-', '')
-        #         pinlst.append(address3)
-        # except IndexError:
-
-        lst = []
-        for i in num:
-            i = i[1:]
-            lst.append(i)
-
-        infile = r"vtext.txt"
-        outfile = r"cleaned_file.txt"
-        import glob
-        delete_list = lst
-        # delete_list = ["firstname1 lastname1","firstname2 lastname2","firstnamen lastnamen",'Director -  Sales  &  Business  Development']
-        fin = open(infile, "r+")
-        fout = open(outfile, "w+")
-        for line12 in fin:
-            for word in delete_list:
-                line12 = line12.replace(word, "")
-
-            fout.write(line12)
-        fin.close()
-        # print(line)
-
-        # print(addrespinlst)
-        import pgeocode
-        print(line12)
-        import re
-        matche1 = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3}  \d{3}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', line12)
-        for i in matche1:
-            address3 = i.replace(' ', '').replace('-', '')
-            pinlst.append(address3)
-
-        nomi = pgeocode.Nominatim('IN')
-        try:
-            a = nomi.query_postal_code(str(pinlst[-1]))
-            # print(a)
-            b = a.keys()
-            c = b.values.tolist()
-            d = a.tolist()
-            postal_code = "PinCode1" + "--" + d[0]
-            final.append(postal_code)
-            country_code = c[1] + "--" + str(d[1])
-            final.append(country_code)
-            place_name = 'LandMark1' + "--" + str(d[2])
-            final.append(place_name)
-            state_name = c[3] + "--" + str(d[3])
-            final.append(state_name)
-            state_code = c[4] + "--" + str(d[4])
-            final.append(state_code)
-            county_name = 'CityName1' + "--" + str(d[5])
-            final.append(county_name)
-
-        except (IndexError, NameError):
-            final.append("PinCode1--")
-            final.append("country_code--")
-            final.append("LandMark1--")
-            final.append("state_name--")
-            final.append("state_code--")
-            final.append("CityName1--")
-
-        ########################################################   json     #####################################################################
-
-        import pandas as pd
-        df = pd.DataFrame(final)
-        df1 = df[0].str.split('--', expand=True)
-        # print(df1)
-        df1.rename({df1.columns[-2]: 'Keys'}, axis=1, inplace=True)
-        df1.rename({df1.columns[-1]: 'Values'}, axis=1, inplace=True)
-        df1['Keys'] = df1['Keys'].str.strip()
-        df1.to_csv('path123.csv', index=False)
-        df2 = pd.read_csv('path123.csv')
-        print(df2)
-        df2 = df2.T
-        df2.to_csv('path1.csv', index=False, header=False)
-        df1 = pd.read_csv('path1.csv')
-        df1.to_json('firstjson1.json', orient="index")
-        import json
-        with open('firstjson1.json', 'r') as json_file:
-            json_load = json.load(json_file)
-        #     # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/businesscards/create"
-        nothing = json.dumps(json_load).replace("]", "").replace("[", "").replace('{"0":', '').replace('}}', '}')
-        # # print('--------------------------------------------------------------------------')
-        # # print(nothing)
-        empty = []
-        import base64
-        name = found
-        image = open(name, 'rb')
-        image_read = image.read()
-        image_64_encode = base64.b64encode(image_read)
-        NULL = 'null'
-        empty.append("ByteData--" + (NULL).strip('""'))
-        image_64_encode = image_64_encode.decode('utf-8')
-        empty.append("FileData--" + str(image_64_encode))
-        imagedata = name.split("/")
-        imagename = str(imagedata[-1]).replace('"', '').replace("[", "").replace("]", "")
-        imagename1 = str(imagename).split('.')
-        imagename = str(imagename1[-2]).replace("[", "]")
-        empty.append("FileName--" + imagename)
-        empty.append("FilePath--"+ "")
-        imageExtension = str(imagename1[-1]).replace("[", "]")
-        empty.append("FileType--" + imageExtension)
-        image.close()
-        import pandas as pd
-        df = pd.DataFrame(empty)
-        df = df[0].str.split("--", expand=True)
-        data1 = pd.DataFrame(df[0])
-        data2 = pd.DataFrame(df[1])
-        dt = data2.set_index(data1[0])
-        dt4 = dt.T
-        dictionary = dt4.to_dict(orient="index")
-        list1 = []
-        # list.append(a)
-        list1.append(dictionary[1])
-        # # final.append("image--"+str(dictionary[1]).replace("\'",'"'))
-        print('--------------------')
-        # print(namelist)
-        import json
-        # JSON data:
-        x = nothing
-        # python object to be appended
-        y = {"image": dictionary[1]}
-        # parsing JSON string:
-        z = json.loads(x)
-        # appending the data
-        z.update(y)
-        # the result is a JSON string:
-        # print(json.dumps(z))
-        zlist=[]
-        zlist.append(z)
-        #############################################creating csv#####################################
-        print(final)
-        print(imagelist)
-        final.append('image--' + str(imagelist))
-        import requests
-        import json
-        url = "https://anwi.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create/list" #dev
-        # url = "https://qa.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create" #testing
-        # url = "https://test.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create" # test
-        # url='http://localhost:3088/apis/v4/bizgaze/integrations/businesscards/create'
-        # url = 'https://c01.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create'  # C01
-        payload1 = json.dumps(zlist)
-        # print('--------------------------------------------------------------------------')
-        #print(payload1)
-        headers = {
-            #'Authorization': 'stat 1a936137490040c997928f485e3cdd7a',   #dev
-            'Authorization': 'stat 16516391d0074f4c8a15ea16fb49470b',#testing
-            # 'Authorization': 'stat 08e55fcfbaa940c8ab8145a074c444d1',
-            # 'Authorization': 'stat f7cdb402e01e44e5842878653946168f',  # c01
-            # 'Authorization': 'Stat c3e11b2fcbfe455b86a1fe6efde02a69',#demo
-            'Content-Type': 'application/json'
-        }
-        response = requests.request("POST", url, headers=headers, data=payload1)
-        # print("##############################################################")
-
-        #print(payload1)
-        print(response.text)
-        import os
-        if 'BusinessCards Created Successfully' in response.text:
-            print('present')
-            os.remove(found)
-        else:
-            print('not present')
-
-        df1.to_json('visitingcard.json')
-        data = df1.to_json('visiting.json', orient='records')
-        print(data)
-
-        #return render_template('index.html')
-       
-
-    return response.text
-    # return 'done'
-
-
-if __name__ == "__main__":
+from flask import Flask, render_template, request, redirect, Response, send_file
+import os
+import openai
+import requests
+import pandas as pd
+import pgeocode
+from email_scraper import scrape_emails
+import phonenumbers
+from pdfminer.high_level import extract_text
+import pytesseract
+import time
+import multiprocessing
+from PIL import Image
+from functools import partial
+from urlextract import URLExtract
+import pytesseract as tess
+from PIL import Image
+import os
+import glob
+
+from pytesseract import *
+import shutil
+import cv2
+import matplotlib
+from werkzeug.utils import secure_filename
+import requests
+import spacy
+import time
+import multiprocessing
+from PIL import Image
+from functools import partial
+
+import pandas as pd
+# Module-level setup: the statements below run once, when the module is imported.
+Current_Working_Directory=os.getcwd()
+Current_Working_Directory=Current_Working_Directory.replace("\\","/")
+nlp_model1 = spacy.load(Current_Working_Directory + "/Invoice_parser/p")
+# NOTE(review): the model path is built from the process working directory, so
+# loading presumably only works when started from the project root - confirm.
+
+# Flair provides Sentence/SequenceTagger; transformers provides the tokenizer
+# and token-classification model for the ml6team city/country NER checkpoint.
+from flair.data import Sentence
+from flair.models import SequenceTagger
+from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
+
+tokenizer = AutoTokenizer.from_pretrained("ml6team/bert-base-uncased-city-country-ner")
+model = AutoModelForTokenClassification.from_pretrained("ml6team/bert-base-uncased-city-country-ner")
+
+from paddleocr import PaddleOCR, draw_ocr
+# OCR engine (English, angle classification enabled) and the Flair NER tagger.
+ocr = PaddleOCR(use_angle_cls=True, lang='en', use_space_char=True, show_log=True)
+tagger = SequenceTagger.load("flair/ner-english-large")
+
+import datetime
+
+app = Flask(__name__)
+
+
+# app.config["IMAGE_UPLOADS"] = "C:/inetpub/wwwroot/FlaskApplication/Flask_Demo/upload/"
+
+@app.route('/', methods=['GET'])
+def card():
+    """Render the card.html template for GET requests to the site root."""
+    return render_template('card.html')
+
+
+@app.route('/upload_BusinessCards', methods=["POST"])
+# @app.route('/multiplecards', methods=["POST"])
+def multiplecards():
+    # print('################## multiple card detection #######################')
+    # print(Dataset)
+    datalist=[]
+    Dataset = request.get_json()
+    # print(data)
+    #datalist.append(Dataset)
+    data = {'visiting': Dataset}
+    for i in data['visiting']:
+        import time
+        # time.sleep(1)
+        a = i
+        x = a['FileData']
+        # print(x)
+        y = a['FileName']
+        z = a['FileType']
+        # CreatedBy=a['CreatedBy']
+
+        name = y + '.' + z
+        # print(name)
+        # print(y)
+        # image = y.split("/")
+        # filename=image[-1]
+
+        # print(x)
+        img_data = x.encode()
+
+        import base64
+        with open('./multicards/' + name, "wb") as fh:
+            fh.write(base64.decodebytes(img_data))
+        # print(i)
+
+        # import os
+        # import glob
+        # for i in glob.glob('./multipleupload/*'):
+
+        found = './multicards/' + name
+        print(found)
+        extension = found.split('.')[-1]
+
+        # for root, dirs, fils in os.glob('./multipleupload'):
+        #     for name in files:
+        #         foundfile= os.path.join(root, name)
+        #         print(foundfile)
+
+        import re
+        import csv
+        import glob
+        import os
+        # import pytesseract
+        # import cv2
+        import numpy as np
+        import glob
+        import os
+        import cv2
+        import requests
+        final = []
+        # final.append('assignto--'+CreatedBy)
+        imagelist = []
+        # print(found)
+        remove_list = []
+        import os
+        import glob
+        import pdfminer
+
+        # import os
+        # ts = 0
+        # for file_name in glob.glob('./upload/*'):
+        #     fts = os.path.getmtime(file_name)
+        #     if fts > ts:
+        #         ts = fts
+        #         found = file_name
+        # print(found)
+
+        # print(extension)
+
+        def org_name():
+            """Extract text from the upload and tag it with the Flair NER tagger.
+
+            For a non-PDF upload the image is converted to grayscale, written back
+            over ``found``, re-saved as images1.png and rendered to demo.pdf by
+            Tesseract, whose text pdfminer then extracts; a PDF is read directly.
+            Reads ``found`` and ``extension`` from the enclosing scope; returns None.
+
+            NOTE(review): ``organizations``, ``PErsons`` and ``location`` are plain
+            locals here (no nonlocal/global statement) and nothing is returned, so
+            the entity lists built at the end are not visible to any caller.
+            """
+            print('org_name is working')
+            import pytesseract
+            fname = found
+            if extension != 'pdf':
+                img = cv2.imread(fname)
+                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+                # Overwrites the uploaded file with its grayscale version.
+                cv2.imwrite(str(found), img)
+                from PIL import Image
+                im = Image.open(found)
+                im.save("images1.png", dpi=(1200, 1200))
+                fname = "images1.png"
+                import pytesseract as tess
+                from PIL import Image
+                # NOTE(review): hard-coded Windows location of the Tesseract binary.
+                tess.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
+                pdf = tess.image_to_pdf_or_hocr(fname, extension="pdf")
+                with open("demo.pdf", "w+b", ) as f:
+                    f.write(pdf)
+
+                from pdfminer.high_level import extract_text
+                text = extract_text('demo.pdf')
+            else:
+                from pdfminer.high_level import extract_text
+                text = extract_text(fname)
+
+            sentence = Sentence(text)
+            # predict NER tags; the outcome is read back from ``sentence`` below
+            tagger.predict(sentence)
+
+            ko = (sentence)
+            # Split the sentence's string form on the arrow character.
+            ko1 = str(ko).split("→")
+            import pandas as pd
+
+            dfg = []
+            try:
+                # NOTE(review): the first two replace() calls have an empty search
+                # string as written, which looks like a no-op - confirm nothing was lost.
+                s = ko1[1].replace("", "").replace("", "").replace("/", ":")
+                dfg.append(s)
+                df = pd.DataFrame(dfg)
+                df = df[0]
+
+                # Round-trip the string through df.csv, then split the cell on commas.
+                df.to_csv("df.csv", index=False)
+                df1 = pd.read_csv("df.csv")
+                ve = df1["0"].str.split(",")
+                fgf = ve.to_list()
+                dfgh = pd.DataFrame(fgf[0])
+                maindf = dfgh[0]
+                main1 = maindf.to_list()
+                # The bare expression below has no effect.
+                main1
+
+                per = ["PER"]
+                org = ["ORG"]
+                loc = ["LOC"]
+                # Bucket the pieces by the entity label substring each one contains.
+                organizations = [i for i in main1 for j in org if j in i]
+                PErsons = [i for i in main1 for j in per if j in i]
+                location = [i for i in main1 for j in loc if j in i]
+            except IndexError:
+                # e.g. ko1[1] is missing when the string form contains no arrow.
+                pass
+
+                # ************************************* ORGANIZATION ********************************************************************
+
+        def organisation():
+            """Append an "OrganizationName--..." entry to ``final``.
+
+            Tries, in order: two NER organisations, a single one, then the name
+            derived from ``urlfinal[0]`` alone.  Each step is abandoned on
+            IndexError; the last fallback is company().
+            """
+            print('organisation working ')
+            try:
+                # NOTE(review): the measured string always includes the 18-character
+                # "OrganizationName--" prefix, so this ``< 4`` test is never true.
+                if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
+                                                                                                             '').replace(
+                    '.com', '').replace('.in', '') + " /" + organizations[1].replace(":ORG", "").replace('"',
+                                                                                                         '').replace(
+                    '.com', ''))) < 4:
+                    pass
+                else:
+                    # urlfinal[0] raises IndexError too when it is empty.
+                    match = str(urlfinal[0]).lower()
+                    match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace(
+                        'https',
+                        '').replace(
+                        'http', '').replace(":", "").replace("/", "").upper()
+                    print(match)
+                    s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace('.com',
+                                                                                                         '') + " /" + \
+                          organizations[1].replace(":ORG", "").replace('"', '').replace('.com', '')
+                    s1 = s1g.upper()
+                    s2 = match.upper()
+                    from difflib import SequenceMatcher
+                    print(s1)
+                    print(s2)
+                    print(SequenceMatcher(None, s1, s2).ratio())
+                    if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
+                        # NER names are kept at similarity >= 0.10, else the URL name.
+                        final.append(
+                            "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
+                                                                                                                 '').replace(
+                                '.com', '').replace(']', '') + " /" + organizations[1].replace(":ORG", "").replace('"',
+                                                                                                                   '').replace(
+                                '.com',
+                                '').replace(']', ''))
+                    else:
+                        final.append("OrganizationName--" + s2)
+            except IndexError:
+                try:
+                    if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('[', '').replace(']',
+                                                                                                                 '').replace(
+                        '"',
+                        '').replace(
+                        '.com', '').replace('.in', ''))) < 4:
+                        pass
+                    else:
+                        match = str(urlfinal[0]).lower()
+                        match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co',
+                                                                                                         '').replace(
+                            'https', '').replace('http', '').replace(":", "").replace("/", "").upper()
+                        s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace('.com', '')
+                        s1 = s1g.upper()
+                        s2 = match.upper()
+                        from difflib import SequenceMatcher
+                        print(s1)
+                        print(s2)
+                        print(SequenceMatcher(None, s1, s2).ratio())
+                        if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
+                            final.append(
+                                "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace(
+                                    '[',
+                                    '').replace(
+                                    ']', '').replace(
+                                    '.com', ''))
+                        else:
+                            final.append("OrganizationName--" + s2)
+                except IndexError:
+                    try:
+                        # Last resort before company(): the URL-derived name alone.
+                        match = str(urlfinal[0]).lower()
+                        match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co',
+                                                                                                         '').upper()
+                        final.append("OrganizationName--" + match)
+                    except IndexError:
+                        company()
+
+        #################################################company Name########################################
+
+        def company():
+            """Fallback organisation lookup driven by business keywords.
+
+            Scans test.txt line by line (upper-cased) and records the first line
+            that contains one of the keywords below as the organisation name;
+            an empty "OrganizationName--" entry is recorded when none matches.
+            """
+            print('company list working')
+            import re
+
+            keyword_pattern = re.compile(
+                r'''\bENTERPRISE\b|\bTRADE\b|\bEMPIRE\b|\bSTORES\b|\bMACHINERY\b|\bINDUSTRIES\b|\bTECHNOLOGY\b|\bCOMPANY\b|\bDESIGNERS\b|\bPOLYMER\b|\bBELT\b|\bAGRO\b|\bPLASTIC\b|\bGROUP\b|\bTOOLS\b|\bENGG.\b|\bSOLUTION\b|\bCONSTRUCTION\b|\bPACK\b|\bELECT\b|\bSTEEL\b|\bIRON\b|\bDIES\b|\bMOULD\b|\bCORPORATION\b|\bSEEDS\b|\bPOWER\b|\bCONSULTANT\b|\bMFG.\b|\bPRINT\b|\bFOOD\b|\bSOLAR\b|\bINDUSTRY\b|\bLIMITED\b|\bPRIVATE\b|\bPVT\b|\bLTD\b|\bOUTSOURCING\b|\bCNC\b|\bMACHINERIES\b|\bSOLUTIONS\b|\bENGINEERS\b|\bWORKS\b|\bPRODUCTS\b|\bENTERPRISES\b|\bCOMPANIES\b|\bPOLYMERS\b|\bTRADING\b''')
+            hit = None
+
+            with open('test.txt', 'r+') as handle:
+                for raw in handle:
+                    candidate = raw.upper()
+                    # Only the first matching line is ever used, so later
+                    # matches are simply not remembered.
+                    if hit is None and keyword_pattern.search(candidate):
+                        hit = "OrganizationName--" + candidate
+
+            if hit is None:
+                final.append("OrganizationName--")
+            else:
+                final.append(hit.replace('\n', ''))
+
+        # ************************************* CONTACT PERSON *******************************************************************
+        def contactpersonname():
+            """Append a "CONTACTPERSONNAME--" entry built from up to two PErsons items."""
+            print('contactpersonname working')
+            label = "CONTACTPERSONNAME--"
+            try:
+                primary = PErsons[0]
+            except IndexError:
+                final.append(label)
+                return
+            for junk in (":PER", "[", '"', "]"):
+                primary = primary.replace(junk, "")
+            try:
+                secondary = PErsons[1].replace(":PER", "").replace('"', '')
+            except IndexError:
+                final.append(label + primary)
+            else:
+                final.append(label + primary + '/' + secondary)
+
+        def image_to_text():
+            """OCR the image at ``found`` with PaddleOCR and store the text.
+
+            The image is first converted to grayscale and written back over
+            ``found``.  Recognised strings shorter than four characters are
+            dropped; each remaining one is followed by a newline and the result is
+            stored on the ``image_to_text.txt`` function attribute, which the caller
+            reads.  An image with no recognised text yields an empty string.
+            """
+            import cv2
+            img_path = found
+            img = cv2.imread(img_path)
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+            # Overwrites the uploaded file with its grayscale version.
+            cv2.imwrite(str(found), img)
+
+            result = ocr.ocr(img_path, cls=True)
+            # PaddleOCR can report "no text found" as a None entry (or an empty
+            # result) rather than an empty list of lines; iterating that directly
+            # raised TypeError/IndexError, so normalise it to an empty list.
+            detections = (result[0] if result else None) or []
+
+            # Each detection is indexed as line[1][0] to get its text.
+            txts = [line[1][0] for line in detections]
+
+            # Build the text in one pass instead of repeated concatenation.
+            image_to_text.txt = "".join(
+                str(piece) + "\n" for piece in txts if len(piece) >= 4
+            )
+
+        def pdf_to_text():
+            """Extract the text of the PDF at ``found`` into ``pdf_to_text.txt``."""
+            from pdfminer.high_level import extract_text
+            pdf_to_text.txt = extract_text(found)
+            # The text is stored with its newlines intact.
+
+        extensionlist = ['JPEG', 'jpg', 'png', 'JPG', 'PNG', 'jpeg']
+
+        if extension in extensionlist:
+            print('image' + extension)
+            image_to_text()
+            x = image_to_text.txt
+
+        else:
+            print('pdf' + extension)
+            pdf_to_text()
+            x = pdf_to_text.txt
+
+        verticaltext = x
+        htext = x
+        # print('------------------------------------------------')
+        print(
+            '############################################################# this is verticaltext #################################################################')
+        print(verticaltext)
+        htext = htext.replace('\n', ' ')
+        print(
+            '############################################################# this is htext #############################################################')
+        print(htext)
+        y = x.replace('\n', ',')
+        y = y.replace('  ', ' ')
+        # y = y.replace(".", " .")
+        horizontaltext = y
+        # print('------------------------------------------------')
+        print(
+            '############################################################# this is horizontaltext #############################################################')
+        print(horizontaltext)
+
+        textfile = open("test123456.txt", "w")
+        a = textfile.write(verticaltext)
+        textfile.close()
+        textfile = open("vtext.txt", "w")
+        a = textfile.write(horizontaltext)
+        textfile.close()
+        with open('test123456.txt', 'r') as f:
+            with open('test.txt', 'w') as w:
+                for line in f:
+                    if line.strip().replace('|', ''):
+                        w.write(line)
+
+        ###########################ADDRESS##################################
+        addrespinlst = []
+
+        def splitaddress():
+            """Regex fallback that builds the ``ADDRESS--`` entry from the card text.
+
+            Reads ``htext`` from the enclosing scope. When both an address body and
+            a PIN-like number are found after the first comma, appends
+            ``'ADDRESS--<word>,<body>,<pin>'`` to ``final`` and the bare address to
+            ``addrespinlst``; otherwise it prints a marker line and appends nothing.
+            """
+            import re
+
+            textaddress = htext.replace('\n', ' ')
+
+            # The last word before the first comma opens the address. Guard the
+            # empty case: ``words[-1]`` used to raise IndexError when nothing precedes
+            # the comma, and this function is itself called from an IndexError handler,
+            # so that error went unhandled.
+            words = textaddress.partition(",")[0].split()
+            address1 = words[-1] if words else ""
+
+            # Everything after the first comma, with newlines and form feeds removed.
+            a = htext.partition(",")[2].replace('\n', ' ').replace('\x0c', '')
+
+            # Text leading up to a PIN-like number; only the last match is used.
+            address2 = None
+            matches = re.findall(
+                r'(.*?)-\d{3} \d{3}|(.*?)\b-\d{6}\b|(.*?)\b\d{6}\b|(.*?)\b\d{3}  \d{3}\b|\b(.*?)-\d{2}\b|(.*?)\b\d{3} \d{3}\b',
+                a)
+            if matches:
+                # findall yields one tuple per match (one slot per group); its repr is
+                # flattened by stripping quotes, parentheses, ', ,' runs and double spaces.
+                address2 = str(matches[-1]).replace("'", "").replace("(", "").replace(")", "").replace(
+                    ', ,', '').replace('  ', '')
+
+            # The PIN-like number itself; again only the last match is used.
+            pins = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3}  \d{3}\b|\b-\d{2}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', a)
+            address3 = pins[-1] if pins else None
+
+            if address2 is None or address3 is None:
+                # No usable address. This branch used to be reached by catching the
+                # NameError of the unbound pieces; the outcome is the same: log only.
+                # NOTE: a spaCy fallback (nlp_model1 over textaddress) was previously
+                # sketched here and is disabled.
+                print(
+                    '############################################################ Addressmodelworking #############################################################')
+                return
+
+            Address = address1 + "," + address2 + "," + address3
+            final.append('ADDRESS--' + Address)
+            addrespinlst.append(Address)
+
+        ################################################## website#######################################################
+
+        # import re
+
+        # url = []
+        # matches = re.findall(r'www.*', verticaltext)
+        # for match in matches:
+        #     if (match.count('.')) == 1:
+        #         a_string1 = match.replace("www", "www.")
+
+        #         final.append("Urls--" + a_string1)
+        #         url.append(a_string1)
+        #     else:
+
+        #         final.append("Urls--" + match)
+
+        # if len(url)==0:
+
+        #     from urlextract import URLExtract
+
+        #     extractor = URLExtract()
+        #     urls = extractor.find_urls(verticaltext)
+        #     try:
+        #         urllist = urls[0]
+        #         final.append("Urls--"+urllist)
+        #         url.append(urllist)
+        #     except IndexError:
+        #         final.append("Urls--")
+
+        #     for match in matches:
+        #         if (match.count('.')) == 1:
+        #             a_string1 = match.replace("www", "www.")
+
+        #             final.append("Urls--" + a_string1)
+        #             url.append(a_string1)
+        #         else:
+
+        #             final.append("Urls--" + match)
+        #             url.append(match)
+        #             remove_list.append(match)
+        # else:
+        #     final.append("Urls--" )
+
+        ################################################## website#######################################################
+
+        import re
+        # final=[]
+        url = []
+        urlfinal = []
+        matches = re.findall(r'www.*', verticaltext)
+        for match in matches:
+
+            if (match.count('.')) == 1:
+                a_string1 = match.replace("www", "www.")
+
+                # final.append("Urls--" + a_string1)
+                url.append(a_string1)
+            else:
+
+                url.append(match)
+
+        if len(url) == 0:
+
+            from urlextract import URLExtract
+
+            extractor = URLExtract()
+            urls = extractor.find_urls(verticaltext)
+            try:
+                urllist = urls[0]
+                url.append(urllist)
+                url.append(urllist)
+            except IndexError:
+                pass
+
+            for match in matches:
+                if (match.count('.')) == 1:
+                    a_string1 = match.replace("www", "www.")
+
+                    url.append(a_string1)
+                    # url.append(a_string1)
+                else:
+
+                    url.append(match)
+                    url.append(match)
+
+        else:
+            pass
+        try:
+            test_string = url[0]
+
+            test_list = ['com', 'www', 'in', 'co', "WWW", "COM", "CO", "IN"]
+
+            res = [ele for ele in test_list if (ele in test_string)]
+
+            if len(res) == 0:
+                print('no match')
+
+                final.append('urls--')
+
+
+            else:
+                print('matched')
+                final.append('urls--' + url[0])
+                urlfinal.append(url[0])
+
+
+        except IndexError:
+            final.append('urls--')
+
+        print(
+            '############################################################# url #############################################################')
+        print(url)
+        #######organisation and contact################
+
+        # def company_url():
+        #     # print('--url--')
+        #     # print(url)
+
+        #     try:
+        #         match = str(url[0]).lower()
+        #         match =match.replace('.com','').replace('www.','').replace('.in','').replace('.co','').upper()
+        #         final.append("OrganizationName--" + match)
+        #         # remove_list.append(match)
+        #     except IndexError:
+        #         org_name()
+        #         organisation()
+        # final.append("OrganizationName--")
+
+        # make example sentence
+
+        # print(horizontaltext)
+        sentence = Sentence(verticaltext)
+
+        # predict NER tags
+        tagger.predict(sentence)
+
+        # print sentence
+        ko = (sentence)
+
+        ko1 = str(ko).split("→")
+        import pandas as pd
+
+        dfg = []
+        try:
+            s = ko1[1].replace("", "").replace("", "").replace("/", ":")
+        except IndexError:
+            os.remove(found)
+            return 'Invalid image'
+        dfg.append(s)
+        df = pd.DataFrame(dfg)
+        df = df[0]
+
+        df.to_csv("df.csv", index=False)
+
+        df1 = pd.read_csv("df.csv")
+        ve = df1["0"].str.split(",")
+        fgf = ve.to_list()
+        dfgh = pd.DataFrame(fgf[0])
+        maindf = dfgh[0]  # .str.split(":")
+        # maindf.to_csv("main.csv")
+
+        main1 = maindf.to_list()
+        main1
+        # cv=pd.DataFrame(ve)
+        # cv
+        per = ["PER"]
+        org = ["ORG"]
+        loc = ["LOC"]
+        organizations = [i for i in main1 for j in org if j in i]
+        PErsons = [i for i in main1 for j in per if j in i]
+        location = [i for i in main1 for j in loc if j in i]
+
+        # ************************************* ORGANIZATION ********************************************************************
+        try:
+            if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
+                                                                                                         '').replace(
+                ']', '').replace(
+                '.com', '') + " /" + organizations[1].replace(":ORG", "").replace('"', '').replace('.com', ''))) < 4:
+                pass
+                # company_url()
+            else:
+
+                match = str(urlfinal[0]).lower()
+                match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace(
+                    'https',
+                    '').replace(
+                    'http', '').replace(":", "").replace("/", "").upper()
+                print(match)
+
+                s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace(']', '').replace(
+                    '.com', '') + " /" + \
+                      organizations[1].replace(":ORG", "").replace('"', '').replace('.com', '')
+                s1 = s1g.upper()
+                s2 = match.upper()
+                from difflib import SequenceMatcher
+                print(s1)
+                print(s2)
+                print(SequenceMatcher(None, s1, s2).ratio())
+                if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
+                    # and SequenceMatcher(None, s1, s2).ratio()<0.50:
+                    final.append(
+                        "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
+                                                                                                             '').replace(
+                            '.com', '').replace(']', '') + " /" + organizations[1].replace(":ORG", "").replace('"',
+                                                                                                               '').replace(
+                            '.com', '').replace(']', ''))
+                else:
+                    final.append("OrganizationName--" + s2)
+
+
+
+        except IndexError:
+            try:
+                if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('[', '').replace(']',
+                                                                                                             '').replace(
+                    '"',
+                    '').replace(
+                    '.com', ''))) < 4:
+                    pass
+                    # company_url()
+                else:
+
+                    match = str(urlfinal[0]).lower()
+                    match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace(
+                        'https', '').replace('http', '').replace(":", "").replace("/", "").upper()
+
+                    s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace(']',
+                                                                                                         '').replace(
+                        '.com', '')
+                    s1 = s1g.upper()
+                    s2 = match.upper()
+                    from difflib import SequenceMatcher
+                    print(s1)
+                    print(s2)
+                    print(SequenceMatcher(None, s1, s2).ratio())
+                    if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
+                        # and SequenceMatcher(None, s1, s2).ratio()<0.50:
+                        final.append(
+                            "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
+                                                                                                                 '').replace(
+                                ']', '').replace(
+                                '.com', '').replace(']', ''))
+                    else:
+                        final.append("OrganizationName--" + s2)
+
+            except IndexError:
+                org_name()
+                organisation()
+
+                # final.append("OrganizationName--")
+
+        # ************************************* CONTACT PERSON *******************************************************************
+        try:
+            final.append(
+                "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace('"', '').replace("]",
+                                                                                                                 "") +
+                PErsons[
+                    1].replace(":PER", "").replace('"', ''))
+        except IndexError:
+            try:
+                final.append(
+                    "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace("]", "").replace(
+                        '"',
+                        ''))
+            except IndexError:
+                org_name()
+                contactpersonname()
+                # final.append("CONTACTPERSONNAME--")
+        ###############address flair#####################
+
+        try:
+            print(
+                '############################################################# address new code #############################################################')
+            loactionlst = ['address', 'factory', 'd.no', 'h.no', 'h. no', 'plot', 'flat', 'plat']
+            loclst = [i for i in loactionlst if i in htext.lower()]
+
+            textaddress = htext
+            textaddress = textaddress.replace("|", ",")
+            textaddress = textaddress.lower()
+
+            nlp = pipeline('ner', model=model, tokenizer=tokenizer, aggregation_strategy="simple")
+            grop = nlp(textaddress)
+
+            citycountry = []
+            print('########################### city or country name ###########################')
+            d = grop[-1]
+
+            if d['entity_group'] == "COUNTRY":
+                print(d["word"])
+                citycountry.append(d["word"])
+            elif d['entity_group'] == "CITY":
+                print(d["word"])
+                citycountry.append(d["word"])
+
+            try:
+                address1 = loclst[0]
+            except IndexError:
+                address1 = (textaddress.partition(",")[0])
+                words = address1.split()
+                address1 = words[-1]
+
+            star_location = address1.lower()
+            end_location = citycountry[0].replace("#", "")
+            start = star_location
+            end = end_location
+            s = textaddress.lower()
+            middle_address = (s.split(start))[-1].split(end)[0]
+            Address = start + middle_address + end
+            Address = Address.replace('--', '').title()
+            print(Address)
+            if Address.count(',') < 2:
+                splitaddress()
+            else:
+                final.append('ADDRESS--' + Address)
+
+            # star_location = location[0].replace(":LOC", "").replace('"', '').replace('[', '')
+            # end_location = location[-1].replace(":LOC", "").replace('"', '').replace(']', '')
+            # d1 = star_location.split()
+            # d2 = end_location.split()
+            # d3 = d1[0]
+            # d4 = d2[0]
+            # start = d3
+            # end = d4
+            # s = horizontaltext
+            # middle_address = ((s.split(start))[1].split(end)[0])
+            # Address = d3 + middle_address + d4
+            # final.append('ADDRESS--' + Address)
+            # addrespinlst.append(Address)
+
+
+        except IndexError:
+            splitaddress()
+
+        ########################################## Designation ###########################################
+        import re
+        new = []
+        with open('test.txt', 'r') as f:
+            flag = False
+            for line in f:
+                line1 = line
+                line = line.upper()
+                matches = re.findall(
+                    r'''\bAPPRENTICE\b|\bEXECUTIVE\b|\bPROPRIETOR\b|\bPARTNER\b|\bMD\b|\bANALYST\b|\bPRACTITIONER\b|\bCUSTOMER\b|\bCOO\b|\bCOACH\b|\bADMINISTRATIVE\b|\bADMINISTRATOR\b|\bAGENT\b|\bHEAD\b|\bCHIEF\b|\bDIRECTOR\b|\bVICE\b|\bPRESIDENT\b|\bMANAGER\b|\bCOORDINATOR\b|\bCOUNSELOR\b|\bSUPERVISOR\b|\bASSISTANT\b|\bSPECIALIST\b|\bARTIST\b|\bWORKER\b|\bCONSULTANT\b|\bREPRESENTATIVE\b|\bARCHITECT\b|\bSTAFF\b|\bMEMBER\b|\bDEVELOPER\b|\bENGINEER\b|\bEXAMINOR\b|\bDOCTOR\b|\bPROFESSOR\b|\bTEACHER\b|\bLEAD\b|\bOFFICER\b|\bCEO\b|\bC.E.O\b|\bJUNIOR\b|\bSENIOR\b|\bPROFESSOR\b|\bSALES\b''',
+                    line)
+                for match in matches:
+                    line = line.replace('-', '')
+                    # print(line)
+                    o = "Designation--" + line
+                    new.append(o)
+                    remove_list.append(str(line1).replace('\n', ''))
+
+        try:
+            a = new[0].replace('\n', '')
+            final.append(a)
+
+        except IndexError:
+            final.append("Designation--")
+
+        ###################################################Phone number#################################################
+        num = []
+        import phonenumbers
+
+        # print(verticaltext)
+        numbers = phonenumbers.PhoneNumberMatcher(
+            verticaltext.replace('+91', '').replace('(0)', '').replace('(', '').replace(')', ''), "IN")
+
+        for number in numbers:
+            number = str(number).split(")")
+            num.append(number[1])
+            # num.append(number[-1])
+        if len(num) == 0:
+            final.append("ContactNumber--")
+            final.append("OrganizationNumber--")
+        elif len(num) > 1:
+            final.append("ContactNumber--" + num[0].replace(' ', ''))
+            final.append("OrganizationNumber--" + num[-1].replace(' ', ''))
+        elif len(num) == 1:
+            try:
+                final.append("ContactNumber--" + num[0].replace(' ', ''))
+                final.append("OrganizationNumber--")
+            except IndexError:
+                final.append("ContactNumber--")
+                final.append("OrganizationNumber--")
+        print(
+            '#############################################################  num #############################################################')
+        print(num)
+        # try:
+        #     final.append("PhoneNumber--" + num[0].replace(' ', ''))
+        #     remove_list.append(num[0])
+        # except IndexError:
+        #     pass
+        # try:
+        #     final.append("PhoneNumber1--" + num[1].replace(' ', ''))
+        #     remove_list.append(num[1])
+        # except IndexError:
+        #     pass
+        # try:
+        #     final.append("PhoneNumber2--" + num[2].replace(' ', ''))
+        #     remove_list.append(num[2])
+        # except IndexError:
+        #     pass
+
+        ################################################### Email######################################################
+        import re
+        from email_scraper import scrape_emails
+        s = list(scrape_emails(horizontaltext))
+        email_id = s
+
+        # email_id = []
+        # matches = re.findall(r'[\w\.-]+@[\w\.-]+', verticaltext)
+        # for match in matches:
+        #     email_id.append(match)
+
+        #     # final.append('Email--' + match)
+        #     email_ = str(email_id).replace("[", "").replace("]", "").replace("'", "")
+        #     # final.append(email_)
+
+        #     # final.append('Email--' + email_)
+        #     # remove_list.append(email_)
+        if len(email_id) > 1:
+            final.append(
+                'ContactEmail--' + str(email_id[0]).replace("[", "").replace("]", "").replace("\\n", "").replace("'",
+                                                                                                                 ""))
+            final.append(
+                'OrganizationEmail--' + str(email_id[-1]).replace("[", "").replace("]", "").replace("\\n", "").replace(
+                    "'",
+                    ""))
+        else:
+            try:
+                final.append(
+                    'ContactEmail--' + str(email_id[0]).replace("[", "").replace("]", "").replace("\\n", "").replace(
+                        "'",
+                        ""))
+                final.append('OrganizationEmail--')
+            except IndexError:
+                final.append('ContactEmail--')
+                final.append('OrganizationEmail--')
+
+        ###############PINCODE############
+
+        pinlst = []
+        print(addrespinlst)
+        import pgeocode
+
+        # try:
+        #     matche1 = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3}  \d{3}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', addrespinlst[0])
+        #     for i in matche1:
+        #         address3 = i.replace(' ', '').replace('-', '')
+        #         pinlst.append(address3)
+        # except IndexError:
+
+        lst = []
+        for i in num:
+            i = i[1:]
+            lst.append(i)
+
+        infile = r"vtext.txt"
+        outfile = r"cleaned_file.txt"
+        import glob
+        delete_list = lst
+        # delete_list = ["firstname1 lastname1","firstname2 lastname2","firstnamen lastnamen",'Director -  Sales  &  Business  Development']
+        fin = open(infile, "r+")
+        fout = open(outfile, "w+")
+        for line12 in fin:
+            for word in delete_list:
+                line12 = line12.replace(word, "")
+
+            fout.write(line12)
+        fin.close()
+        # print(line)
+
+        # print(addrespinlst)
+        import pgeocode
+        print(line12)
+        import re
+        matche1 = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3}  \d{3}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', line12)
+        for i in matche1:
+            address3 = i.replace(' ', '').replace('-', '')
+            pinlst.append(address3)
+
+        nomi = pgeocode.Nominatim('IN')
+        try:
+            a = nomi.query_postal_code(str(pinlst[-1]))
+            # print(a)
+            b = a.keys()
+            c = b.values.tolist()
+            d = a.tolist()
+            postal_code = "PinCode1" + "--" + d[0]
+            final.append(postal_code)
+            country_code = c[1] + "--" + str(d[1])
+            final.append(country_code)
+            place_name = 'LandMark1' + "--" + str(d[2])
+            final.append(place_name)
+            state_name = c[3] + "--" + str(d[3])
+            final.append(state_name)
+            state_code = c[4] + "--" + str(d[4])
+            final.append(state_code)
+            county_name = 'CityName1' + "--" + str(d[5])
+            final.append(county_name)
+
+        except (IndexError, NameError):
+            final.append("PinCode1--")
+            final.append("country_code--")
+            final.append("LandMark1--")
+            final.append("state_name--")
+            final.append("state_code--")
+            final.append("CityName1--")
+
+        ########################################################   json     #####################################################################
+
+        import pandas as pd
+        df = pd.DataFrame(final)
+        df1 = df[0].str.split('--', expand=True)
+        # print(df1)
+        df1.rename({df1.columns[-2]: 'Keys'}, axis=1, inplace=True)
+        df1.rename({df1.columns[-1]: 'Values'}, axis=1, inplace=True)
+        df1['Keys'] = df1['Keys'].str.strip()
+        df1.to_csv('path123.csv', index=False)
+        df2 = pd.read_csv('path123.csv')
+        print(df2)
+        df2 = df2.T
+        df2.to_csv('path1.csv', index=False, header=False)
+        df1 = pd.read_csv('path1.csv')
+        df1.to_json('firstjson1.json', orient="index")
+        import json
+        with open('firstjson1.json', 'r') as json_file:
+            json_load = json.load(json_file)
+        #     # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/businesscards/create"
+        nothing = json.dumps(json_load).replace("]", "").replace("[", "").replace('{"0":', '').replace('}}', '}')
+        # # print('--------------------------------------------------------------------------')
+        # # print(nothing)
+        empty = []
+        import base64
+        name = found
+        image = open(name, 'rb')
+        image_read = image.read()
+        image_64_encode = base64.b64encode(image_read)
+        NULL = 'null'
+        empty.append("ByteData--" + (NULL).strip('""'))
+        image_64_encode = image_64_encode.decode('utf-8')
+        empty.append("FileData--" + str(image_64_encode))
+        imagedata = name.split("/")
+        imagename = str(imagedata[-1]).replace('"', '').replace("[", "").replace("]", "")
+        imagename1 = str(imagename).split('.')
+        imagename = str(imagename1[-2]).replace("[", "]")
+        empty.append("FileName--" + imagename)
+        empty.append("FilePath--"+ "")
+        imageExtension = str(imagename1[-1]).replace("[", "]")
+        empty.append("FileType--" + imageExtension)
+        image.close()
+        import pandas as pd
+        df = pd.DataFrame(empty)
+        df = df[0].str.split("--", expand=True)
+        data1 = pd.DataFrame(df[0])
+        data2 = pd.DataFrame(df[1])
+        dt = data2.set_index(data1[0])
+        dt4 = dt.T
+        dictionary = dt4.to_dict(orient="index")
+        list1 = []
+        # list.append(a)
+        list1.append(dictionary[1])
+        # # final.append("image--"+str(dictionary[1]).replace("\'",'"'))
+        print('--------------------')
+        # print(namelist)
+        import json
+        # JSON data:
+        x = nothing
+        # python object to be appended
+        y = {"image": dictionary[1]}
+        # parsing JSON string:
+        z = json.loads(x)
+        # appending the data
+        z.update(y)
+        # the result is a JSON string:
+        # print(json.dumps(z))
+        zlist=[]
+        zlist.append(z)
+        #############################################creating csv#####################################
+        print(final)
+        print(imagelist)
+        final.append('image--' + str(imagelist))
+        import requests
+        import json
+        url = "https://anwi.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create/list" #dev
+        # url = "https://qa.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create" #testing
+        # url = "https://test.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create" # test
+        # url='http://localhost:3088/apis/v4/bizgaze/integrations/businesscards/create'
+        # url = 'https://c01.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create'  # C01
+        payload1 = json.dumps(zlist)
+        # print('--------------------------------------------------------------------------')
+        #print(payload1)
+        headers = {
+            #'Authorization': 'stat 1a936137490040c997928f485e3cdd7a',   #dev
+            'Authorization': 'stat 16516391d0074f4c8a15ea16fb49470b',#testing
+            # 'Authorization': 'stat 08e55fcfbaa940c8ab8145a074c444d1',
+            # 'Authorization': 'stat f7cdb402e01e44e5842878653946168f',  # c01
+            # 'Authorization': 'Stat c3e11b2fcbfe455b86a1fe6efde02a69',#demo
+            'Content-Type': 'application/json'
+        }
+        response = requests.request("POST", url, headers=headers, data=payload1)
+        # print("##############################################################")
+
+        #print(payload1)
+        print(response.text)
+        import os
+        if 'BusinessCards Created Successfully' in response.text:
+            print('present')
+            os.remove(found)
+        else:
+            print('not present')
+
+        df1.to_json('visitingcard.json')
+        data = df1.to_json('visiting.json', orient='records')
+        print(data)
+
+        #return render_template('index.html')
+       
+
+    return response.text
+    # return 'done'
+
+
+if __name__ == "__main__":  # when run as a script, start the app on all interfaces (0.0.0.0), port 1112
     app.run(host='0.0.0.0', port=1112)
\ No newline at end of file