SadhulaSaiKumar
/
AI


			
							12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030
							from flask import Flask, render_template, request, redirect, Response, send_file
import os
# import openai
import requests
import pandas as pd
import pgeocode
from email_scraper import scrape_emails
import phonenumbers
from pdfminer.high_level import extract_text
import pytesseract
import time
import multiprocessing
from PIL import Image
from functools import partial
from urlextract import URLExtract
import pytesseract as tess
from PIL import Image
# from doctr.io import DocumentFile
# from doctr.models import ocr_predictor
# model = ocr_predictor(pretrained=True)
# load tagger
######################################################
import os
import glob

from pytesseract import *
import shutil
import cv2
import matplotlib
from werkzeug.utils import secure_filename
import requests
import spacy
import time
import multiprocessing
from PIL import Image
from functools import partial
# nlp_model = spacy.load("D:/projects/C01app/Resume_parser/ME")
# nlp_model1 = spacy.load("D:/projects/C01app/Resume_parser/bdeeducation_50_0.2")
from flask import Flask, render_template, request, redirect, Response, send_file

import pandas as pd

################################################################
Current_Working_Directory = os.getcwd()
Current_Working_Directory = Current_Working_Directory.replace("\\", "/")
# nlp_model1 = spacy.load(Current_Working_Directory + "/Invoice_parser/p")

################################################################
# import spacy

# nlp_model1 = spacy.load('./ADD3001.2')
from flair.data import Sentence
from flair.models import SequenceTagger
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

tokenizer = AutoTokenizer.from_pretrained("ml6team/bert-base-uncased-city-country-ner")
model = AutoModelForTokenClassification.from_pretrained("ml6team/bert-base-uncased-city-country-ner")

from paddleocr import PaddleOCR, draw_ocr

ocr = PaddleOCR(use_angle_cls=True, lang='en', use_space_char=True, show_log=False)
tagger = SequenceTagger.load("flair/ner-english-large")
# tagger.to("cuda")
import datetime

app = Flask(__name__)


# app.config["IMAGE_UPLOADS"] = "C:/inetpub/wwwroot/FlaskApplication/Flask_Demo/upload/"


@app.route('/', methods=['GET'])
def home():
    return render_template('home.html')


@app.route('/resume', methods=['GET'])
def resume():
    return render_template('resume.html')


@app.route('/invoice', methods=['GET'])
def invoice():
    return render_template('invoice.html')


@app.route('/card', methods=['GET'])
def card():
    return render_template('card.html')


@app.route('/upload_BusinessCards', methods=["POST"])
# @app.route('/multiplecards', methods=["POST"])
def multiplecards():
    # print('################## multiple card detection #######################')
    # print(Dataset)
    datalist = []
    zlist = []
    Dataset = request.get_json()
    # print(data)
    # datalist.append(Dataset)
    data = {'visiting': Dataset}
    for i in data['visiting']:
        import time
        # time.sleep(1)
        a = i
        x = a['FileData']
        # print(x)
        y = a['FileName']
        z = a['FileType']
        # CreatedBy=a['CreatedBy']

        name = y + '.' + z
        # print(name)
        # print(y)
        # image = y.split("/")
        # filename=image[-1]

        # print(x)
        img_data = x.encode()

        import base64
        with open('./multicards/' + name, "wb") as fh:
            fh.write(base64.decodebytes(img_data))
        # print(i)

        # import os
        # import glob
        # for i in glob.glob('./multipleupload/*'):

        found = './multicards/' + name
        print(found)
        extension = found.split('.')[-1]

        # for root, dirs, fils in os.glob('./multipleupload'):
        #     for name in files:
        #         foundfile= os.path.join(root, name)
        #         print(foundfile)

        import re
        import csv
        import glob
        import os
        # import pytesseract
        # import cv2
        import numpy as np
        import glob
        import os
        import cv2
        import requests
        final = []
        # final.append('assignto--'+CreatedBy)
        imagelist = []
        # print(found)
        remove_list = []
        import os
        import glob
        import pdfminer

        # import os
        # ts = 0
        # for file_name in glob.glob('./upload/*'):
        #     fts = os.path.getmtime(file_name)
        #     if fts > ts:
        #         ts = fts
        #         found = file_name
        # print(found)

        # print(extension)

        def org_name():
            print('org_name is working')
            import pytesseract
            fname = found
            if extension != 'pdf':

                img = cv2.imread(fname)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

                cv2.imwrite(str(found), img)
                from PIL import Image
                im = Image.open(found)
                im.save("images1.png", dpi=(1200, 1200))
                # import pytesseract
                fname = "images1.png"
                import pytesseract as tess
                from PIL import Image

                tess.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
                pdf = tess.image_to_pdf_or_hocr(fname, extension="pdf")
                with open("demo.pdf", "w+b", ) as f:
                    f.write(pdf)

                from pdfminer.high_level import extract_text
                text = extract_text('demo.pdf')
                # doc = DocumentFile.from_images(found)
                # result = model(doc)
                # text = result.render()

                # from pdfminer.high_level import extract_text
                # txt  = extract_text('demo.pdf')
            else:
                from pdfminer.high_level import extract_text
                text = extract_text(fname)

            sentence = Sentence(text)

            # predict NER tags
            tagger.predict(sentence)

            # print sentence
            ko = (sentence)

            ko1 = str(ko).split("→")
            import pandas as pd

            dfg = []
            try:
                s = ko1[1].replace("", "").replace("", "").replace("/", ":")

                # os.remove(found)
                # return 'Invalid image'
                dfg.append(s)
                df = pd.DataFrame(dfg)
                df = df[0]

                df.to_csv("df.csv", index=False)

                df1 = pd.read_csv("df.csv")
                ve = df1["0"].str.split(",")
                fgf = ve.to_list()
                dfgh = pd.DataFrame(fgf[0])
                maindf = dfgh[0]  # .str.split(":")
                # maindf.to_csv("main.csv")

                main1 = maindf.to_list()
                main1
                # cv=pd.DataFrame(ve)
                # cv
                per = ["PER"]
                org = ["ORG"]
                loc = ["LOC"]
                organizations = [i for i in main1 for j in org if j in i]
                PErsons = [i for i in main1 for j in per if j in i]
                location = [i for i in main1 for j in loc if j in i]
            except IndexError:
                pass

                # ************************************* ORGANIZATION ********************************************************************

        def organisation():
            print('organisation working ')
            try:
                if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
                                                                                                             '').replace(
                    '.com', '').replace('.in', '') + " /" + organizations[1].replace(":ORG", "").replace('"',
                                                                                                         '').replace(
                    '.com', ''))) < 4:
                    pass


                else:

                    match = str(urlfinal[0]).lower()
                    match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace(
                        'https',
                        '').replace(
                        'http', '').replace(":", "").replace("/", "").upper()
                    print(match)

                    s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace('.com',
                                                                                                         '') + " /" + \
                          organizations[1].replace(":ORG", "").replace('"', '').replace('.com', '')
                    s1 = s1g.upper()
                    s2 = match.upper()
                    from difflib import SequenceMatcher
                    print(s1)
                    print(s2)
                    print(SequenceMatcher(None, s1, s2).ratio())
                    if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
                        # and SequenceMatcher(None, s1, s2).ratio()<0.50:
                        final.append(
                            "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
                                                                                                                 '').replace(
                                '.com', '').replace(']', '') + " /" + organizations[1].replace(":ORG", "").replace('"',
                                                                                                                   '').replace(
                                '.com',
                                '').replace(']', ''))
                    else:
                        final.append("OrganizationName--" + s2)

            except IndexError:
                try:
                    if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('[', '').replace(']',
                                                                                                                 '').replace(
                        '"',
                        '').replace(
                        '.com', '').replace('.in', ''))) < 4:
                        pass

                    else:
                        match = str(urlfinal[0]).lower()
                        match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co',
                                                                                                         '').replace(
                            'https', '').replace('http', '').replace(":", "").replace("/", "").upper()

                        s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace('.com', '')
                        s1 = s1g.upper()
                        s2 = match.upper()
                        from difflib import SequenceMatcher
                        print(s1)
                        print(s2)
                        print(SequenceMatcher(None, s1, s2).ratio())
                        if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
                            # and SequenceMatcher(None, s1, s2).ratio()<0.50:
                            final.append(
                                "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace(
                                    '[',
                                    '').replace(
                                    ']', '').replace(
                                    '.com', ''))
                        else:
                            final.append("OrganizationName--" + s2)

                except IndexError:
                    try:
                        match = str(urlfinal[0]).lower()
                        match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co',
                                                                                                         '').upper()
                        final.append("OrganizationName--" + match)
                        # remove_list.append(match)
                    except IndexError:
                        company()

        #################################################company Name########################################

        def company():
            print('company list working')
            import re

            new = []
            with open('test.txt', 'r+') as f:
                flag = False
                for line in f:
                    line = line.upper()
                    matches = re.findall(
                        r'''\bENTERPRISE\b|\bTRADE\b|\bEMPIRE\b|\bSTORES\b|\bMACHINERY\b|\bINDUSTRIES\b|\bTECHNOLOGY\b|\bCOMPANY\b|\bDESIGNERS\b|\bPOLYMER\b|\bBELT\b|\bAGRO\b|\bPLASTIC\b|\bGROUP\b|\bTOOLS\b|\bENGG.\b|\bSOLUTION\b|\bCONSTRUCTION\b|\bPACK\b|\bELECT\b|\bSTEEL\b|\bIRON\b|\bDIES\b|\bMOULD\b|\bCORPORATION\b|\bSEEDS\b|\bPOWER\b|\bCONSULTANT\b|\bMFG.\b|\bPRINT\b|\bFOOD\b|\bSOLAR\b|\bINDUSTRY\b|\bLIMITED\b|\bPRIVATE\b|\bPVT\b|\bLTD\b|\bOUTSOURCING\b|\bCNC\b|\bMACHINERIES\b|\bSOLUTIONS\b|\bENGINEERS\b|\bWORKS\b|\bPRODUCTS\b|\bENTERPRISES\b|\bCOMPANIES\b|\bPOLYMERS\b|\bTRADING\b''',
                        line)

                    for i in matches:
                        if i in line:
                            flag = True
                            if flag:
                                o = "OrganizationName--" + line
                                new.append(o)
            #                       if line.startswith('\n'):
            #                           flag = False
            try:
                a = new[0].replace('\n', '')
                final.append(a)
            except IndexError:
                final.append("OrganizationName--")

        # ************************************* CONTACT PERSON *******************************************************************
        def contactpersonname():
            print('contactpersonname working')
            try:
                final.append(
                    "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace('"', '').replace(
                        "]",
                        "") + '/' +
                    PErsons[
                        1].replace(":PER", "").replace('"', ''))
            except IndexError:
                try:
                    final.append(
                        "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace("]",
                                                                                                        "").replace(
                            '"', ''))
                except IndexError:
                    final.append("CONTACTPERSONNAME--")

        def image_to_text():

            # doc = DocumentFile.from_images(found)
            # result = model(doc)
            # image_to_text.txt = result.render()

            # tess.pytesseract.tesseract_cmd = r"C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe"
            # img = Image.open(found)
            # text = tess.image_to_string(img)
            # image_to_text.txt = text
            # print(text)
            import cv2
            img_path = found
            img = cv2.imread(img_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            cv2.imwrite(str(found), img)

            result = ocr.ocr(img_path, cls=True)
            result = result[0]

            txts = [line[1][0] for line in result]

            image_to_text.txt = ""
            for i in txts:
                if len(i) < 4:
                    continue
                    # print(i+"\n")
                image_to_text.txt = image_to_text.txt + str(i) + "\n"
                # print(image_to_text.txt)

        def pdf_to_text():

            from pdfminer.high_level import extract_text
            pdf_to_text.txt = extract_text(found)
            # pdf_to_text.txt= text.replace('\n', ' ')

        extensionlist = ['JPEG', 'jpg', 'png', 'JPG', 'PNG', 'jpeg']

        if extension in extensionlist:
            print('image' + extension)
            image_to_text()
            x = image_to_text.txt

        else:
            print('pdf' + extension)
            pdf_to_text()
            x = pdf_to_text.txt

        verticaltext = x
        htext = x
        # print('------------------------------------------------')
        # print('############################################################# this is verticaltext #################################################################')
        print(verticaltext)
        htext = htext.replace('\n', ' ')
        # print('############################################################# this is htext #############################################################')
        # print(htext)
        y = x.replace('\n', ',')
        y = y.replace('  ', ' ')
        # y = y.replace(".", " .")
        horizontaltext = y
        # print('------------------------------------------------')
        # print('############################################################# this is horizontaltext #############################################################')
        # print(horizontaltext)

        textfile = open("test123456.txt", "w")
        a = textfile.write(verticaltext)
        textfile.close()
        textfile = open("vtext.txt", "w")
        a = textfile.write(horizontaltext)
        textfile.close()
        with open('test123456.txt', 'r') as f:
            with open('test.txt', 'w') as w:
                for line in f:
                    if line.strip().replace('|', ''):
                        w.write(line)

        ###########################ADDRESS##################################
        addrespinlst = []

        def splitaddress():
            import re
            textaddress = htext.replace('\n', ' ')
            # print(textaddress)

            address1 = (textaddress.partition(",")[0])
            words = address1.split()
            address1 = words[-1]
            addre = (htext.partition(",")[2])
            a = addre.replace('\n', ' ').replace('\x0c', '')
            addre = (a.partition(",")[2])
            matches = re.findall(
                r'(.*?)-\d{3} \d{3}|(.*?)\b-\d{6}\b|(.*?)\b\d{6}\b|(.*?)\b\d{3}  \d{3}\b|\b(.*?)-\d{2}\b|(.*?)\b\d{3} \d{3}\b',
                a)
            for match in matches:
                address2 = match
                address2 = str(address2)
                address2 = address2.replace("'", "").replace("(", "").replace(")", "").replace(', ,', '').replace('  ',
                                                                                                                  '')

            matches = re.findall(r'-\d{6}\b|\b\d{6}\b|\b\d{3}  \d{3}\b|\b-\d{2}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', a)
            for address3 in matches:
                pass
            try:
                Address = address1 + "," + address2 + "," + address3
                final.append('ADDRESS--' + Address)
                addrespinlst.append(Address)

            except NameError:
                final.append('ADDRESS--')

                # print('############################################################ Addressmodelworking #############################################################')

                # doc = nlp_model1(textaddress)
                # addlist = []
                # for ent in doc.ents:
                #     name = (f'{ent.label_.upper():{10}}--{ent.text}')
                #     addlist.append(name)
                # try:
                #     Address = addlist[0]
                #     final.append(Address)
                #     addrespinlst.append(Address)
                #     remove_list.append(
                #         str(Address).replace("[", "").replace("]", "").replace("\\n", "").replace("'", "").replace(
                #             "ADDRESS--",
                #             ""))
                # except IndexError:
                #     final.append("ADDRESS--")
                pass

        ################################################## website#######################################################

        # import re

        # url = []
        # matches = re.findall(r'www.*', verticaltext)
        # for match in matches:
        #     if (match.count('.')) == 1:
        #         a_string1 = match.replace("www", "www.")

        #         final.append("Urls--" + a_string1)
        #         url.append(a_string1)
        #     else:

        #         final.append("Urls--" + match)

        # if len(url)==0:

        #     from urlextract import URLExtract

        #     extractor = URLExtract()
        #     urls = extractor.find_urls(verticaltext)
        #     try:
        #         urllist = urls[0]
        #         final.append("Urls--"+urllist)
        #         url.append(urllist)
        #     except IndexError:
        #         final.append("Urls--")

        #     for match in matches:
        #         if (match.count('.')) == 1:
        #             a_string1 = match.replace("www", "www.")

        #             final.append("Urls--" + a_string1)
        #             url.append(a_string1)
        #         else:

        #             final.append("Urls--" + match)
        #             url.append(match)
        #             remove_list.append(match)
        # else:
        #     final.append("Urls--" )

        ################################################## website#######################################################

        import re
        # final=[]
        url = []
        urlfinal = []
        matches = re.findall(r'www.*', verticaltext)
        for match in matches:

            if (match.count('.')) == 1:
                a_string1 = match.replace("www", "www.")

                # final.append("Urls--" + a_string1)
                url.append(a_string1)
            else:

                url.append(match)

        if len(url) == 0:

            from urlextract import URLExtract

            extractor = URLExtract()
            urls = extractor.find_urls(verticaltext)
            try:
                urllist = urls[0]
                url.append(urllist)
                url.append(urllist)
            except IndexError:
                pass

            for match in matches:
                if (match.count('.')) == 1:
                    a_string1 = match.replace("www", "www.")

                    url.append(a_string1)
                    # url.append(a_string1)
                else:

                    url.append(match)
                    url.append(match)

        else:
            pass
        try:
            test_string = url[0]

            test_list = ['com', 'www', 'in', 'co', "WWW", "COM", "CO", "IN"]

            res = [ele for ele in test_list if (ele in test_string)]

            if len(res) == 0:
                print('no match')

                final.append('urls--')


            else:
                print('matched')
                final.append('urls--' + url[0])
                urlfinal.append(url[0])


        except IndexError:
            final.append('urls--')

        print(
            '############################################################# url #############################################################')
        print(url)
        #######organisation and contact################

        # def company_url():
        #     # print('--url--')
        #     # print(url)

        #     try:
        #         match = str(url[0]).lower()
        #         match =match.replace('.com','').replace('www.','').replace('.in','').replace('.co','').upper()
        #         final.append("OrganizationName--" + match)
        #         # remove_list.append(match)
        #     except IndexError:
        #         org_name()
        #         organisation()
        # final.append("OrganizationName--")

        # make example sentence

        # print(horizontaltext)
        sentence = Sentence(verticaltext)

        # predict NER tags
        tagger.predict(sentence)

        # print sentence
        ko = (sentence)

        ko1 = str(ko).split("→")
        import pandas as pd

        dfg = []
        try:
            s = ko1[1].replace("", "").replace("", "").replace("/", ":")
        except IndexError:
            os.remove(found)
            return 'Invalid image'
        dfg.append(s)
        df = pd.DataFrame(dfg)
        df = df[0]

        df.to_csv("df.csv", index=False)

        df1 = pd.read_csv("df.csv")
        ve = df1["0"].str.split(",")
        fgf = ve.to_list()
        dfgh = pd.DataFrame(fgf[0])
        maindf = dfgh[0]  # .str.split(":")
        # maindf.to_csv("main.csv")

        main1 = maindf.to_list()
        main1
        # cv=pd.DataFrame(ve)
        # cv
        per = ["PER"]
        org = ["ORG"]
        loc = ["LOC"]
        organizations = [i for i in main1 for j in org if j in i]
        PErsons = [i for i in main1 for j in per if j in i]
        location = [i for i in main1 for j in loc if j in i]

        # ************************************* ORGANIZATION ********************************************************************
        try:
            if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
                                                                                                         '').replace(
                ']', '').replace(
                '.com', '') + " /" + organizations[1].replace(":ORG", "").replace('"', '').replace('.com', ''))) < 4:
                pass
                # company_url()
            else:

                match = str(urlfinal[0]).lower()
                match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace(
                    'https',
                    '').replace(
                    'http', '').replace(":", "").replace("/", "").upper()
                print(match)

                s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace(']', '').replace(
                    '.com', '') + " /" + \
                      organizations[1].replace(":ORG", "").replace('"', '').replace('.com', '')
                s1 = s1g.upper()
                s2 = match.upper()
                from difflib import SequenceMatcher
                print(s1)
                print(s2)
                print(SequenceMatcher(None, s1, s2).ratio())
                if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
                    # and SequenceMatcher(None, s1, s2).ratio()<0.50:
                    final.append(
                        "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
                                                                                                             '').replace(
                            '.com', '').replace(']', '') + " /" + organizations[1].replace(":ORG", "").replace('"',
                                                                                                               '').replace(
                            '.com', '').replace(']', ''))
                else:
                    final.append("OrganizationName--" + s2)


        except IndexError:
            try:
                if len(("OrganizationName--" + organizations[0].replace(":ORG", "").replace('[', '').replace(']',
                                                                                                             '').replace(
                    '"',
                    '').replace(
                    '.com', ''))) < 4:
                    pass
                    # company_url()
                else:

                    match = str(urlfinal[0]).lower()
                    match = match.replace('.com', '').replace('www.', '').replace('.in', '').replace('.co', '').replace(
                        'https', '').replace('http', '').replace(":", "").replace("/", "").upper()

                    s1g = organizations[0].replace(":ORG", "").replace('"', '').replace('[', '').replace(']',
                                                                                                         '').replace(
                        '.com', '')
                    s1 = s1g.upper()
                    s2 = match.upper()
                    from difflib import SequenceMatcher
                    print(s1)
                    print(s2)
                    print(SequenceMatcher(None, s1, s2).ratio())
                    if SequenceMatcher(None, s1, s2).ratio() >= 0.10:
                        # and SequenceMatcher(None, s1, s2).ratio()<0.50:
                        final.append(
                            "OrganizationName--" + organizations[0].replace(":ORG", "").replace('"', '').replace('[',
                                                                                                                 '').replace(
                                ']', '').replace(
                                '.com', '').replace(']', ''))
                    else:
                        final.append("OrganizationName--" + s2)

            except IndexError:
                company()
                # org_name()
                # organisation()

                # final.append("OrganizationName--")
        ################################################### Email######################################################
        import re
        from email_scraper import scrape_emails
        s = list(scrape_emails(horizontaltext))
        email_id1 = s
        import re
        email_id=[]
# Define a function to extract email addresses from a text
        def extract_emails(text):
            email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b'
            return re.findall(email_pattern, text)

# List of text strings


# Iterate through the list and extract email addresses from each value
        for text in email_id1 :
            email_addresses = extract_emails(text)
    
    # Print the extracted email addresses
            if email_addresses:
       # print("Email addresses in the text:")
                for email in email_addresses:
            #print(email)
                    email_id.append(email)
            
            else:
                print("No email addresses found in the text.")
    
        # Remove "email" if it exists within square brackets
        email_id = [item.replace("email", "").replace("Email", "").replace("E-mail", "") for item in email_id]


        # ************************************* CONTACT PERSON *******************************************************************
        try:
            my_string='Hello'
            print(my_string[-6])
            # final.append(
            #     "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace('"', '').replace("]",
            #                                                                                                      "") +
            #     PErsons[
            #         1].replace(":PER", "").replace('"', ''))+PErsons[2].replace(":PER", "").replace("[", "").replace('"', '').replace("]","")
                                                                                                                 
        except IndexError:
            try:
                final.append(
                    "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace("]", "").replace(
                        '"', ''))
                person_name=PErsons[0].replace(":PER", "").replace("[", "").replace("]", "").replace('"', '').replace(' ','')
                if not email_id:
                    final.append('ContactEmail--')
                    final.append('OrganizationEmail--')
                else:
                    per_Name=[]
                    per_Name.append(person_name)
                
                    print(email_id)

                    def calculate_matching_percentage(word_list, words):
                        def calculate_single_matching_percentage(word, item):
                            max_length = max(len(word), len(item))
                            word = word.upper()
                            item = item.strip().replace(" ", "").upper()
                            matching_chars = sum(1 for c1, c2 in zip(item, word) if c1 == c2)
                            return (matching_chars / max_length) * 100

                        highest_percentage = 0.0
                        highest_matching_item = None

                        for word in words:
                            word = word.upper()
                            for item in word_list:
                                original_item = item
                                item = item.strip().replace(" ", "").upper()

                                matching_percentage = calculate_single_matching_percentage(word, item)

                                if matching_percentage > highest_percentage:
                                    highest_percentage = matching_percentage
                                    highest_matching_item = original_item

                        return highest_matching_item, highest_percentage

                    word_list = email_id
                    per_Name = [item.split('.')[1] if '.' in item else item for item in per_Name]
                    print(per_Name)

                    word2 = per_Name

                    for word in word2:
                        highest_matching_item, highest_percentage = calculate_matching_percentage(word_list, [word])
                        if highest_matching_item is not None:
                            print(
                                f"For '{word}', the highest matching percentage is {highest_percentage:.2f}% with '{highest_matching_item}'")
                        else:
                            print(f"For '{word}', no matches found.")
                        #final.append('OrganistaionEmail--' + email_id[0])

                    if len(word_list) == 1:

                        if highest_percentage >= 15:
                            print(highest_matching_item)
                            final.append(
                            'ContactEmail--' + str(highest_matching_item).replace("[", "").replace("]", "").replace(
                                "\\n", "").replace("'", ""))
                            final.append('OrganizationEmail--')

                        else:
                            print('not matched')
                            final.append('OrganistaionEmail--' + email_id[0])
                            final.append('ContactEmail--')
                        

                    else:
                        print('it as more elemnt')
                        if highest_percentage >= 15:
                            print(highest_matching_item)
                            final.append('ContactEmail--' + str(highest_matching_item).replace("[", "").replace("]", "").replace("\\n", "").replace("'", ""))


                            # Given list of email addresses
                            email_list = word_list

                            # Email address to remove
                            email_to_remove = highest_matching_item

                            # Check if the email address is in the list before removing it
                            if email_to_remove in email_list:
                                email_list.remove(email_to_remove)
                                print(f"'{email_to_remove}' has been removed from the list.")
                            else:
                                print(f"'{email_to_remove}' is not in the list.")

                            # Print the updated list
                            print("Updated email list:", email_list)
                            final.append('OrganistaionEmail--' + str(email_list[0]).replace("[", "").replace("]", "").replace("\\n","").replace("'", ""))
                        else:
                            final.append('OrganistaionEmail--' + str(email_id[0]) +','+ str(email_id[1]))


            except IndexError:
                # org_name()
                # contactpersonname()
                final.append("CONTACTPERSONNAME--")


                if len(email_id) > 1:
                    final.append(
                'OrganizationEmail--'  + str(email_id[0]).replace("[", "").replace("]", "").replace("\\n", "").replace("'",
                                                                                                                 ""))
                    final.append(
                'ContactEmail--' + str(email_id[-1]).replace("[", "").replace("]", "").replace("\\n", "").replace(
                    "'",
                    ""))
                else:
                    try:
                        final.append(
                    'ContactEmail--' + str(email_id[0]).replace("[", "").replace("]", "").replace("\\n", "").replace(
                        "'",
                        ""))
                        final.append('OrganizationEmail--')
                    except IndexError:
                        final.append('ContactEmail--')
                        final.append('OrganizationEmail--')

        ###############address flair#####################

        try:
            print(
                '############################################################# address new code #############################################################')
            loactionlst = ['address', 'factory', 'd.no', 'h.no', 'h. no', 'plot', 'flat', 'plat']
            loclst = [i for i in loactionlst if i in htext.lower()]

            textaddress = htext
            textaddress = textaddress.replace("|", ",")
            textaddress = textaddress.lower()

            nlp = pipeline('ner', model=model, tokenizer=tokenizer, aggregation_strategy="simple")
            grop = nlp(textaddress)

            citycountry = []
            print('########################### city or country name ###########################')
            d = grop[-1]

            if d['entity_group'] == "COUNTRY":
                print(d["word"])
                citycountry.append(d["word"])
            elif d['entity_group'] == "CITY":
                print(d["word"])
                citycountry.append(d["word"])

            try:
                address1 = loclst[0]
            except IndexError:
                address1 = (textaddress.partition(",")[0])
                words = address1.split()
                address1 = words[-1]

            star_location = address1.lower()
            end_location = citycountry[0].replace("#", "")
            start = star_location
            end = end_location
            s = textaddress.lower()
            middle_address = (s.split(start))[-1].split(end)[0]
            Address = start + middle_address + end
            Address = Address.replace('--', '').title()
            print(Address)
            if Address.count(',') < 2:
                splitaddress()
            else:
                final.append('ADDRESS--' + Address)

            # star_location = location[0].replace(":LOC", "").replace('"', '').replace('[', '')
            # end_location = location[-1].replace(":LOC", "").replace('"', '').replace(']', '')
            # d1 = star_location.split()
            # d2 = end_location.split()
            # d3 = d1[0]
            # d4 = d2[0]
            # start = d3
            # end = d4
            # s = horizontaltext
            # middle_address = ((s.split(start))[1].split(end)[0])
            # Address = d3 + middle_address + d4
            # final.append('ADDRESS--' + Address)
            # addrespinlst.append(Address)


        except IndexError:
            splitaddress()

        ########################################## Designation ###########################################
        import re
        new = []
        with open('test.txt', 'r') as f:
            flag = False
            for line in f:
                line1 = line
                line = line.upper()
                matches = re.findall(
                    r'''\bAPPRENTICE\b|\bEXECUTIVE\b|\bPROPRIETOR\b|\bPARTNER\b|\bMD\b|\bANALYST\b|\bPRACTITIONER\b|\bCUSTOMER\b|\bCOO\b|\bCOACH\b|\bADMINISTRATIVE\b|\bADMINISTRATOR\b|\bAGENT\b|\bHEAD\b|\bCHIEF\b|\bDIRECTOR\b|\bVICE\b|\bPRESIDENT\b|\bMANAGER\b|\bCOORDINATOR\b|\bCOUNSELOR\b|\bSUPERVISOR\b|\bASSISTANT\b|\bSPECIALIST\b|\bARTIST\b|\bWORKER\b|\bCONSULTANT\b|\bREPRESENTATIVE\b|\bARCHITECT\b|\bSTAFF\b|\bMEMBER\b|\bDEVELOPER\b|\bENGINEER\b|\bEXAMINOR\b|\bDOCTOR\b|\bPROFESSOR\b|\bTEACHER\b|\bLEAD\b|\bOFFICER\b|\bCEO\b|\bC.E.O\b|\bJUNIOR\b|\bSENIOR\b|\bPROFESSOR\b|\bSALES\b''',
                    line)
                for match in matches:
                    line = line.replace('-', '')
                    # print(line)
                    o = "Designation--" + line
                    new.append(o)
                    remove_list.append(str(line1).replace('\n', ''))

        try:
            a = new[0].replace('\n', '')
            final.append(a)

        except IndexError:
            final.append("Designation--")

        ###################################################Phone number#################################################
        num = []
        import phonenumbers

        # print(verticaltext)
        numbers = phonenumbers.PhoneNumberMatcher(
            verticaltext.replace('+91', '').replace('(0)', '').replace('(', '').replace(')', '').replace('-',
                                                                                                         '').replace(
                ' ', ''), "IN")

        for number in numbers:
            number = str(number).split(")")
            num.append(number[1])
            # num.append(number[-1])

        print(num)
        import re

        # Input list of strings
        #         num =[' 7227906777Extn1204634444']

        # Define a regular expression pattern to split when text is present
        pattern = r'[a-zA-Z]+'

        # Function to split a string based on the pattern
        def split_string(text):
            return re.split(pattern, text)

        # Process each line in the list
        split_lines = [split_string(line) for line in num]

        # Flatten the list of lists into a single list
        split_lines = [item for sublist in split_lines for item in sublist]

        # Remove any empty strings
        num = [item for item in split_lines if item]

        # Print the split lines
        print(num)
        if len(num) == 0:
            final.append("ContactNumber--")
            final.append("OrganizationNumber--")
        elif len(num) > 1:
            final.append("ContactNumber--" + num[0].replace(' ', ''))
            final.append("OrganizationNumber--" + num[-1].replace(' ', ''))
        elif len(num) == 1:
            try:
                final.append("ContactNumber--" + num[0].replace(' ', ''))
                final.append("OrganizationNumber--")
            except IndexError:
                final.append("ContactNumber--")
                final.append("OrganizationNumber--")
        print(
            '#############################################################  num #############################################################')
        print(num)
        # try:
        #     final.append("PhoneNumber--" + num[0].replace(' ', ''))
        #     remove_list.append(num[0])
        # except IndexError:
        #     pass
        # try:
        #     final.append("PhoneNumber1--" + num[1].replace(' ', ''))
        #     remove_list.append(num[1])
        # except IndexError:
        #     pass
        # try:
        #     final.append("PhoneNumber2--" + num[2].replace(' ', ''))
        #     remove_list.append(num[2])
        # except IndexError:
        #     pass

        ###############PINCODE############

        pinlst = []
        print(addrespinlst)
        import pgeocode

        # try:
        #     matche1 = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3}  \d{3}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', addrespinlst[0])
        #     for i in matche1:
        #         address3 = i.replace(' ', '').replace('-', '')
        #         pinlst.append(address3)
        # except IndexError:

        lst = []
        for i in num:
            i = i[1:]
            lst.append(i)

        infile = r"vtext.txt"
        outfile = r"cleaned_file.txt"
        import glob
        delete_list = lst
        # delete_list = ["firstname1 lastname1","firstname2 lastname2","firstnamen lastnamen",'Director -  Sales  &  Business  Development']
        fin = open(infile, "r+")
        fout = open(outfile, "w+")
        for line12 in fin:
            for word in delete_list:
                line12 = line12.replace(word, "")

            fout.write(line12)
        fin.close()
        # print(line)

        # print(addrespinlst)
        import pgeocode
        # print(line12)
        import re
        matche1 = re.findall(r'-\d{6}\b|\b\d{6}\b|\b\d{3}  \d{3}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', line12)
        for i in matche1:
            address3 = i.replace(' ', '').replace('-', '')
            pinlst.append(address3)

        nomi = pgeocode.Nominatim('IN')
        try:
            a = nomi.query_postal_code(str(pinlst[-1]))
            # print(a)
            b = a.keys()
            c = b.values.tolist()
            d = a.tolist()
            postal_code = "PinCode1" + "--" + d[0]
            final.append(postal_code)
            country_code = c[1] + "--" + str(d[1])
            final.append(country_code)
            place_name = 'LandMark1' + "--" + str(d[2])
            final.append(place_name)
            state_name = c[3] + "--" + str(d[3])
            final.append(state_name)
            state_code = c[4] + "--" + str(d[4])
            final.append(state_code)
            county_name = 'CityName1' + "--" + str(d[5])
            final.append(county_name)

        except (IndexError, NameError):
            final.append("PinCode1--" + " ")
            final.append("country_code--")
            final.append("LandMark1--")
            final.append("state_name--")
            final.append("state_code--")
            final.append("CityName1--")

        ########################################################   json     #####################################################################

        import pandas as pd
        df = pd.DataFrame(final)
        df1 = df[0].str.split('--', expand=True)
        # print(df1)
        df1.rename({df1.columns[-2]: 'Keys'}, axis=1, inplace=True)
        df1.rename({df1.columns[-1]: 'Values'}, axis=1, inplace=True)
        df1['Keys'] = df1['Keys'].str.strip()
        df1.to_csv('path123.csv', index=False)
        df2 = pd.read_csv('path123.csv')
        print(df2)
        if df2['Values'].isnull().all():
            print("Column 'Column2' is empty.")
            return 'Invalid image'
        else:
            pass
        df2 = df2.T
        df2.to_csv('path1.csv', index=False, header=False)
        df1 = pd.read_csv('path1.csv')
        df1.to_json('firstjson1.json', orient="index")
        import json
        with open('firstjson1.json', 'r') as json_file:
            json_load = json.load(json_file)
        #     # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/businesscards/create"
        nothing = json.dumps(json_load).replace("]", "").replace("[", "").replace('{"0":', '').replace('}}', '}')
        # # print('--------------------------------------------------------------------------')
        # # print(nothing)
        empty = []
        import base64
        name = found
        image = open(name, 'rb')
        image_read = image.read()
        image_64_encode = base64.b64encode(image_read)
        NULL = 'null'
        empty.append("ByteData--" + (NULL).strip('""'))
        image_64_encode = image_64_encode.decode('utf-8')
        empty.append("FileData--" + str(image_64_encode))
        imagedata = name.split("/")
        imagename = str(imagedata[-1]).replace('"', '').replace("[", "").replace("]", "")
        imagename1 = str(imagename).split('.')
        imagename = str(imagename1[-2]).replace("[", "]")
        empty.append("FileName--" + imagename)
        empty.append("FilePath--" + "")
        imageExtension = str(imagename1[-1]).replace("[", "]")
        empty.append("FileType--" + imageExtension)
        image.close()
        import pandas as pd
        df = pd.DataFrame(empty)
        df = df[0].str.split("--", expand=True)
        data1 = pd.DataFrame(df[0])
        data2 = pd.DataFrame(df[1])
        dt = data2.set_index(data1[0])
        dt4 = dt.T
        dictionary = dt4.to_dict(orient="index")
        list1 = []
        # list.append(a)
        list1.append(dictionary[1])
        # # final.append("image--"+str(dictionary[1]).replace("\'",'"'))
        print('--------------------')
        # print(namelist)
        import json
        # JSON data:
        x = nothing
        # python object to be appended
        y = {"image": dictionary[1]}
        # parsing JSON string:
        z = json.loads(x)
        # appending the data
        z.update(y)
        # the result is a JSON string:
        # print(json.dumps(z))

        zlist.append(z)
        #############################################creating csv#####################################
    # print(final)

    # print(imagelist)
    # final.append('image--' + str(imagelist))
    #  import requests
    #  import json

    # # url = "https://anwi.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create/list" #dev
    #  url = "https://qa.bizgaze.com/apis/v4/bizgaze/integrations/businesscards/create/list" #testing
    #  # url = "https://test.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create" # test
    #  # url='http://localhost:3088/apis/v4/bizgaze/integrations/businesscards/create'
    #  # url = 'https://c01.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create'  # C01
    #  payload1 = json.dumps(zlist)
    #  # print('--------------------------------------------------------------------------')
    #  #print(payload1)
    #  headers = {
    #      #'Authorization': 'stat 1a936137490040c997928f485e3cdd7a',   #dev
    #      # 'Authorization': 'stat 16516391d0074f4c8a15ea16fb49470b',#testing
    #      # 'Authorization': 'stat 08e55fcfbaa940c8ab8145a074c444d1',
    #      # 'Authorization': 'stat f7cdb402e01e44e5842878653946168f',  # c01
    #      #'Authorization': 'Stat c3e11b2fcbfe455b86a1fe6efde02a69',#demo
    #      'Authorization':'Stat e5bc6ad08f2c42feb5f98a2a521d00af',

    #      'Content-Type': 'application/json'
    #  }
    #  response = requests.request("POST", url, headers=headers, data=payload1)
    #  # print("##############################################################")

    #  print(payload1)
    #  #print(zlist)
    #  # import os
    #  # if 'BusinessCards Created Successfully' in response.text:
    #  #     print('present')
    #  #     os.remove(found)
    #  # else:
    #  #     print('not present')

    #  df1.to_json('visitingcard.json')
    #  data = df1.to_json('visiting.json', orient='records')
    #  print(data)

    # return render_template('index.html')

    # return response.text

    return z
    # return zlist


# @app.route('/upload_BusinessCards', methods=["POST"])
# def mainfunction():
#     Dataset = request.get_json()
#     if len(Dataset)==1:
#         # predict(Dataset)
#         return multiplecards(Dataset)
#     else:
#         # multiplecards(Dataset)
#         return multiplecards(Dataset)


###################################################################################   Resume parser  ###################################################################################################

@app.route("/upload_resume", methods=["POST"])
def predict_resume():
    Dataset = request.get_json()
    # data = {'visiting': Dataset}
    # a=url_list[0]
    a = Dataset
    # a = url_list
    # print(a)
    x = a['FileData']
    # print(x)
    y = a['FileName']
    y = y.replace(' ', '')
    y = y.replace('&', '')
    y = y.replace('@', '')
    z = a['FileType']
    # CreatedBy=a['CreatedBy']

    name = y + '.' + z
    print(name)

    # img_data = x.encode()

    img_data = x.encode()

    import base64
    with open('./Resume_parser/upload_resume/' + name, "wb") as fh:
        fh.write(base64.decodebytes(img_data))
    # cmd = "python ./Resume_parser/resume1.0.multiprocessing.py" + " " + str('./Resume_parser/upload_resume/' + name)
    # os.system(cmd)

    # f = "./resume_upload"
    # f = os.listdir(f)
    f = './Resume_parser/upload_resume/' + name
    found = './Resume_parser/upload_resume/' + name
    print('this from resumepy file')
    print(f)

    def docx_to_txt():
        import docx2txt
        import glob
        text = ''
        for file in glob.glob(found):
            c = docx2txt.process(file)
            c = c.rstrip("\n")
            toPrint = c
            d = ' '.join(i for i in toPrint.split())
            d = d.rstrip()
            text += d
        docx_to_txt.text = text

    def doc_to_txt():
        import docx2txt
        import glob
        text = ''
        # for file in glob.glob(found):
        c = docx2txt.process(f)
        c = c.rstrip("\n")
        toPrint = c
        d = ' '.join(i for i in toPrint.split())
        d = d.rstrip()
        text += d
        doc_to_txt.text = text

    def pdf_to_txt():
        import sys
        import fitz
        fname = found
        doc = fitz.open(fname)
        text = ""
        for page in doc:
            text = text + str(page.get_text())
        pdf_to_txt.text = " ".join(text.split('\n'))

    # for file in f:
    print('checking for filetype')
    if f.endswith('.doc'):
        doc_to_txt()
        x = doc_to_txt.text
    elif f.endswith('.docx'):
        docx_to_txt()
        x = docx_to_txt.text
    elif f.endswith('.pdf'):
        pdf_to_txt()
        x = pdf_to_txt.text

    doc = nlp_model(x)
    k = []
    l = []
    for ent in doc.ents:
        # print(f'{ent.label_.upper():{30}}- {ent.text}')
        k.append(ent.label_.upper())
        l.append(ent.text)
    columns = k
    rows = [l]
    import pandas as pd
    data = pd.DataFrame(rows, columns=columns)
    df = data

    data = df.T

    data.to_csv('./Resume_parser/Ad1.csv', index=True)

    data = pd.read_csv('./Resume_parser/Ad1.csv')
    # print(data)
    data.rename({data.columns[-2]: 'Key'}, axis=1, inplace=True)
    data.rename({data.columns[-1]: 'Values'}, axis=1, inplace=True)
    data.to_csv('./Resume_parser/Ad1.csv', index=False)
    #####################################################################################################
    # ModelName = "text-davinci-003"
    # prompt_value = 'find designation in key value pairs from below text?' + "/n" + str(x)
    # max_token_value = 300

    # # usertext= request.get_data()
    # # output = usertext.decode()
    # # print(output)
    # import os
    # import openai

    # # print(usertext)
    # openai.api_key = "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"
    # # userinput='fibonacci series in python'
    # import os
    # import openai

    # # openai.api_key = os.getenv("OPENAI_API_KEY")

    # response_text = openai.Completion.create(
    # model=ModelName,
    # prompt=prompt_value,
    # temperature=0,
    # max_tokens=max_token_value,
    # top_p=1,
    # frequency_penalty=0,
    # presence_penalty=0,
    # stop=["\"\"\""]
    # )
    # a = response_text['choices']
    # data = a[0]['text']
    # data=data.replace('\n','$@$')
    # data=data.replace('$@$$@$','')
    # #data=data.replace(':','')
    # print(data)
    # data=data.replace('Designation','POSITION')
    # data=data.split('$@$')
    # print(data)
    # import pandas as pd
    # desgnaition=pd.DataFrame(data)
    # desgnaition=desgnaition[0].str.split(':',expand=True)
    # desgnaition.columns=['Key','Values']
    # print(desgnaition)

    # data= pd.read_csv('./Resume_parser/Ad1.csv')

    # frames = [data,desgnaition]

    # result = pd.concat(frames,axis=0)
    # result.to_csv('./Resume_parser/Ad1.csv', index=False)

    ########################################################################################################
    # df2 = pd.read_csv('./Ad1.csv')
    x1 = pd.read_csv('D:/projects/C01app/Resume_parser/AD11.csv')
    tp = pd.read_csv('./Resume_parser/Ad1.csv')
    # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
    merge = pd.merge(tp, x1, on='Key', how='right')
    merge.to_csv('./Resume_parser/AD.csv', index=False)
    df2 = pd.read_csv('./Resume_parser/AD.csv')
    # print(df2)
    df2 = df2.T

    df2.to_csv('./Resume_parser/path.csv', index=False, header=False)
    df1 = pd.read_csv('./Resume_parser/path.csv')
    df1.to_json('./Resume_parser/firstjson.json', orient="index")
    print(df1)

    doc = nlp_model1(x)
    k = []
    l = []
    for ent in doc.ents:
        # print(f'{ent.label_.upper():{30}}- {ent.text}')
        k.append(ent.label_.upper())
        l.append(ent.text)
    columns = k
    rows = [l]
    data = pd.DataFrame(rows, columns=columns)
    df = data
    data = df.T

    data.to_csv('./Resume_parser/Ad2.csv', index=True)
    data = pd.read_csv('./Resume_parser/Ad2.csv')
    data.rename({data.columns[-2]: 'Key'}, axis=1, inplace=True)
    data.rename({data.columns[-1]: 'Values'}, axis=1, inplace=True)
    data.to_csv('./Resume_parser/Ad2.csv', index=False)
    import pandas as pd
    import json
    dflist = []
    x = pd.read_csv('D:/projects/C01app/Resume_parser/PG.csv')
    tp = pd.read_csv('./Resume_parser/Ad2.csv')
    # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
    merge = pd.merge(x, tp, on='Key', how='left')
    import numpy as np
    merge = merge.replace(np.nan, '', regex=True)
    merge.to_csv('./Resume_parser/PGmerge.csv', index=False)

    dfPG = pd.read_csv('./Resume_parser/PGmerge.csv')
    import numpy as np
    dfPG = dfPG.replace({np.nan: None})
    x2 = dfPG.iloc[:, -2].tolist()
    y2 = dfPG.iloc[:, -1].tolist()
    z1 = dict(zip(x2, y2))
    dflist.append(z1)
    # u1 = json.dumps(z1)
    import pandas as pd

    x = pd.read_csv('D:/projects/C01app/Resume_parser/UG.csv')
    tp = pd.read_csv('./Resume_parser/Ad2.csv')
    # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
    merge = pd.merge(x, tp, on='Key', how='left')
    import numpy as np
    merge = merge.replace(np.nan, '', regex=True)
    merge.to_csv('./Resume_parser/UGmerge.csv', index=False)

    dfUG = pd.read_csv('./Resume_parser/UGmerge.csv')
    import numpy as np
    dfUG = dfUG.replace({np.nan: None})
    x2 = dfUG.iloc[:, -2].tolist()
    y2 = dfUG.iloc[:, -1].tolist()
    z2 = dict(zip(x2, y2))
    dflist.append(z2)
    # u2 = json.dumps(z2)
    # final = '[' + str(z1) + ',' + str(z2) + ']'
    # return render_template('resume.html')

    ############################################################################
    import pandas as pd

    x = pd.read_csv('D:/projects/C01app/Resume_parser/inter.csv')
    tp = pd.read_csv('./Resume_parser/Ad2.csv')
    # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
    merge = pd.merge(x, tp, on='Key', how='left')
    import numpy as np
    merge = merge.replace(np.nan, '', regex=True)
    merge.to_csv('./Resume_parser/intermerge.csv', index=False)

    dfinter = pd.read_csv('./Resume_parser/intermerge.csv')
    import numpy as np
    dfinter = dfinter.replace({np.nan: None})
    x2 = dfinter.iloc[:, -2].tolist()
    y2 = dfinter.iloc[:, -1].tolist()
    z3 = dict(zip(x2, y2))
    dflist.append(z3)

    ############################################################################
    import pandas as pd

    x = pd.read_csv('D:/projects/C01app/Resume_parser/SSC.csv')
    tp = pd.read_csv('./Resume_parser/Ad2.csv')
    # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
    merge = pd.merge(x, tp, on='Key', how='left')
    import numpy as np
    merge = merge.replace(np.nan, '', regex=True)
    merge.to_csv('./Resume_parser/sscmerge.csv', index=False)

    dfssc = pd.read_csv('./Resume_parser/sscmerge.csv')
    import numpy as np
    dfssc = dfssc.replace({np.nan: None})
    x2 = dfssc.iloc[:, -2].tolist()
    y2 = dfssc.iloc[:, -1].tolist()
    z4 = dict(zip(x2, y2))
    dflist.append(z4)
    ############################################Document############################################################
    import base64
    empty = []
    name = f
    image = open(name, 'rb')
    image_read = image.read()
    image_64_encode = base64.b64encode(image_read)
    NULL = 'null'
    # empty.append("ByteData--" + (NULL).strip('""'))
    image_64_encode = image_64_encode.decode('utf-8')
    empty.append("FileData--" + str(image_64_encode))
    imagedata = name.split("/")
    imagename = str(imagedata[-1]).replace('"', '').replace("[", "").replace("]", "")
    imagename1 = str(imagename).split('.')

    imagename = str(imagename1[-2]).replace("[", "]")
    empty.append("FileName--" + imagename)
    empty.append("FilePath--" + "")
    imageExtension = str(imagename1[-1]).replace("[", "]")
    empty.append("FileType--" + imageExtension)

    import pandas as pd
    df = pd.DataFrame(empty)
    df = df[0].str.split("--", expand=True)
    data1 = pd.DataFrame(df[0])
    data2 = pd.DataFrame(df[1])
    dt = data2.set_index(data1[0])

    dt4 = dt.T
    list = []
    dictionary = dt4.to_dict(orient="index")

    a = {
        "FileId": 0,
        "FileData": "",
        "FileName": "",
        "FileType": "",
        "RefId": 0
    }
    list = []

    list.append(a)
    list.append(dictionary[1])

    import json

    with open('./Resume_parser/firstjson.json', 'r') as json_file:
        json_load = json.load(json_file)

        # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/businesscards/create"

    nothing = json.dumps(json_load).replace("]", "").replace("[", "").replace('{"0":', '').replace('}}', '}')
    import json

    # JSON data:
    x = nothing

    # python object to be appended
    y = {"EducationDetails": dflist}
    y1 = {"Document": list}
    print(y)
    # parsing JSON string:
    z = json.loads(x)

    # appending the data
    z.update(y)
    z.update(y1)

    # the result is a JSON string:
    # print(json.dumps(z))
    print('##########################')
    # print(z)
    print('##########################')
    import requests
    import json

    # with open('visitingcard1.json', 'r') as json_file:
    #     json_load = json.load(json_file)
    # url = "https://qa.bizgaze.app/apis/v4/bizgaze/integrations/resumeparsing/save"  #dev
    # # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/resumeparsing/save"
    # #url = "https://qa.bizgaze.app/apis/v4/bizgaze/integrations/resumeparsing/save"  #testing
    # payload1 = json.dumps(z)
    # print('--------------------------------------------------------------------------')
    # # print(payload1)
    # headers = {
    #     # 'Authorization': 'stat 53f27e671adf456e974f1d11ceb5db41',
    #      #'Authorization': 'stat 5702ce5a77d34e0381bc2f06588d9bcc',#dev
    #     'Authorization': 'stat ed5dd14ee2094227849f6bbe2928bff3', #testing
    #     'Content-Type': 'application/json'
    # }
    # response = requests.request("POST", url, headers=headers, data=payload1)
    # print("##############################################################")

    # print(response.text)
    # function_1.var=response
    # a=str(response.text)

    files = glob.glob('./resume_upload/*')
    for f in files:
        os.remove(f)

    return z
    # return 'done'


# return render_template('resume.html')


# @app.route('/upload_resume', methods=["POST"])
def upload_resume():
    if __name__ == "__main__":
        # print(os.getpid())

        url_list = []
        Dataset = request.get_json()
        # id = "100013660000125"
        url_list.append(Dataset)
        # multiprocessing
        with multiprocessing.Pool(processes=1) as pool:
            results = pool.map(predict_resume, url_list)

        pool.close()
        return results[0]


@app.route("/Download_resume")
def Download_resume():
    # try:
    with open("Ad1.csv", encoding="unicode_escape") as fp:
        csv = fp.read()
        return Response(csv, mimetype="text/csv", headers={"Content-disposition": "attachment; filename=Resume.csv"})


##############################################################################   Invoice Parser   ###################################################################################################

@app.route('/upload_invoice', methods=["POST", "GET"])
def upload_invoice():
    Dataset = request.get_json()
    # data = {'visiting': Dataset}
    # a=url_list[0]
    a = Dataset

    x = a['FileData']
    # print(x)
    y = a['FileName']
    z = a['FileType']
    # CreatedBy=a['CreatedBy']

    name = y + '.' + z
    print(name)

    img_data = x.encode()

    import base64
    with open('./Invoice_parser/upload_invoice/' + name, "wb") as fh:
        fh.write(base64.decodebytes(img_data))

    # cmd = "python ./Invoice_parser/invoice.multiprocessing.py" + " " + str('./Invoice_parser/upload_invoice/' + name)
    # os.system(cmd)
    #####################################################################################################################################

    name = './Invoice_parser/upload_invoice/' + name
    extension = name.split('.')[-1]

    def image_to_text():
        print('#######################  image-to-pdf   ################')

        import cv2
        import numpy as np
        fname = name
        print(fname)
        import pytesseract as tess
        from PIL import Image

        tess.pytesseract.tesseract_cmd = r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe"
        img = cv2.imread(fname)
        # img = cv2.resize(img, None, fx=1.2, fy=1.2, interpolation=cv2.INTER_CUBIC)

        # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # kernel = np.ones((1, 1), np.uint8)
        # img = cv2.dilate(img, kernel, iterations=1)
        # img = cv2.erode(img, kernel, iterations=1)

        # img=cv2.threshold(cv2.GaussianBlur(img, (5, 5), 0), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        pdf = tess.image_to_pdf_or_hocr(img, extension="pdf")
        with open(Current_Working_Directory + "/Invoice_parser/demo.pdf", "w+b", ) as f:
            f.write(pdf)
        print('demo created')
        import fitz
        fname = Current_Working_Directory + '/Invoice_parser/demo.pdf'
        doc = fitz.open(fname)
        text = ""
        for page in doc:
            text = text + str(page.get_text())
        image_to_text.text = " ".join(text.split("\n"))

        # result = ocr.ocr( Current_Working_Directory + "/Invoice_parser/demo.pdf" , cls=True)
        # result = result[0]

        # txts = [line[1][0] for line in result]

        # image_to_text.text = ""
        # for i in txts:
        #     if len(i) < 4:
        #         continue
        #         # print(i+"\n")
        #     image_to_text.text = image_to_text.text + str(i) + "\n"

    def pdf_to_text():
        import fitz
        fname = name
        doc = fitz.open(fname)
        text = ""
        for page in doc:
            text = text + str(page.get_text())
        pdf_to_text.text = " ".join(text.split("\n"))

    extensionlist = ['JPEG', 'jpg', 'png', 'JPG', 'PNG', 'jpeg']

    if extension in extensionlist:
        print('image' + extension)
        image_to_text()
        x = image_to_text.text

    else:
        print('pdf' + extension)
        pdf_to_text()
        x = pdf_to_text.text

    import spacy
    import sys
    # import fitz
    # fname = "uploads/0.pdf"
    # doc = fitz.open(fname)
    # text = ""
    # for page in doc:
    #     text = text + str(page.get_text())
    # fitz = " ".join(text.split("\n"))
    # # print(fitz)
    import pandas as pd

    doc = nlp_model1(x)
    k = []
    l = []
    for ent in doc.ents:
        # print(f"{ent.label_.upper():{30}}- {ent.text}")
        k.append(ent.label_.upper())
        l.append(ent.text)
    columns = k
    rows = [l]
    data = pd.DataFrame(rows, columns=columns)
    df = data
    df = data.T

    df.to_csv(Current_Working_Directory + "/Invoice_parser/Invoice.csv")
    import pandas as pd
    df = pd.read_csv(Current_Working_Directory + "/Invoice_parser/Invoice.csv")
    # df.head()
    # df = df.T
    # new_header = df.iloc[0]  # grab the first row for the header
    # df = df[1:]  # take the data less the header row
    # df.columns = new_header
    # def df_column_uniquify(df):
    #     df_columns = df.columns
    #     new_columns = []
    #     for item in df_columns:
    #         counter = 0
    #         newitem = item
    #         while newitem in new_columns:
    #             counter += 1
    #             newitem = "{}_{}".format(item, counter)
    #         new_columns.append(newitem)
    #     df.columns = new_columns
    #     return df.T
    # df = df_column_uniquify(df)
    # # df=df.T
    # df.to_csv('final.csv')
    # df = pd.read_csv('final.csv')
    df.rename({df.columns[-2]: 'Key'}, axis=1, inplace=True)
    df.rename({df.columns[-1]: 'Values'}, axis=1, inplace=True)
    df['Key'] = df['Key'].str.replace('/', '')
    df['Key'] = df['Key'].str.replace(' ', '')
    df.to_csv(Current_Working_Directory + '/Invoice_parser/final.csv', index=False)
    import pandas as pd
    x1 = pd.read_csv(Current_Working_Directory + '/Invoice_parser/final.csv')
    tp = pd.read_csv(Current_Working_Directory + '/Invoice_parser/finalwithcolen.csv')
    merge = pd.merge(x1, tp, on='Key', how='right')
    merge1 = merge

    merge['Values'] = merge['Values'].astype(str)
    merge = merge['Values'].str.split(":", expand=True)
    merge.rename({merge.columns[-1]: 'Values'}, axis=1, inplace=True)
    frames = [merge1['Key'], merge['Values']]
    result = pd.concat(frames, axis=1)
    x1 = pd.read_csv(Current_Working_Directory + '/Invoice_parser/final.csv')
    tp = pd.read_csv(Current_Working_Directory + '/Invoice_parser/finalwithoutcolen.csv')
    merged = pd.merge(x1, tp, on='Key', how='right')
    frames = [result, merged]
    result1 = pd.concat(frames)
    result1.to_csv(Current_Working_Directory + '/Invoice_parser/final1.csv', index=False)

    x1 = pd.read_csv(Current_Working_Directory + '/Invoice_parser/main.csv')
    tp = pd.read_csv(Current_Working_Directory + '/Invoice_parser/final1.csv')
    # tp = pd.read_csv(Current_Working_Directory + 'Invoice_parser/final.csv')
    tp['Key'] = tp['Key'].astype(str)
    tp['Values'] = tp['Values'].astype(str)
    tp['Key'] = tp['Key'].str.strip()
    tp['Values'] = tp['Values'].str.strip()

    merge = pd.merge(tp, x1, on='Key', how='right')
    merge.to_csv(Current_Working_Directory + '/Invoice_parser/invoicewithouttable.csv', index=False)
    df2 = pd.read_csv(Current_Working_Directory + '/Invoice_parser/invoicewithouttable.csv')

    # Import writer class from csv module
    from csv import writer

    List = ['PlantCode', " "]
    with open(Current_Working_Directory + '/Invoice_parser/invoicewithouttable.csv', 'a') as f_object:
        writer_object = writer(f_object)
        writer_object.writerow(List)
        f_object.close()
        # print(df2)
    df2 = pd.read_csv(Current_Working_Directory + '/Invoice_parser/invoicewithouttable.csv')
    print(df2)
    df2 = df2.T

    df2.to_csv(Current_Working_Directory + '/Invoice_parser/invoicewithouttable.csv', index=False, header=False)

    df1 = pd.read_csv(Current_Working_Directory + '/Invoice_parser/invoicewithouttable.csv')
    df1.to_json(Current_Working_Directory + '/Invoice_parser/firstjson.json', orient="index")
    import pandas as pd
    x = pd.read_csv(Current_Working_Directory + '/Invoice_parser/final.csv')
    tp = pd.read_csv(Current_Working_Directory + '/Invoice_parser/item1.csv')
    x['Values'] = x['Values'].str.strip()
    merge = pd.merge(tp, x, on='Key', how='inner')
    merge = merge.groupby('Key').agg({
        'Values': '/'.join,
    }).reset_index()
    z = merge['Values'].str.split('/', expand=True)
    frames = [merge, z]
    result1 = pd.concat(frames, axis=1)
    result1 = result1.drop(['Values'], axis=1)
    import pandas as pd
    tp = pd.read_csv(Current_Working_Directory + '/Invoice_parser/item1.csv')
    merge = pd.merge(tp, result1, on='Key', how='inner')
    merge = merge.T
    new_header = merge.iloc[0]  # grab the first row for the header
    merge = merge[1:]  # take the data less the header row
    merge.columns = new_header

    merge = merge.to_dict('records')
    invoice_Item = merge
    print(invoice_Item)

    ####################################Document############################################################

    import base64
    empty = []
    # name = found
    image = open(name, 'rb')
    image_read = image.read()
    image_64_encode = base64.b64encode(image_read)
    NULL = 'null'
    # empty.append("ByteData--" + (NULL).strip('""'))
    image_64_encode = image_64_encode.decode('utf-8')
    empty.append("FileData--" + str(image_64_encode))
    imagedata = name.split("/")
    imagename = str(imagedata[-1]).replace('"', '').replace("[", "").replace("]", "")
    imagename1 = str(imagename).split('.')
    imagename = str(imagename1[-2]).replace("[", "]")
    empty.append("FileName--" + imagename)
    empty.append("FilePath--" + name)
    imageExtension = str(imagename1[-1]).replace("[", "]")
    empty.append("FileType--" + imageExtension)
    import pandas as pd
    df = pd.DataFrame(empty)
    df = df[0].str.split("--", expand=True)
    data1 = pd.DataFrame(df[0])
    data2 = pd.DataFrame(df[1])
    dt = data2.set_index(data1[0])
    dt4 = dt.T
    list = []
    dictionary = dt4.to_dict(orient="index")

    a = {
        "FileId": 0,
        "FileData": "",
        "FileName": "",
        "FileType": "",
        "RefId": 0
    }
    list = []
    list.append(a)
    list.append(dictionary[1])
    import json
    with open(Current_Working_Directory + '/Invoice_parser/firstjson.json', 'r') as json_file:
        json_load = json.load(json_file)
        # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/businesscards/create"
    nothing = json.dumps(json_load).replace("]", "").replace("[", "").replace('{"0":', '').replace('}}', '}')
    import json

    # JSON data:
    x = nothing
    # python object to be appended
    y = {"InvoiceItems": invoice_Item}
    y1 = {"Document": list}
    # parsing JSON string:
    z = json.loads(x)
    # appending the data
    z.update(y)
    z.update(y1)
    # print(z)
    # the result is a JSON string:
    # print(json.dumps(z))
    # print('##########################')
    # print(z)
    # print('##########################')
    # import requests
    # import json
    # # with open('visitingcard1.json', 'r') as json_file:
    # #     json_load = json.load(json_file)
    # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/invoice/createsalesinvoice"
    # #url="https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/invoice/createsalesinvoice"
    # payload1 = json.dumps(z)
    # print('--------------------------------------------------------------------------')
    # print(payload1)
    # headers = {
    #     'Authorization': 'stat 089166c35d4c4d7d941c99d6f8986834',
    #     'Content-Type': 'application/json'
    # }
    # response = requests.request("POST", url, headers=headers, data=payload1)
    # print("##############################################################")
    # print(response.text)
    # import glob
    # files = glob.glob(
    #     "upload_invoice/*"
    # )
    # for f in files:
    #     os.remove(f)
    # files = glob.glob(
    #     "uploads/*"
    # )
    # for f in files:
    #     os.remove(f)

    return z

    # return render_template('invoice.html')


@app.route("/Download_invoice")
def Download_invoice():
    pass


@app.route("/Table")
def Table():
    pass


if __name__ == "__main__":
    app.run(host='0.0.0.0', port=1112)