From 05cefc1ac2afbf0a11b770e8bc7f9e513653a7e0 Mon Sep 17 00:00:00 2001
From: SadhulaSaiKumar <sagar96767@gmail.com>
Date: Thu, 11 Jan 2024 04:41:41 +0000
Subject: [PATCH] Update 'Business_cards/Business_cards.py'

---
 Business_cards/Business_cards.py | 1155 ++++--------------------------
 1 file changed, 122 insertions(+), 1033 deletions(-)

diff --git a/Business_cards/Business_cards.py b/Business_cards/Business_cards.py
index 24a6185..70c3c19 100644
--- a/Business_cards/Business_cards.py
+++ b/Business_cards/Business_cards.py
@@ -15,11 +15,6 @@ from functools import partial
 from urlextract import URLExtract
 import pytesseract as tess
 from PIL import Image
-# from doctr.io import DocumentFile
-# from doctr.models import ocr_predictor
-# model = ocr_predictor(pretrained=True)
-# load tagger
-######################################################
 import os
 import glob
 
@@ -29,20 +24,16 @@ import cv2
 import matplotlib
 from werkzeug.utils import secure_filename
 import requests
-import spacy
+#import spacy
 import time
 import multiprocessing
 from PIL import Image
 from functools import partial
-# nlp_model = spacy.load("D:/projects/C01app/Resume_parser/ME")
-# nlp_model1 = spacy.load("D:/projects/C01app/Resume_parser/bdeeducation_50_0.2")
-from flask import Flask, render_template, request, redirect, Response, send_file
 
 import pandas as pd
-
 ################################################################
-Current_Working_Directory = os.getcwd()
-Current_Working_Directory = Current_Working_Directory.replace("\\", "/")
+Current_Working_Directory=os.getcwd()
+Current_Working_Directory=Current_Working_Directory.replace("\\","/")
 # nlp_model1 = spacy.load(Current_Working_Directory + "/Invoice_parser/p")
 
 ################################################################
@@ -58,9 +49,9 @@ model = AutoModelForTokenClassification.from_pretrained("ml6team/bert-base-uncas
 
 from paddleocr import PaddleOCR, draw_ocr
 
-ocr = PaddleOCR(use_angle_cls=True, lang='en', use_space_char=True, show_log=False)
+ocr = PaddleOCR(use_angle_cls=True, lang='en', use_space_char=True, show_log=True)
 tagger = SequenceTagger.load("flair/ner-english-large")
-# tagger.to("cuda")
+
 import datetime
 
 app = Flask(__name__)
@@ -68,23 +59,7 @@ app = Flask(__name__)
 
 # app.config["IMAGE_UPLOADS"] = "C:/inetpub/wwwroot/FlaskApplication/Flask_Demo/upload/"
 
-
 @app.route('/', methods=['GET'])
-def home():
-    return render_template('home.html')
-
-
-@app.route('/resume', methods=['GET'])
-def resume():
-    return render_template('resume.html')
-
-
-@app.route('/invoice', methods=['GET'])
-def invoice():
-    return render_template('invoice.html')
-
-
-@app.route('/card', methods=['GET'])
 def card():
     return render_template('card.html')
 
@@ -94,11 +69,13 @@ def card():
 def multiplecards():
     # print('################## multiple card detection #######################')
     # print(Dataset)
-    datalist = []
-    zlist = []
+    from pathlib import Path
+    Path("multicards").mkdir(exist_ok=True)
+    datalist=[]
+    zlist=[]
     Dataset = request.get_json()
     # print(data)
-    # datalist.append(Dataset)
+    #datalist.append(Dataset)
     data = {'visiting': Dataset}
     for i in data['visiting']:
         import time
@@ -186,7 +163,7 @@ def multiplecards():
                 import pytesseract as tess
                 from PIL import Image
 
-                tess.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
+                tess.pytesseract.tesseract_cmd = r"C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe"
                 pdf = tess.image_to_pdf_or_hocr(fname, extension="pdf")
                 with open("demo.pdf", "w+b", ) as f:
                     f.write(pdf)
@@ -431,18 +408,18 @@ def multiplecards():
         verticaltext = x
         htext = x
         # print('------------------------------------------------')
-        # print('############################################################# this is verticaltext #################################################################')
-        print(verticaltext)
+        #print('############################################################# this is verticaltext #################################################################')
+       # print(verticaltext)
         htext = htext.replace('\n', ' ')
-        # print('############################################################# this is htext #############################################################')
-        # print(htext)
+       # print('############################################################# this is htext #############################################################')
+        #print(htext)
         y = x.replace('\n', ',')
         y = y.replace('  ', ' ')
         # y = y.replace(".", " .")
         horizontaltext = y
         # print('------------------------------------------------')
-        # print('############################################################# this is horizontaltext #############################################################')
-        # print(horizontaltext)
+        #print('############################################################# this is horizontaltext #############################################################')
+        #print(horizontaltext)
 
         textfile = open("test123456.txt", "w")
         a = textfile.write(verticaltext)
@@ -479,7 +456,7 @@ def multiplecards():
                 address2 = address2.replace("'", "").replace("(", "").replace(")", "").replace(', ,', '').replace('  ',
                                                                                                                   '')
 
-            matches = re.findall(r'-\d{6}\b|\b\d{6}\b|\b\d{3}  \d{3}\b|\b-\d{2}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', a)
+            matches = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3}  \d{3}\b|\b-\d{2}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', a)
             for address3 in matches:
                 pass
             try:
@@ -488,9 +465,9 @@ def multiplecards():
                 addrespinlst.append(Address)
 
             except NameError:
-                final.append('ADDRESS--')
 
-                # print('############################################################ Addressmodelworking #############################################################')
+                print(
+                    '############################################################ Addressmodelworking #############################################################')
 
                 # doc = nlp_model1(textaddress)
                 # addlist = []
@@ -755,177 +732,28 @@ def multiplecards():
                         final.append("OrganizationName--" + s2)
 
             except IndexError:
-                company()
-                # org_name()
-                # organisation()
+                org_name()
+                organisation()
 
                 # final.append("OrganizationName--")
-        ################################################### Email######################################################
-        import re
-        from email_scraper import scrape_emails
-        s = list(scrape_emails(horizontaltext))
-        email_id1 = s
-        import re
-        email_id=[]
-# Define a function to extract email addresses from a text
-        def extract_emails(text):
-            email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b'
-            return re.findall(email_pattern, text)
-
-# List of text strings
-
-
-# Iterate through the list and extract email addresses from each value
-        for text in email_id1 :
-            email_addresses = extract_emails(text)
-    
-    # Print the extracted email addresses
-            if email_addresses:
-       # print("Email addresses in the text:")
-                for email in email_addresses:
-            #print(email)
-                    email_id.append(email)
-            
-            else:
-                print("No email addresses found in the text.")
-    
-        # Remove "email" if it exists within square brackets
-        email_id = [item.replace("email", "").replace("Email", "").replace("E-mail", "") for item in email_id]
-
-
 
         # ************************************* CONTACT PERSON *******************************************************************
         try:
-            my_string='Hello'
-            print(my_string[-6])
-            # final.append(
-            #     "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace('"', '').replace("]",
-            #                                                                                                      "") +
-            #     PErsons[
-            #         1].replace(":PER", "").replace('"', ''))+PErsons[2].replace(":PER", "").replace("[", "").replace('"', '').replace("]","")
-                                                                                                                 
+            final.append(
+                "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace('"', '').replace("]",
+                                                                                                                 "") +
+                PErsons[
+                    1].replace(":PER", "").replace('"', ''))
         except IndexError:
             try:
                 final.append(
                     "CONTACTPERSONNAME--" + PErsons[0].replace(":PER", "").replace("[", "").replace("]", "").replace(
-                        '"', ''))
-                person_name=PErsons[0].replace(":PER", "").replace("[", "").replace("]", "").replace('"', '').replace(' ','')
-                if not email_id:
-                    final.append('ContactEmail--')
-                    final.append('OrganizationEmail--')
-                else:
-                    per_Name=[]
-                    per_Name.append(person_name)
-                
-                    print(email_id)
-
-                    def calculate_matching_percentage(word_list, words):
-                        def calculate_single_matching_percentage(word, item):
-                            max_length = max(len(word), len(item))
-                            word = word.upper()
-                            item = item.strip().replace(" ", "").upper()
-                            matching_chars = sum(1 for c1, c2 in zip(item, word) if c1 == c2)
-                            return (matching_chars / max_length) * 100
-
-                        highest_percentage = 0.0
-                        highest_matching_item = None
-
-                        for word in words:
-                            word = word.upper()
-                            for item in word_list:
-                                original_item = item
-                                item = item.strip().replace(" ", "").upper()
-
-                                matching_percentage = calculate_single_matching_percentage(word, item)
-
-                                if matching_percentage > highest_percentage:
-                                    highest_percentage = matching_percentage
-                                    highest_matching_item = original_item
-
-                        return highest_matching_item, highest_percentage
-
-                    word_list = email_id
-                    per_Name = [item.split('.')[1] if '.' in item else item for item in per_Name]
-                    print(per_Name)
-
-                    word2 = per_Name
-
-                    for word in word2:
-                        highest_matching_item, highest_percentage = calculate_matching_percentage(word_list, [word])
-                        if highest_matching_item is not None:
-                            print(
-                                f"For '{word}', the highest matching percentage is {highest_percentage:.2f}% with '{highest_matching_item}'")
-                        else:
-                            print(f"For '{word}', no matches found.")
-                        #final.append('OrganistaionEmail--' + email_id[0])
-
-                    if len(word_list) == 1:
-
-                        if highest_percentage >= 15:
-                            print(highest_matching_item)
-                            final.append(
-                            'ContactEmail--' + str(highest_matching_item).replace("[", "").replace("]", "").replace(
-                                "\\n", "").replace("'", ""))
-                            final.append('OrganizationEmail--')
-
-                        else:
-                            print('not matched')
-                            final.append('OrganistaionEmail--' + email_id[0])
-                            final.append('ContactEmail--')
-                        
-
-                    else:
-                        print('it as more elemnt')
-                        if highest_percentage >= 15:
-                            print(highest_matching_item)
-                            final.append('ContactEmail--' + str(highest_matching_item).replace("[", "").replace("]", "").replace("\\n", "").replace("'", ""))
-
-
-                            # Given list of email addresses
-                            email_list = word_list
-
-                            # Email address to remove
-                            email_to_remove = highest_matching_item
-
-                            # Check if the email address is in the list before removing it
-                            if email_to_remove in email_list:
-                                email_list.remove(email_to_remove)
-                                print(f"'{email_to_remove}' has been removed from the list.")
-                            else:
-                                print(f"'{email_to_remove}' is not in the list.")
-
-                            # Print the updated list
-                            print("Updated email list:", email_list)
-                            final.append('OrganistaionEmail--' + str(email_list[0]).replace("[", "").replace("]", "").replace("\\n","").replace("'", ""))
-                        else:
-                            final.append('OrganistaionEmail--' + str(email_id[0]) +','+ str(email_id[1]))
-
-
+                        '"',
+                        ''))
             except IndexError:
-                # org_name()
-                # contactpersonname()
-                final.append("CONTACTPERSONNAME--")
-
-
-                if len(email_id) > 1:
-                    final.append(
-                'OrganizationEmail--'  + str(email_id[0]).replace("[", "").replace("]", "").replace("\\n", "").replace("'",
-                                                                                                                 ""))
-                    final.append(
-                'ContactEmail--' + str(email_id[-1]).replace("[", "").replace("]", "").replace("\\n", "").replace(
-                    "'",
-                    ""))
-                else:
-                    try:
-                        final.append(
-                    'ContactEmail--' + str(email_id[0]).replace("[", "").replace("]", "").replace("\\n", "").replace(
-                        "'",
-                        ""))
-                        final.append('OrganizationEmail--')
-                    except IndexError:
-                        final.append('ContactEmail--')
-                        final.append('OrganizationEmail--')
-
+                org_name()
+                contactpersonname()
+                # final.append("CONTACTPERSONNAME--")
         ###############address flair#####################
 
         try:
@@ -1022,39 +850,12 @@ def multiplecards():
 
         # print(verticaltext)
         numbers = phonenumbers.PhoneNumberMatcher(
-            verticaltext.replace('+91', '').replace('(0)', '').replace('(', '').replace(')', '').replace('-',
-                                                                                                         '').replace(
-                ' ', ''), "IN")
+            verticaltext.replace('+91', '').replace('(0)', '').replace('(', '').replace(')', ''), "IN")
 
         for number in numbers:
             number = str(number).split(")")
             num.append(number[1])
             # num.append(number[-1])
-
-        print(num)
-        import re
-
-        # Input list of strings
-        #         num =[' 7227906777Extn1204634444']
-
-        # Define a regular expression pattern to split when text is present
-        pattern = r'[a-zA-Z]+'
-
-        # Function to split a string based on the pattern
-        def split_string(text):
-            return re.split(pattern, text)
-
-        # Process each line in the list
-        split_lines = [split_string(line) for line in num]
-
-        # Flatten the list of lists into a single list
-        split_lines = [item for sublist in split_lines for item in sublist]
-
-        # Remove any empty strings
-        num = [item for item in split_lines if item]
-
-        # Print the split lines
-        print(num)
         if len(num) == 0:
             final.append("ContactNumber--")
             final.append("OrganizationNumber--")
@@ -1087,6 +888,42 @@ def multiplecards():
         # except IndexError:
         #     pass
 
+        ################################################### Email######################################################
+        import re
+        from email_scraper import scrape_emails
+        s = list(scrape_emails(horizontaltext))
+        email_id = s
+
+        # email_id = []
+        # matches = re.findall(r'[\w\.-]+@[\w\.-]+', verticaltext)
+        # for match in matches:
+        #     email_id.append(match)
+
+        #     # final.append('Email--' + match)
+        #     email_ = str(email_id).replace("[", "").replace("]", "").replace("'", "")
+        #     # final.append(email_)
+
+        #     # final.append('Email--' + email_)
+        #     # remove_list.append(email_)
+        if len(email_id) > 1:
+            final.append(
+                'ContactEmail--' + str(email_id[0]).replace("[", "").replace("]", "").replace("\\n", "").replace("'",
+                                                                                                                 ""))
+            final.append(
+                'OrganizationEmail--' + str(email_id[-1]).replace("[", "").replace("]", "").replace("\\n", "").replace(
+                    "'",
+                    ""))
+        else:
+            try:
+                final.append(
+                    'ContactEmail--' + str(email_id[0]).replace("[", "").replace("]", "").replace("\\n", "").replace(
+                        "'",
+                        ""))
+                final.append('OrganizationEmail--')
+            except IndexError:
+                final.append('ContactEmail--')
+                final.append('OrganizationEmail--')
+
         ###############PINCODE############
 
         pinlst = []
@@ -1122,9 +959,9 @@ def multiplecards():
 
         # print(addrespinlst)
         import pgeocode
-        # print(line12)
+        #print(line12)
         import re
-        matche1 = re.findall(r'-\d{6}\b|\b\d{6}\b|\b\d{3}  \d{3}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', line12)
+        matche1 = re.findall(r'-\d{6}|\b\d{6}\b|\b\d{3}  \d{3}\b|-\d{3} \d{3}|\b\d{3} \d{3}\b', line12)
         for i in matche1:
             address3 = i.replace(' ', '').replace('-', '')
             pinlst.append(address3)
@@ -1150,7 +987,7 @@ def multiplecards():
             final.append(county_name)
 
         except (IndexError, NameError):
-            final.append("PinCode1--" + " ")
+            final.append("PinCode1--")
             final.append("country_code--")
             final.append("LandMark1--")
             final.append("state_name--")
@@ -1169,11 +1006,6 @@ def multiplecards():
         df1.to_csv('path123.csv', index=False)
         df2 = pd.read_csv('path123.csv')
         print(df2)
-        if df2['Values'].isnull().all():
-            print("Column 'Column2' is empty.")
-            return 'Invalid image'
-        else:
-            pass
         df2 = df2.T
         df2.to_csv('path1.csv', index=False, header=False)
         df1 = pd.read_csv('path1.csv')
@@ -1200,7 +1032,7 @@ def multiplecards():
         imagename1 = str(imagename).split('.')
         imagename = str(imagename1[-2]).replace("[", "]")
         empty.append("FileName--" + imagename)
-        empty.append("FilePath--" + "")
+        empty.append("FilePath--"+ "")
         imageExtension = str(imagename1[-1]).replace("[", "]")
         empty.append("FileType--" + imageExtension)
         image.close()
@@ -1229,802 +1061,59 @@ def multiplecards():
         z.update(y)
         # the result is a JSON string:
         # print(json.dumps(z))
-
+        
         zlist.append(z)
         #############################################creating csv#####################################
-    # print(final)
+        #print(final)
+        #print(imagelist)
+        #final.append('image--' + str(imagelist))
+       #  import requests
+       #  import json
+
+       # # url = "https://anwi.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create/list" #dev
+       #  url = "https://qa.bizgaze.com/apis/v4/bizgaze/integrations/businesscards/create/list" #testing
+       #  # url = "https://test.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create" # test
+       #  # url='http://localhost:3088/apis/v4/bizgaze/integrations/businesscards/create'
+       #  # url = 'https://c01.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create'  # C01
+       #  payload1 = json.dumps(zlist)
+       #  # print('--------------------------------------------------------------------------')
+       #  #print(payload1)
+       #  headers = {
+       #      #'Authorization': 'stat 1a936137490040c997928f485e3cdd7a',   #dev
+       #      # 'Authorization': 'stat 16516391d0074f4c8a15ea16fb49470b',#testing
+       #      # 'Authorization': 'stat 08e55fcfbaa940c8ab8145a074c444d1',
+       #      # 'Authorization': 'stat f7cdb402e01e44e5842878653946168f',  # c01
+       #      #'Authorization': 'Stat c3e11b2fcbfe455b86a1fe6efde02a69',#demo
+       #      'Authorization':'Stat e5bc6ad08f2c42feb5f98a2a521d00af',
+
+
+       #      'Content-Type': 'application/json'
+       #  }
+       #  response = requests.request("POST", url, headers=headers, data=payload1)
+       #  # print("##############################################################")
+
+       #  print(payload1)
+       #  #print(zlist)
+       #  # import os
+       #  # if 'BusinessCards Created Successfully' in response.text:
+       #  #     print('present')
+       #  #     os.remove(found)
+       #  # else:
+       #  #     print('not present')
+
+       #  df1.to_json('visitingcard.json')
+       #  data = df1.to_json('visiting.json', orient='records')
+       #  print(data)
+
+        #return render_template('index.html')
+       
+
+    #return response.text
+    #return z
+    return zlist
 
-    # print(imagelist)
-    # final.append('image--' + str(imagelist))
-    #  import requests
-    #  import json
 
-    # # url = "https://anwi.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create/list" #dev
-    #  url = "https://qa.bizgaze.com/apis/v4/bizgaze/integrations/businesscards/create/list" #testing
-    #  # url = "https://test.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create" # test
-    #  # url='http://localhost:3088/apis/v4/bizgaze/integrations/businesscards/create'
-    #  # url = 'https://c01.bizgaze.app/apis/v4/bizgaze/integrations/businesscards/create'  # C01
-    #  payload1 = json.dumps(zlist)
-    #  # print('--------------------------------------------------------------------------')
-    #  #print(payload1)
-    #  headers = {
-    #      #'Authorization': 'stat 1a936137490040c997928f485e3cdd7a',   #dev
-    #      # 'Authorization': 'stat 16516391d0074f4c8a15ea16fb49470b',#testing
-    #      # 'Authorization': 'stat 08e55fcfbaa940c8ab8145a074c444d1',
-    #      # 'Authorization': 'stat f7cdb402e01e44e5842878653946168f',  # c01
-    #      #'Authorization': 'Stat c3e11b2fcbfe455b86a1fe6efde02a69',#demo
-    #      'Authorization':'Stat e5bc6ad08f2c42feb5f98a2a521d00af',
-
-    #      'Content-Type': 'application/json'
-    #  }
-    #  response = requests.request("POST", url, headers=headers, data=payload1)
-    #  # print("##############################################################")
-
-    #  print(payload1)
-    #  #print(zlist)
-    #  # import os
-    #  # if 'BusinessCards Created Successfully' in response.text:
-    #  #     print('present')
-    #  #     os.remove(found)
-    #  # else:
-    #  #     print('not present')
-
-    #  df1.to_json('visitingcard.json')
-    #  data = df1.to_json('visiting.json', orient='records')
-    #  print(data)
-
-    # return render_template('index.html')
-
-    # return response.text
-
-    return z
-    # return zlist
-
-
-# @app.route('/upload_BusinessCards', methods=["POST"])
-# def mainfunction():
-#     Dataset = request.get_json()
-#     if len(Dataset)==1:
-#         # predict(Dataset)
-#         return multiplecards(Dataset)
-#     else:
-#         # multiplecards(Dataset)
-#         return multiplecards(Dataset)
-
-
-###################################################################################   Resume parser  ###################################################################################################
-
-@app.route("/upload_resume", methods=["POST"])
-def predict_resume():
-    Dataset = request.get_json()
-    # data = {'visiting': Dataset}
-    # a=url_list[0]
-    a = Dataset
-    # a = url_list
-    # print(a)
-    x = a['FileData']
-    # print(x)
-    y = a['FileName']
-    y = y.replace(' ', '')
-    y = y.replace('&', '')
-    y = y.replace('@', '')
-    z = a['FileType']
-    # CreatedBy=a['CreatedBy']
-
-    name = y + '.' + z
-    print(name)
-
-    # img_data = x.encode()
-
-    img_data = x.encode()
-
-    import base64
-    with open('./Resume_parser/upload_resume/' + name, "wb") as fh:
-        fh.write(base64.decodebytes(img_data))
-    # cmd = "python ./Resume_parser/resume1.0.multiprocessing.py" + " " + str('./Resume_parser/upload_resume/' + name)
-    # os.system(cmd)
-
-    # f = "./resume_upload"
-    # f = os.listdir(f)
-    f = './Resume_parser/upload_resume/' + name
-    found = './Resume_parser/upload_resume/' + name
-    print('this from resumepy file')
-    print(f)
-
-    def docx_to_txt():
-        import docx2txt
-        import glob
-        text = ''
-        for file in glob.glob(found):
-            c = docx2txt.process(file)
-            c = c.rstrip("\n")
-            toPrint = c
-            d = ' '.join(i for i in toPrint.split())
-            d = d.rstrip()
-            text += d
-        docx_to_txt.text = text
-
-    def doc_to_txt():
-        import docx2txt
-        import glob
-        text = ''
-        # for file in glob.glob(found):
-        c = docx2txt.process(f)
-        c = c.rstrip("\n")
-        toPrint = c
-        d = ' '.join(i for i in toPrint.split())
-        d = d.rstrip()
-        text += d
-        doc_to_txt.text = text
-
-    def pdf_to_txt():
-        import sys
-        import fitz
-        fname = found
-        doc = fitz.open(fname)
-        text = ""
-        for page in doc:
-            text = text + str(page.get_text())
-        pdf_to_txt.text = " ".join(text.split('\n'))
-
-    # for file in f:
-    print('checking for filetype')
-    if f.endswith('.doc'):
-        doc_to_txt()
-        x = doc_to_txt.text
-    elif f.endswith('.docx'):
-        docx_to_txt()
-        x = docx_to_txt.text
-    elif f.endswith('.pdf'):
-        pdf_to_txt()
-        x = pdf_to_txt.text
-
-    doc = nlp_model(x)
-    k = []
-    l = []
-    for ent in doc.ents:
-        # print(f'{ent.label_.upper():{30}}- {ent.text}')
-        k.append(ent.label_.upper())
-        l.append(ent.text)
-    columns = k
-    rows = [l]
-    import pandas as pd
-    data = pd.DataFrame(rows, columns=columns)
-    df = data
-
-    data = df.T
-
-    data.to_csv('./Resume_parser/Ad1.csv', index=True)
-
-    data = pd.read_csv('./Resume_parser/Ad1.csv')
-    # print(data)
-    data.rename({data.columns[-2]: 'Key'}, axis=1, inplace=True)
-    data.rename({data.columns[-1]: 'Values'}, axis=1, inplace=True)
-    data.to_csv('./Resume_parser/Ad1.csv', index=False)
-    #####################################################################################################
-    # ModelName = "text-davinci-003"
-    # prompt_value = 'find designation in key value pairs from below text?' + "/n" + str(x)
-    # max_token_value = 300
-
-    # # usertext= request.get_data()
-    # # output = usertext.decode()
-    # # print(output)
-    # import os
-    # import openai
-
-    # # print(usertext)
-    # openai.api_key = "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"
-    # # userinput='fibonacci series in python'
-    # import os
-    # import openai
-
-    # # openai.api_key = os.getenv("OPENAI_API_KEY")
-
-    # response_text = openai.Completion.create(
-    # model=ModelName,
-    # prompt=prompt_value,
-    # temperature=0,
-    # max_tokens=max_token_value,
-    # top_p=1,
-    # frequency_penalty=0,
-    # presence_penalty=0,
-    # stop=["\"\"\""]
-    # )
-    # a = response_text['choices']
-    # data = a[0]['text']
-    # data=data.replace('\n','$@$')
-    # data=data.replace('$@$$@$','')
-    # #data=data.replace(':','')
-    # print(data)
-    # data=data.replace('Designation','POSITION')
-    # data=data.split('$@$')
-    # print(data)
-    # import pandas as pd
-    # desgnaition=pd.DataFrame(data)
-    # desgnaition=desgnaition[0].str.split(':',expand=True)
-    # desgnaition.columns=['Key','Values']
-    # print(desgnaition)
-
-    # data= pd.read_csv('./Resume_parser/Ad1.csv')
-
-    # frames = [data,desgnaition]
-
-    # result = pd.concat(frames,axis=0)
-    # result.to_csv('./Resume_parser/Ad1.csv', index=False)
-
-    ########################################################################################################
-    # df2 = pd.read_csv('./Ad1.csv')
-    x1 = pd.read_csv('D:/projects/C01app/Resume_parser/AD11.csv')
-    tp = pd.read_csv('./Resume_parser/Ad1.csv')
-    # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
-    merge = pd.merge(tp, x1, on='Key', how='right')
-    merge.to_csv('./Resume_parser/AD.csv', index=False)
-    df2 = pd.read_csv('./Resume_parser/AD.csv')
-    # print(df2)
-    df2 = df2.T
-
-    df2.to_csv('./Resume_parser/path.csv', index=False, header=False)
-    df1 = pd.read_csv('./Resume_parser/path.csv')
-    df1.to_json('./Resume_parser/firstjson.json', orient="index")
-    print(df1)
-
-    doc = nlp_model1(x)
-    k = []
-    l = []
-    for ent in doc.ents:
-        # print(f'{ent.label_.upper():{30}}- {ent.text}')
-        k.append(ent.label_.upper())
-        l.append(ent.text)
-    columns = k
-    rows = [l]
-    data = pd.DataFrame(rows, columns=columns)
-    df = data
-    data = df.T
-
-    data.to_csv('./Resume_parser/Ad2.csv', index=True)
-    data = pd.read_csv('./Resume_parser/Ad2.csv')
-    data.rename({data.columns[-2]: 'Key'}, axis=1, inplace=True)
-    data.rename({data.columns[-1]: 'Values'}, axis=1, inplace=True)
-    data.to_csv('./Resume_parser/Ad2.csv', index=False)
-    import pandas as pd
-    import json
-    dflist = []
-    x = pd.read_csv('D:/projects/C01app/Resume_parser/PG.csv')
-    tp = pd.read_csv('./Resume_parser/Ad2.csv')
-    # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
-    merge = pd.merge(x, tp, on='Key', how='left')
-    import numpy as np
-    merge = merge.replace(np.nan, '', regex=True)
-    merge.to_csv('./Resume_parser/PGmerge.csv', index=False)
-
-    dfPG = pd.read_csv('./Resume_parser/PGmerge.csv')
-    import numpy as np
-    dfPG = dfPG.replace({np.nan: None})
-    x2 = dfPG.iloc[:, -2].tolist()
-    y2 = dfPG.iloc[:, -1].tolist()
-    z1 = dict(zip(x2, y2))
-    dflist.append(z1)
-    # u1 = json.dumps(z1)
-    import pandas as pd
-
-    x = pd.read_csv('D:/projects/C01app/Resume_parser/UG.csv')
-    tp = pd.read_csv('./Resume_parser/Ad2.csv')
-    # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
-    merge = pd.merge(x, tp, on='Key', how='left')
-    import numpy as np
-    merge = merge.replace(np.nan, '', regex=True)
-    merge.to_csv('./Resume_parser/UGmerge.csv', index=False)
-
-    dfUG = pd.read_csv('./Resume_parser/UGmerge.csv')
-    import numpy as np
-    dfUG = dfUG.replace({np.nan: None})
-    x2 = dfUG.iloc[:, -2].tolist()
-    y2 = dfUG.iloc[:, -1].tolist()
-    z2 = dict(zip(x2, y2))
-    dflist.append(z2)
-    # u2 = json.dumps(z2)
-    # final = '[' + str(z1) + ',' + str(z2) + ']'
-    # return render_template('resume.html')
-
-    ############################################################################
-    import pandas as pd
-
-    x = pd.read_csv('D:/projects/C01app/Resume_parser/inter.csv')
-    tp = pd.read_csv('./Resume_parser/Ad2.csv')
-    # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
-    merge = pd.merge(x, tp, on='Key', how='left')
-    import numpy as np
-    merge = merge.replace(np.nan, '', regex=True)
-    merge.to_csv('./Resume_parser/intermerge.csv', index=False)
-
-    dfinter = pd.read_csv('./Resume_parser/intermerge.csv')
-    import numpy as np
-    dfinter = dfinter.replace({np.nan: None})
-    x2 = dfinter.iloc[:, -2].tolist()
-    y2 = dfinter.iloc[:, -1].tolist()
-    z3 = dict(zip(x2, y2))
-    dflist.append(z3)
-
-    ############################################################################
-    import pandas as pd
-
-    x = pd.read_csv('D:/projects/C01app/Resume_parser/SSC.csv')
-    tp = pd.read_csv('./Resume_parser/Ad2.csv')
-    # tp = tp.loc[:, ~tp.columns.str.contains('^Unnamed')]
-    merge = pd.merge(x, tp, on='Key', how='left')
-    import numpy as np
-    merge = merge.replace(np.nan, '', regex=True)
-    merge.to_csv('./Resume_parser/sscmerge.csv', index=False)
-
-    dfssc = pd.read_csv('./Resume_parser/sscmerge.csv')
-    import numpy as np
-    dfssc = dfssc.replace({np.nan: None})
-    x2 = dfssc.iloc[:, -2].tolist()
-    y2 = dfssc.iloc[:, -1].tolist()
-    z4 = dict(zip(x2, y2))
-    dflist.append(z4)
-    ############################################Document############################################################
-    import base64
-    empty = []
-    name = f
-    image = open(name, 'rb')
-    image_read = image.read()
-    image_64_encode = base64.b64encode(image_read)
-    NULL = 'null'
-    # empty.append("ByteData--" + (NULL).strip('""'))
-    image_64_encode = image_64_encode.decode('utf-8')
-    empty.append("FileData--" + str(image_64_encode))
-    imagedata = name.split("/")
-    imagename = str(imagedata[-1]).replace('"', '').replace("[", "").replace("]", "")
-    imagename1 = str(imagename).split('.')
-
-    imagename = str(imagename1[-2]).replace("[", "]")
-    empty.append("FileName--" + imagename)
-    empty.append("FilePath--" + "")
-    imageExtension = str(imagename1[-1]).replace("[", "]")
-    empty.append("FileType--" + imageExtension)
-
-    import pandas as pd
-    df = pd.DataFrame(empty)
-    df = df[0].str.split("--", expand=True)
-    data1 = pd.DataFrame(df[0])
-    data2 = pd.DataFrame(df[1])
-    dt = data2.set_index(data1[0])
-
-    dt4 = dt.T
-    list = []
-    dictionary = dt4.to_dict(orient="index")
-
-    a = {
-        "FileId": 0,
-        "FileData": "",
-        "FileName": "",
-        "FileType": "",
-        "RefId": 0
-    }
-    list = []
-
-    list.append(a)
-    list.append(dictionary[1])
-
-    import json
-
-    with open('./Resume_parser/firstjson.json', 'r') as json_file:
-        json_load = json.load(json_file)
-
-        # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/businesscards/create"
-
-    nothing = json.dumps(json_load).replace("]", "").replace("[", "").replace('{"0":', '').replace('}}', '}')
-    import json
-
-    # JSON data:
-    x = nothing
-
-    # python object to be appended
-    y = {"EducationDetails": dflist}
-    y1 = {"Document": list}
-    print(y)
-    # parsing JSON string:
-    z = json.loads(x)
-
-    # appending the data
-    z.update(y)
-    z.update(y1)
-
-    # the result is a JSON string:
-    # print(json.dumps(z))
-    print('##########################')
-    # print(z)
-    print('##########################')
-    import requests
-    import json
-
-    # with open('visitingcard1.json', 'r') as json_file:
-    #     json_load = json.load(json_file)
-    # url = "https://qa.bizgaze.app/apis/v4/bizgaze/integrations/resumeparsing/save"  #dev
-    # # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/resumeparsing/save"
-    # #url = "https://qa.bizgaze.app/apis/v4/bizgaze/integrations/resumeparsing/save"  #testing
-    # payload1 = json.dumps(z)
-    # print('--------------------------------------------------------------------------')
-    # # print(payload1)
-    # headers = {
-    #     # 'Authorization': 'stat 53f27e671adf456e974f1d11ceb5db41',
-    #      #'Authorization': 'stat 5702ce5a77d34e0381bc2f06588d9bcc',#dev
-    #     'Authorization': 'stat ed5dd14ee2094227849f6bbe2928bff3', #testing
-    #     'Content-Type': 'application/json'
-    # }
-    # response = requests.request("POST", url, headers=headers, data=payload1)
-    # print("##############################################################")
-
-    # print(response.text)
-    # function_1.var=response
-    # a=str(response.text)
-
-    files = glob.glob('./resume_upload/*')
-    for f in files:
-        os.remove(f)
-
-    return z
-    # return 'done'
-
-
-# return render_template('resume.html')
-
-
-# @app.route('/upload_resume', methods=["POST"])
-def upload_resume():
-    if __name__ == "__main__":
-        # print(os.getpid())
-
-        url_list = []
-        Dataset = request.get_json()
-        # id = "100013660000125"
-        url_list.append(Dataset)
-        # multiprocessing
-        with multiprocessing.Pool(processes=1) as pool:
-            results = pool.map(predict_resume, url_list)
-
-        pool.close()
-        return results[0]
-
-
-@app.route("/Download_resume")
-def Download_resume():
-    # try:
-    with open("Ad1.csv", encoding="unicode_escape") as fp:
-        csv = fp.read()
-        return Response(csv, mimetype="text/csv", headers={"Content-disposition": "attachment; filename=Resume.csv"})
-
-
-##############################################################################   Invoice Parser   ###################################################################################################
-
-@app.route('/upload_invoice', methods=["POST", "GET"])
-def upload_invoice():
-    Dataset = request.get_json()
-    # data = {'visiting': Dataset}
-    # a=url_list[0]
-    a = Dataset
-
-    x = a['FileData']
-    # print(x)
-    y = a['FileName']
-    z = a['FileType']
-    # CreatedBy=a['CreatedBy']
-
-    name = y + '.' + z
-    print(name)
-
-    img_data = x.encode()
-
-    import base64
-    with open('./Invoice_parser/upload_invoice/' + name, "wb") as fh:
-        fh.write(base64.decodebytes(img_data))
-
-    # cmd = "python ./Invoice_parser/invoice.multiprocessing.py" + " " + str('./Invoice_parser/upload_invoice/' + name)
-    # os.system(cmd)
-    #####################################################################################################################################
-
-    name = './Invoice_parser/upload_invoice/' + name
-    extension = name.split('.')[-1]
-
-    def image_to_text():
-        print('#######################  image-to-pdf   ################')
-
-        import cv2
-        import numpy as np
-        fname = name
-        print(fname)
-        import pytesseract as tess
-        from PIL import Image
-
-        tess.pytesseract.tesseract_cmd = r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe"
-        img = cv2.imread(fname)
-        # img = cv2.resize(img, None, fx=1.2, fy=1.2, interpolation=cv2.INTER_CUBIC)
-
-        # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-
-        # kernel = np.ones((1, 1), np.uint8)
-        # img = cv2.dilate(img, kernel, iterations=1)
-        # img = cv2.erode(img, kernel, iterations=1)
-
-        # img=cv2.threshold(cv2.GaussianBlur(img, (5, 5), 0), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
-        pdf = tess.image_to_pdf_or_hocr(img, extension="pdf")
-        with open(Current_Working_Directory + "/Invoice_parser/demo.pdf", "w+b", ) as f:
-            f.write(pdf)
-        print('demo created')
-        import fitz
-        fname = Current_Working_Directory + '/Invoice_parser/demo.pdf'
-        doc = fitz.open(fname)
-        text = ""
-        for page in doc:
-            text = text + str(page.get_text())
-        image_to_text.text = " ".join(text.split("\n"))
-
-        # result = ocr.ocr( Current_Working_Directory + "/Invoice_parser/demo.pdf" , cls=True)
-        # result = result[0]
-
-        # txts = [line[1][0] for line in result]
-
-        # image_to_text.text = ""
-        # for i in txts:
-        #     if len(i) < 4:
-        #         continue
-        #         # print(i+"\n")
-        #     image_to_text.text = image_to_text.text + str(i) + "\n"
-
-    def pdf_to_text():
-        import fitz
-        fname = name
-        doc = fitz.open(fname)
-        text = ""
-        for page in doc:
-            text = text + str(page.get_text())
-        pdf_to_text.text = " ".join(text.split("\n"))
-
-    extensionlist = ['JPEG', 'jpg', 'png', 'JPG', 'PNG', 'jpeg']
-
-    if extension in extensionlist:
-        print('image' + extension)
-        image_to_text()
-        x = image_to_text.text
-
-    else:
-        print('pdf' + extension)
-        pdf_to_text()
-        x = pdf_to_text.text
-
-    import spacy
-    import sys
-    # import fitz
-    # fname = "uploads/0.pdf"
-    # doc = fitz.open(fname)
-    # text = ""
-    # for page in doc:
-    #     text = text + str(page.get_text())
-    # fitz = " ".join(text.split("\n"))
-    # # print(fitz)
-    import pandas as pd
-
-    doc = nlp_model1(x)
-    k = []
-    l = []
-    for ent in doc.ents:
-        # print(f"{ent.label_.upper():{30}}- {ent.text}")
-        k.append(ent.label_.upper())
-        l.append(ent.text)
-    columns = k
-    rows = [l]
-    data = pd.DataFrame(rows, columns=columns)
-    df = data
-    df = data.T
-
-    df.to_csv(Current_Working_Directory + "/Invoice_parser/Invoice.csv")
-    import pandas as pd
-    df = pd.read_csv(Current_Working_Directory + "/Invoice_parser/Invoice.csv")
-    # df.head()
-    # df = df.T
-    # new_header = df.iloc[0]  # grab the first row for the header
-    # df = df[1:]  # take the data less the header row
-    # df.columns = new_header
-    # def df_column_uniquify(df):
-    #     df_columns = df.columns
-    #     new_columns = []
-    #     for item in df_columns:
-    #         counter = 0
-    #         newitem = item
-    #         while newitem in new_columns:
-    #             counter += 1
-    #             newitem = "{}_{}".format(item, counter)
-    #         new_columns.append(newitem)
-    #     df.columns = new_columns
-    #     return df.T
-    # df = df_column_uniquify(df)
-    # # df=df.T
-    # df.to_csv('final.csv')
-    # df = pd.read_csv('final.csv')
-    df.rename({df.columns[-2]: 'Key'}, axis=1, inplace=True)
-    df.rename({df.columns[-1]: 'Values'}, axis=1, inplace=True)
-    df['Key'] = df['Key'].str.replace('/', '')
-    df['Key'] = df['Key'].str.replace(' ', '')
-    df.to_csv(Current_Working_Directory + '/Invoice_parser/final.csv', index=False)
-    import pandas as pd
-    x1 = pd.read_csv(Current_Working_Directory + '/Invoice_parser/final.csv')
-    tp = pd.read_csv(Current_Working_Directory + '/Invoice_parser/finalwithcolen.csv')
-    merge = pd.merge(x1, tp, on='Key', how='right')
-    merge1 = merge
-
-    merge['Values'] = merge['Values'].astype(str)
-    merge = merge['Values'].str.split(":", expand=True)
-    merge.rename({merge.columns[-1]: 'Values'}, axis=1, inplace=True)
-    frames = [merge1['Key'], merge['Values']]
-    result = pd.concat(frames, axis=1)
-    x1 = pd.read_csv(Current_Working_Directory + '/Invoice_parser/final.csv')
-    tp = pd.read_csv(Current_Working_Directory + '/Invoice_parser/finalwithoutcolen.csv')
-    merged = pd.merge(x1, tp, on='Key', how='right')
-    frames = [result, merged]
-    result1 = pd.concat(frames)
-    result1.to_csv(Current_Working_Directory + '/Invoice_parser/final1.csv', index=False)
-
-    x1 = pd.read_csv(Current_Working_Directory + '/Invoice_parser/main.csv')
-    tp = pd.read_csv(Current_Working_Directory + '/Invoice_parser/final1.csv')
-    # tp = pd.read_csv(Current_Working_Directory + 'Invoice_parser/final.csv')
-    tp['Key'] = tp['Key'].astype(str)
-    tp['Values'] = tp['Values'].astype(str)
-    tp['Key'] = tp['Key'].str.strip()
-    tp['Values'] = tp['Values'].str.strip()
-
-    merge = pd.merge(tp, x1, on='Key', how='right')
-    merge.to_csv(Current_Working_Directory + '/Invoice_parser/invoicewithouttable.csv', index=False)
-    df2 = pd.read_csv(Current_Working_Directory + '/Invoice_parser/invoicewithouttable.csv')
-
-    # Import writer class from csv module
-    from csv import writer
-
-    List = ['PlantCode', " "]
-    with open(Current_Working_Directory + '/Invoice_parser/invoicewithouttable.csv', 'a') as f_object:
-        writer_object = writer(f_object)
-        writer_object.writerow(List)
-        f_object.close()
-        # print(df2)
-    df2 = pd.read_csv(Current_Working_Directory + '/Invoice_parser/invoicewithouttable.csv')
-    print(df2)
-    df2 = df2.T
-
-    df2.to_csv(Current_Working_Directory + '/Invoice_parser/invoicewithouttable.csv', index=False, header=False)
-
-    df1 = pd.read_csv(Current_Working_Directory + '/Invoice_parser/invoicewithouttable.csv')
-    df1.to_json(Current_Working_Directory + '/Invoice_parser/firstjson.json', orient="index")
-    import pandas as pd
-    x = pd.read_csv(Current_Working_Directory + '/Invoice_parser/final.csv')
-    tp = pd.read_csv(Current_Working_Directory + '/Invoice_parser/item1.csv')
-    x['Values'] = x['Values'].str.strip()
-    merge = pd.merge(tp, x, on='Key', how='inner')
-    merge = merge.groupby('Key').agg({
-        'Values': '/'.join,
-    }).reset_index()
-    z = merge['Values'].str.split('/', expand=True)
-    frames = [merge, z]
-    result1 = pd.concat(frames, axis=1)
-    result1 = result1.drop(['Values'], axis=1)
-    import pandas as pd
-    tp = pd.read_csv(Current_Working_Directory + '/Invoice_parser/item1.csv')
-    merge = pd.merge(tp, result1, on='Key', how='inner')
-    merge = merge.T
-    new_header = merge.iloc[0]  # grab the first row for the header
-    merge = merge[1:]  # take the data less the header row
-    merge.columns = new_header
-
-    merge = merge.to_dict('records')
-    invoice_Item = merge
-    print(invoice_Item)
-
-    ####################################Document############################################################
-
-    import base64
-    empty = []
-    # name = found
-    image = open(name, 'rb')
-    image_read = image.read()
-    image_64_encode = base64.b64encode(image_read)
-    NULL = 'null'
-    # empty.append("ByteData--" + (NULL).strip('""'))
-    image_64_encode = image_64_encode.decode('utf-8')
-    empty.append("FileData--" + str(image_64_encode))
-    imagedata = name.split("/")
-    imagename = str(imagedata[-1]).replace('"', '').replace("[", "").replace("]", "")
-    imagename1 = str(imagename).split('.')
-    imagename = str(imagename1[-2]).replace("[", "]")
-    empty.append("FileName--" + imagename)
-    empty.append("FilePath--" + name)
-    imageExtension = str(imagename1[-1]).replace("[", "]")
-    empty.append("FileType--" + imageExtension)
-    import pandas as pd
-    df = pd.DataFrame(empty)
-    df = df[0].str.split("--", expand=True)
-    data1 = pd.DataFrame(df[0])
-    data2 = pd.DataFrame(df[1])
-    dt = data2.set_index(data1[0])
-    dt4 = dt.T
-    list = []
-    dictionary = dt4.to_dict(orient="index")
-
-    a = {
-        "FileId": 0,
-        "FileData": "",
-        "FileName": "",
-        "FileType": "",
-        "RefId": 0
-    }
-    list = []
-    list.append(a)
-    list.append(dictionary[1])
-    import json
-    with open(Current_Working_Directory + '/Invoice_parser/firstjson.json', 'r') as json_file:
-        json_load = json.load(json_file)
-        # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/businesscards/create"
-    nothing = json.dumps(json_load).replace("]", "").replace("[", "").replace('{"0":', '').replace('}}', '}')
-    import json
-
-    # JSON data:
-    x = nothing
-    # python object to be appended
-    y = {"InvoiceItems": invoice_Item}
-    y1 = {"Document": list}
-    # parsing JSON string:
-    z = json.loads(x)
-    # appending the data
-    z.update(y)
-    z.update(y1)
-    # print(z)
-    # the result is a JSON string:
-    # print(json.dumps(z))
-    # print('##########################')
-    # print(z)
-    # print('##########################')
-    # import requests
-    # import json
-    # # with open('visitingcard1.json', 'r') as json_file:
-    # #     json_load = json.load(json_file)
-    # url = "https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/invoice/createsalesinvoice"
-    # #url="https://test.bizgaze.app:8443/apis/v4/bizgaze/integrations/invoice/createsalesinvoice"
-    # payload1 = json.dumps(z)
-    # print('--------------------------------------------------------------------------')
-    # print(payload1)
-    # headers = {
-    #     'Authorization': 'stat 089166c35d4c4d7d941c99d6f8986834',
-    #     'Content-Type': 'application/json'
-    # }
-    # response = requests.request("POST", url, headers=headers, data=payload1)
-    # print("##############################################################")
-    # print(response.text)
-    # import glob
-    # files = glob.glob(
-    #     "upload_invoice/*"
-    # )
-    # for f in files:
-    #     os.remove(f)
-    # files = glob.glob(
-    #     "uploads/*"
-    # )
-    # for f in files:
-    #     os.remove(f)
-
-    return z
-
-    # return render_template('invoice.html')
-
-
-@app.route("/Download_invoice")
-def Download_invoice():
-    pass
-
-
-@app.route("/Table")
-def Table():
-    pass
 
 
 if __name__ == "__main__":
-    app.run(host='0.0.0.0', port=1112)
-
-
+    app.run(host='0.0.0.0', port=1112)
\ No newline at end of file