Browse Source

Upload files to 'Supportgpt'

SadhulaSaiKumar 1 year ago
parent
commit
c5a70c9a68
1 changed file with 31 additions and 0 deletions
  1. 31
    0
      Supportgpt/ingest.py

+ 31
- 0
Supportgpt/ingest.py View File

@@ -0,0 +1,31 @@
1
+from langchain.embeddings import HuggingFaceEmbeddings
2
+from langchain.vectorstores import FAISS
3
+from langchain.document_loaders import PyPDFLoader, DirectoryLoader
4
+from langchain.text_splitter import RecursiveCharacterTextSplitter 
5
+from langchain.document_loaders.csv_loader import CSVLoader
6
+
7
+
8
+DATA_PATH = 'data/'
9
+DB_FAISS_PATH = 'vectorstore/db_faiss'
10
+
11
+# Create the FAISS vector database from supportqa.csv and save it to DB_FAISS_PATH
12
+def create_vector_db():
13
+    loader = CSVLoader(file_path="./supportqa.csv", encoding='iso-8859-1', source_column="Question")
14
+    # loader = DirectoryLoader(DATA_PATH,
15
+    #                          glob='*.pdf',
16
+    #                          loader_cls=PyPDFLoader)
17
+
18
+    documents = loader.load()
19
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,
20
+                                                   chunk_overlap=50)
21
+    texts = text_splitter.split_documents(documents)
22
+
23
+    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
24
+                                       model_kwargs={'device': 'cpu'})
25
+
26
+    db = FAISS.from_documents(texts, embeddings)
27
+    db.save_local(DB_FAISS_PATH)
28
+
29
+if __name__ == "__main__":
30
+    create_vector_db()
31
+

Loading…
Cancel
Save