Coverage for mindsdb / integrations / utilities / rag / loaders / vector_store_loader / vector_store_loader.py: 56%
39 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
2from langchain_core.embeddings import Embeddings
3from langchain_community.vectorstores import Chroma, PGVector
4from langchain_core.vectorstores import VectorStore
6from pydantic import BaseModel
8from mindsdb.integrations.utilities.rag.settings import VectorStoreType, VectorStoreConfig
9from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.MDBVectorStore import MDBVectorStore
10from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.pgvector import PGVectorMDB
11from mindsdb.utilities import log
14logger = log.getLogger(__name__)
def _build_pgvector_connection_string(db_args) -> str:
    """
    Builds the ``postgresql+psycopg2`` connection URL from handler connection arguments.

    The user and password are percent-encoded so that reserved characters in the
    credentials (e.g. ``@``, ``:``, ``/``, ``#``) cannot be mistaken for URL delimiters.
    Host, port and database name are interpolated unchanged.

    :param db_args: mapping of connection arguments; reads ``user``, ``password``, ``host``,
        ``port`` and ``dbname`` (falling back to ``database`` when ``dbname`` is absent)
    :return: the connection URL as a string
    """
    # Function-scope import: keeps this block self-contained without touching the module imports.
    from urllib.parse import quote

    # str() keeps the previous rendering for missing values (``None`` -> "None");
    # safe='' makes sure '/' is escaped as well.
    user = quote(str(db_args.get('user')), safe='')
    password = quote(str(db_args.get('password')), safe='')
    host = db_args.get('host')
    port = db_args.get('port')
    database = db_args.get('dbname', db_args.get('database'))
    return f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{database}"


class VectorStoreLoader(BaseModel):
    """
    Selects and instantiates the vector store described by ``config``.

    ``embedding_model`` is handed to the PGVector-backed store as its embedding function.
    ``vector_store`` is declared on the model but is not read by ``load()``.
    """
    embedding_model: Embeddings
    vector_store: VectorStore = None
    config: VectorStoreConfig = None

    class Config:
        arbitrary_types_allowed = True
        extra = "forbid"
        validate_assignment = True

    def load(self) -> VectorStore:
        """
        Loads the vector store based on the provided config and embeddings model.

        A ``PGVectorMDB`` store is returned when ``is_sparse``, ``vector_size`` and ``kb_table``
        are all set on the config (i.e. none of them is ``None``); otherwise the knowledge base
        table is wrapped in an ``MDBVectorStore``.

        :return: the vector store instance
        :raises ValueError: if ``config`` has not been set
        """
        config = self.config
        if config is None:
            # Fail with a clear message instead of an AttributeError on None further down.
            raise ValueError("VectorStoreLoader.config must be set before calling load()")

        kb_table = config.kb_table
        use_pgvector = (
            config.is_sparse is not None
            and config.vector_size is not None
            and kb_table is not None
        )
        if not use_pgvector:
            return MDBVectorStore(kb_table=kb_table)

        # Assume we are always using PGVector & psycopg2 for this path.
        db_handler = kb_table.get_vector_db()
        return PGVectorMDB(
            connection_string=_build_pgvector_connection_string(db_handler.connection_args),
            collection_name=kb_table._kb.vector_database_table,
            embedding_function=self.embedding_model,
            is_sparse=config.is_sparse,
            vector_size=config.vector_size,
        )
class VectorStoreFactory:
    """
    Creates a vector store instance for the store type named in a ``VectorStoreConfig``.
    """

    @staticmethod
    def create(embedding_model: Embeddings, config: VectorStoreConfig) -> VectorStore:
        """
        Builds the vector store matching ``config.vector_store_type``.

        :param embedding_model: embeddings model passed to the store as its embedding function
        :param config: settings naming the store type and its connection/collection details
        :return: a ``Chroma`` store for ``VectorStoreType.CHROMA`` or a ``PGVectorMDB`` store
            for ``VectorStoreType.PGVECTOR``
        :raises ValueError: if ``config.vector_store_type`` is neither of those types
        """
        store_type = config.vector_store_type
        if store_type == VectorStoreType.CHROMA:
            return VectorStoreFactory._load_chromadb_store(embedding_model, config)
        if store_type == VectorStoreType.PGVECTOR:
            return VectorStoreFactory._load_pgvector_store(embedding_model, config)

        # List the member names explicitly rather than printing a dict_keys(...) repr.
        valid_types = ", ".join(VectorStoreType.__members__)
        raise ValueError(f"Invalid vector store type {store_type!r}, must be one of: {valid_types}")

    @staticmethod
    def _load_chromadb_store(embedding_model: Embeddings, settings: VectorStoreConfig) -> Chroma:
        """
        Instantiates a Chroma store from the persist directory and collection name in ``settings``.

        :param embedding_model: embeddings model used as the store's embedding function
        :param settings: provides ``persist_directory`` and ``collection_name``
        :return: the Chroma vector store
        """
        return Chroma(
            persist_directory=settings.persist_directory,
            collection_name=settings.collection_name,
            embedding_function=embedding_model,
        )

    @staticmethod
    def _load_pgvector_store(embedding_model: Embeddings, settings: VectorStoreConfig) -> PGVector:
        """
        Instantiates a ``PGVectorMDB`` store from the connection details in ``settings``.

        :param embedding_model: embeddings model used as the store's embedding function
        :param settings: provides ``connection_string``, ``collection_name``, ``is_sparse``
            and ``vector_size``
        :return: the PGVector-backed vector store
        """
        # PGVectorMDB is already imported at module level, so no local import is needed here.
        return PGVectorMDB(
            connection_string=settings.connection_string,
            collection_name=settings.collection_name,
            embedding_function=embedding_model,
            is_sparse=settings.is_sparse,
            vector_size=settings.vector_size,
        )