Coverage for mindsdb / integrations / utilities / rag / loaders / vector_store_loader / vector_store_loader.py: 56%

39 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-21 00:36 +0000

1 

2from langchain_core.embeddings import Embeddings 

3from langchain_community.vectorstores import Chroma, PGVector 

4from langchain_core.vectorstores import VectorStore 

5 

6from pydantic import BaseModel 

7 

8from mindsdb.integrations.utilities.rag.settings import VectorStoreType, VectorStoreConfig 

9from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.MDBVectorStore import MDBVectorStore 

10from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.pgvector import PGVectorMDB 

11from mindsdb.utilities import log 

12 

13 

14logger = log.getLogger(__name__) 

15 

16 

class VectorStoreLoader(BaseModel):
    """Load the vector store described by a ``VectorStoreConfig``.

    ``load`` returns a ``PGVectorMDB`` store when the config carries sparse /
    vector-size settings together with a knowledge-base table, and an
    ``MDBVectorStore`` wrapping the knowledge-base table otherwise.
    """

    # Embeddings model handed to the PGVector-backed store.
    embedding_model: Embeddings
    # Not read by ``load``; kept as part of the model's public fields.
    vector_store: VectorStore = None
    # Settings that decide which store ``load`` builds.
    config: VectorStoreConfig = None

    class Config:
        arbitrary_types_allowed = True
        extra = "forbid"
        validate_assignment = True

    def load(self) -> VectorStore:
        """
        Loads the vector store based on the provided config and embeddings model

        :return: a ``PGVectorMDB`` instance when ``is_sparse``, ``vector_size``
            and ``kb_table`` are all set on the config, otherwise an
            ``MDBVectorStore`` built from ``config.kb_table``.
        """
        # Function-scope import keeps this block self-contained.
        from urllib.parse import quote_plus

        config = self.config
        # NOTE(review): the original comment says PGVector is only used for
        # sparse vectors, but this only checks that ``is_sparse`` is set (not
        # that it is true) -- confirm that this is the intended condition.
        use_pgvector = (
            config.is_sparse is not None
            and config.vector_size is not None
            and config.kb_table is not None
        )
        if not use_pgvector:
            return MDBVectorStore(kb_table=config.kb_table)

        db_handler = config.kb_table.get_vector_db()
        db_args = db_handler.connection_args

        # Credentials are percent-encoded so that characters such as '@', ':'
        # or '/' inside a user name or password cannot corrupt the URL.
        user = quote_plus(str(db_args.get('user')))
        password = quote_plus(str(db_args.get('password')))
        database = db_args.get('dbname', db_args.get('database'))

        # Assume we are always using PGVector & psycopg2.
        connection_str = (
            f"postgresql+psycopg2://{user}:{password}"
            f"@{db_args.get('host')}:{db_args.get('port')}/{database}"
        )

        return PGVectorMDB(
            connection_string=connection_str,
            collection_name=config.kb_table._kb.vector_database_table,
            embedding_function=self.embedding_model,
            is_sparse=config.is_sparse,
            vector_size=config.vector_size,
        )

47 

48 

class VectorStoreFactory:
    """Create a vector store instance for the type named in a config."""

    @staticmethod
    def create(embedding_model: Embeddings, config: VectorStoreConfig):
        """
        Build the vector store selected by ``config.vector_store_type``.

        :param embedding_model: embeddings model passed to the created store
        :param config: settings holding the store type and its parameters
        :return: a ``Chroma`` store for ``VectorStoreType.CHROMA`` or a
            ``PGVectorMDB`` store for ``VectorStoreType.PGVECTOR``
        :raises ValueError: if the configured type is neither of the above
        """
        store_type = config.vector_store_type
        if store_type == VectorStoreType.CHROMA:
            return VectorStoreFactory._load_chromadb_store(embedding_model, config)
        if store_type == VectorStoreType.PGVECTOR:
            return VectorStoreFactory._load_pgvector_store(embedding_model, config)

        # List the member names as plain text rather than the repr of a keys view.
        valid_types = ", ".join(VectorStoreType.__members__)
        raise ValueError(f"Invalid vector store type, must be one of: {valid_types}")

    @staticmethod
    def _load_chromadb_store(embedding_model: Embeddings, settings) -> Chroma:
        """Return a ``Chroma`` store built from the persist directory and collection name in ``settings``."""
        return Chroma(
            persist_directory=settings.persist_directory,
            collection_name=settings.collection_name,
            embedding_function=embedding_model,
        )

    @staticmethod
    def _load_pgvector_store(embedding_model: Embeddings, settings) -> PGVector:
        """Return a ``PGVectorMDB`` store built from the connection and vector settings in ``settings``."""
        from .pgvector import PGVectorMDB

        return PGVectorMDB(
            connection_string=settings.connection_string,
            collection_name=settings.collection_name,
            embedding_function=embedding_model,
            is_sparse=settings.is_sparse,
            vector_size=settings.vector_size,
        )

77 )