Coverage for mindsdb / integrations / handlers / langchain_embedding_handler / fastapi_embeddings.py: 32%

23 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-21 00:36 +0000

1from typing import Any, List 

2from langchain_core.embeddings import Embeddings 

3import requests 

4 

5 

class FastAPIEmbeddings(Embeddings):
    """Embeddings client backed by an HTTP embedding endpoint.

    Useful for custom serving solutions (for example a FastAPI service).
    Every request POSTs ``{"input": [...], "model": ...}`` as JSON to
    ``api_base`` and reads the ``"embedding"`` field of each item in the
    response's ``"data"`` list.
    """

    def __init__(
        self,
        api_base: str,
        model: str,
        batch_size: int = 32,
        **kwargs: Any,
    ):
        """Initialize the embeddings class.

        Args:
            api_base: URL that embedding requests are POSTed to.
            model: Model name/path sent in the ``"model"`` field of each request.
            batch_size: Maximum number of texts sent per request by
                ``embed_documents``. A non-positive value disables batching.
            **kwargs: Optional extras. Only ``timeout`` is read; it is passed
                to ``requests.post`` and defaults to ``None`` (no timeout).
                Any other keys are accepted and ignored.
        """
        super().__init__()
        self.api_base = api_base
        self.model = model
        self.batch_size = batch_size
        # Opt-in timeout so a stalled server cannot block the caller forever.
        self.timeout = kwargs.get("timeout")

    def _get_embeddings(self, texts: List[str]) -> List[Any]:
        """Request embeddings for one batch of texts in a single HTTP call.

        Args:
            texts: Texts sent as the ``"input"`` field of the request.

        Returns:
            The ``"embedding"`` value of every item in the response's
            ``"data"`` list, in response order. Values are passed through
            exactly as the server returned them.
            NOTE(review): earlier docs described sparse vectors as strings of
            the form "{key:value,...}/size"; that format is decided by the
            server, not by this class - confirm against the serving code.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            KeyError: If the response JSON lacks ``"data"`` or ``"embedding"``.
        """
        headers = {"accept": "application/json", "Content-Type": "application/json"}
        payload = {"input": texts, "model": self.model}

        response = requests.post(
            self.api_base,
            headers=headers,
            json=payload,
            timeout=self.timeout,
        )
        response.raise_for_status()

        return [item["embedding"] for item in response.json()["data"]]

    def embed_documents(self, texts: List[str]) -> List[Any]:
        """Embed a list of documents, sending at most ``batch_size`` per request.

        Args:
            texts: List of documents to embed.

        Returns:
            One embedding per document, in input order, as returned by the
            server. An empty input yields an empty list without any request.
        """
        texts = list(texts)
        if not texts:
            return []

        # A missing or non-positive batch size means "everything in one request".
        batch_size = self.batch_size
        step = batch_size if batch_size and batch_size > 0 else len(texts)

        embeddings: List[Any] = []
        for start in range(0, len(texts), step):
            embeddings.extend(self._get_embeddings(texts[start:start + step]))
        return embeddings

    def embed_query(self, text: str) -> Any:
        """Embed a single query text.

        Args:
            text: Query text to embed.

        Returns:
            The embedding of ``text`` exactly as returned by the server.
        """
        return self._get_embeddings([text])[0]