Coverage for mindsdb / integrations / handlers / huggingface_api_handler / huggingface_api_handler.py: 0%
130 statements
« prev ^ index » next — coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
1import json
2from typing import Optional, Dict
3import pandas as pd
4from huggingface_hub import HfApi
5from huggingface_hub import hf_hub_download
6from hugging_py_face import NLP, ComputerVision, AudioProcessing, get_in_df_supported_tasks
8from mindsdb.integrations.libs.base import BaseMLEngine
9from mindsdb.integrations.utilities.handler_utils import get_api_key
10from .exceptions import UnsupportedTaskException, InsufficientParametersException
class HuggingFaceInferenceAPIHandler(BaseMLEngine):
    """
    Integration with the Hugging Face Inference API.

    Delegates inference to the hosted Hugging Face Inference API via the
    `hugging_py_face` client (NLP, ComputerVision and AudioProcessing tasks).
    """

    name = 'huggingface_api'

    @staticmethod
    def create_validation(target, args=None, **kwargs):
        """Validate the USING arguments supplied at CREATE MODEL time.

        Ensures an input column is present, resolves the task/model pair
        (at least one of `model_name` / `task` must be given; when only the
        model is given, the task is inferred from its Hub pipeline tag) and
        checks task-specific required arguments.

        Raises:
            InsufficientParametersException: when a required argument is missing.
            UnsupportedTaskException: when the resolved task is not supported
                by the Hugging Face Inference API engine.
        """
        args = args['using']

        if 'input_column' not in args:
            raise InsufficientParametersException('input_column has to be specified')

        if 'model_name' not in args:
            # No model given: a task is mandatory so the API can pick a default model.
            task = args.get('task')
            if task is None:
                raise InsufficientParametersException('model_name or task have to be specified')
            args['model_name'] = None
        else:
            # A model is given: fetch its Hub metadata so the task can be
            # inferred from the pipeline tag when not supplied explicitly.
            hf_api = HfApi()
            metadata = hf_api.model_info(args['model_name'])
            if 'task' not in args:
                args['task'] = metadata.pipeline_tag

        if args['task'] not in get_in_df_supported_tasks():
            raise UnsupportedTaskException(f'The task {args["task"]} is not supported by the Hugging Face Inference API engine.')

        if args['task'] == 'zero-shot-classification' and 'candidate_labels' not in args:
            # Use the engine-specific exception type, consistent with the
            # other missing-parameter errors in this method (was a bare Exception).
            raise InsufficientParametersException('"candidate_labels" is required for zero-shot-classification')

        if args['task'] == 'sentence-similarity' and 'input_column2' not in args:
            raise InsufficientParametersException('input_column2 has to be specified')

    def create(self, target: str, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None:
        """Normalize and persist the model arguments.

        Downloads the model's ``config.json`` from the Hub (best effort) to
        derive defaults such as ``max_length``, a label mapping and
        task-specific parameters, then stores everything in model storage.

        Raises:
            InsufficientParametersException: when no USING clause was given.
        """
        if 'using' not in args:
            raise InsufficientParametersException("Hugging Face Inference engine requires a USING clause! Refer to its documentation for more details.")

        args = args['using']
        args['target'] = target

        args.setdefault('options', {})
        args.setdefault('parameters', {})

        if args['model_name'] is not None:
            # Best effort: pull the model's config.json for sensible defaults.
            config = {}
            try:
                config_path = hf_hub_download(args['model_name'], 'config.json')
                # Context manager ensures the file handle is closed
                # (previously `json.load(open(...))` leaked it).
                with open(config_path) as f:
                    config = json.load(f)
            except Exception:
                # Deliberately best-effort: a missing/unreadable config.json
                # only means no defaults are derived.
                pass

            # Resolve max_length: explicit arg wins over config-derived values.
            if 'max_length' in args:
                args['options']['max_length'] = args['max_length']
            elif 'max_position_embeddings' in config:
                args['options']['max_length'] = config['max_position_embeddings']
            elif 'max_length' in config:
                args['options']['max_length'] = config['max_length']

            # Map the model's own label names to user-provided labels.
            # config id2label keys are strings; numeric keys index the
            # user's positional `labels` list.
            labels_default = config.get('id2label', {})
            labels_map = {}
            if 'labels' in args:
                for num, value in labels_default.items():
                    if num.isdigit():
                        num = int(num)
                    labels_map[value] = args['labels'][num]
            args['labels_map'] = labels_map

            if 'task_specific_params' in config:
                args['task_specific_params'] = config['task_specific_params']

        # Summarization-specific output length bounds.
        if 'min_output_length' in args:
            args['options']['min_output_length'] = args['min_output_length']
        if 'max_output_length' in args:
            args['options']['max_output_length'] = args['max_output_length']

        self.model_storage.json_set('args', args)

    def predict(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> pd.DataFrame:
        """Run inference through the Hugging Face Inference API.

        Dispatches to the `hugging_py_face` helper matching the stored task
        and returns a dataframe whose prediction column is renamed to the
        model's target.

        Raises:
            UnsupportedTaskException: when the stored task has no handler
                (previously this fell through to a confusing NameError on
                the unbound result variable).
        """
        args = self.model_storage.json_get('args')
        api_key = get_api_key('huggingface_api', args, self.engine_storage, strict=False)

        input_column = args['input_column']
        model_name = args['model_name']
        endpoint = args.get('endpoint')
        options = args.get('options')
        parameters = args.get('parameters')
        task = args['task']

        if task == 'text-classification':
            nlp = NLP(api_key, endpoint)
            result_df = nlp.text_classification_in_df(
                df,
                input_column,
                options,
                model_name,
            )
            labels_map = args.get('labels_map')
            if labels_map:
                # Translate model label names to user-supplied labels.
                # The guard also avoids an AttributeError when no mapping was
                # stored (model resolved from task only, so labels_map is None).
                result_df['predictions'] = result_df['predictions'].apply(lambda x: labels_map.get(x, x))

        elif task == 'fill-mask':
            nlp = NLP(api_key, endpoint)
            result_df = nlp.fill_mask_in_df(
                df,
                input_column,
                options,
                model_name
            )

        elif task == 'summarization':
            nlp = NLP(api_key, endpoint)
            result_df = nlp.summarization_in_df(
                df,
                input_column,
                parameters,
                options,
                model_name
            )

        elif task == 'text-generation':
            nlp = NLP(api_key, endpoint)
            result_df = nlp.text_generation_in_df(
                df,
                input_column,
                parameters,
                options,
                model_name
            )

        elif task == 'question-answering':
            nlp = NLP(api_key, endpoint)
            result_df = nlp.question_answering_in_df(
                df,
                input_column,
                args['context_column'],
                model_name
            )

        elif task == 'sentence-similarity':
            nlp = NLP(api_key, endpoint)
            result_df = nlp.sentence_similarity_in_df(
                df,
                input_column,
                args['input_column2'],
                options,
                model_name
            )

        elif task == 'zero-shot-classification':
            nlp = NLP(api_key, endpoint)
            result_df = nlp.zero_shot_classification_in_df(
                df,
                input_column,
                args['candidate_labels'],
                parameters,
                options,
                model_name
            )

        elif task == 'translation':
            lang_in = args['lang_input']
            lang_out = args['lang_output']

            input_origin = None
            if 'task_specific_params' in args:
                task_name = f"translation_{lang_in}_to_{lang_out}"
                task_params = args['task_specific_params']
                if task_name in task_params and 'prefix' in task_params[task_name]:
                    # The model expects a task prefix (e.g. T5-style
                    # "translate X to Y: "): inject it into the input column.
                    prefix = task_params[task_name]['prefix']
                    input_origin = df[input_column]
                    df[input_column] = prefix + input_origin
                    # Don't let hugging_py_face pick a model from the languages.
                    lang_in = lang_out = None

            nlp = NLP(api_key, endpoint)
            result_df = nlp.translation_in_df(
                df,
                input_column,
                lang_in,
                lang_out,
                options,
                model_name
            )
            if input_origin is not None:
                # Restore the caller's dataframe (it was mutated above).
                df[input_column] = input_origin

        elif task == 'image-classification':
            cp = ComputerVision(api_key, endpoint)
            result_df = cp.image_classification_in_df(
                df,
                input_column,
                model_name
            )

        elif task == 'object-detection':
            cp = ComputerVision(api_key, endpoint)
            result_df = cp.object_detection_in_df(
                df,
                input_column,
                model_name
            )

        elif task == 'automatic-speech-recognition':
            ap = AudioProcessing(api_key, endpoint)
            result_df = ap.automatic_speech_recognition_in_df(
                df,
                input_column,
                model_name
            )

        elif task == 'audio-classification':
            ap = AudioProcessing(api_key, endpoint)
            result_df = ap.audio_classification_in_df(
                df,
                input_column,
                model_name
            )

        else:
            # Fail explicitly on unknown tasks instead of NameError on result_df.
            raise UnsupportedTaskException(f'The task {task} is not supported by the Hugging Face Inference API engine.')

        result_df = result_df.rename(columns={'predictions': args['target']})
        return result_df