Coverage for mindsdb / integrations / handlers / huggingface_api_handler / huggingface_api_handler.py: 0%

130 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-21 00:36 +0000

1import json 

2from typing import Optional, Dict 

3import pandas as pd 

4from huggingface_hub import HfApi 

5from huggingface_hub import hf_hub_download 

6from hugging_py_face import NLP, ComputerVision, AudioProcessing, get_in_df_supported_tasks 

7 

8from mindsdb.integrations.libs.base import BaseMLEngine 

9from mindsdb.integrations.utilities.handler_utils import get_api_key 

10from .exceptions import UnsupportedTaskException, InsufficientParametersException 

11 

12 

class HuggingFaceInferenceAPIHandler(BaseMLEngine):
    """
    Integration with the Hugging Face Inference API.

    Delegates the actual API calls to the `hugging_py_face` client
    (NLP, ComputerVision, AudioProcessing) and stores/retrieves model
    arguments through MindsDB's model storage.
    """

    name = 'huggingface_api'

    @staticmethod
    def create_validation(target, args=None, **kwargs):
        """
        Validate the USING clause before a model is created.

        Requires `input_column` plus either `model_name` or `task`; when only
        `model_name` is given, the task is inferred from the Hub metadata
        (`pipeline_tag`). Task-specific required arguments are also checked.

        Raises:
            InsufficientParametersException: a required argument is missing.
            UnsupportedTaskException: the task is not supported by the engine.
        """
        # Guard the same way `create` does instead of raising a bare KeyError.
        if args is None or 'using' not in args:
            raise InsufficientParametersException("Hugging Face Inference engine requires a USING clause! Refer to its documentation for more details.")
        args = args['using']

        if 'input_column' not in args:
            raise InsufficientParametersException('input_column has to be specified')

        if 'model_name' not in args:
            # No model given: a task is mandatory so the API can pick a default model.
            task = args.get('task')
            if task is None:
                raise InsufficientParametersException('model_name or task have to be specified')
            args['model_name'] = None
        else:
            # Model given: infer the task from the model's Hub metadata if absent.
            hf_api = HfApi()
            metadata = hf_api.model_info(args['model_name'])
            if 'task' not in args:
                args['task'] = metadata.pipeline_tag

        if args['task'] not in get_in_df_supported_tasks():
            raise UnsupportedTaskException(f'The task {args["task"]} is not supported by the Hugging Face Inference API engine.')

        # Consistency fix: use the engine's own exception type (still an
        # Exception subclass, so existing broad handlers keep working).
        if args['task'] == 'zero-shot-classification' and 'candidate_labels' not in args:
            raise InsufficientParametersException('"candidate_labels" is required for zero-shot-classification')

        if args['task'] == 'sentence-similarity' and 'input_column2' not in args:
            raise InsufficientParametersException('input_column2 has to be specified')

    def create(self, target: str, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None:
        """
        Persist the model arguments, enriched with metadata from the model's
        `config.json` on the Hugging Face Hub (max length, label map,
        task-specific params) when a concrete model was specified.
        """
        if 'using' not in args:
            raise InsufficientParametersException("Hugging Face Inference engine requires a USING clause! Refer to its documentation for more details.")

        args = args['using']
        args['target'] = target
        args.setdefault('options', {})
        args.setdefault('parameters', {})

        if args['model_name'] is not None:
            # Best effort: not every model publishes a config.json.
            config = {}
            try:
                config_path = hf_hub_download(args['model_name'], 'config.json')
                # Fix: close the file handle (the original leaked it via bare open()).
                with open(config_path) as f:
                    config = json.load(f)
            except Exception:
                pass

            # Precedence: explicit arg > model's position limit > model's max_length.
            if 'max_length' in args:
                args['options']['max_length'] = args['max_length']
            elif 'max_position_embeddings' in config:
                args['options']['max_length'] = config['max_position_embeddings']
            elif 'max_length' in config:
                args['options']['max_length'] = config['max_length']

            # Map the model's default label names (id2label) to user-provided ones.
            labels_default = config.get('id2label', {})
            labels_map = {}
            if 'labels' in args:
                for num, value in labels_default.items():
                    # JSON keys are strings; convert numeric ids for list indexing.
                    if num.isdigit():
                        num = int(num)
                    labels_map[value] = args['labels'][num]
            args['labels_map'] = labels_map

            if 'task_specific_params' in config:
                args['task_specific_params'] = config['task_specific_params']

        # Summarization-only knobs.
        if 'min_output_length' in args:
            args['options']['min_output_length'] = args['min_output_length']
        if 'max_output_length' in args:
            args['options']['max_output_length'] = args['max_output_length']

        self.model_storage.json_set('args', args)

    def predict(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> pd.DataFrame:
        """
        Run inference on `df` for the stored task and return the result frame
        with the prediction column renamed to the model target.

        Raises:
            UnsupportedTaskException: the stored task has no dispatch branch
                (fix: the original fell through to a NameError on result_df).
        """
        args = self.model_storage.json_get('args')
        api_key = get_api_key('huggingface_api', args, self.engine_storage, strict=False)

        input_column = args['input_column']
        model_name = args['model_name']
        endpoint = args.get('endpoint')
        options = args.get('options')
        parameters = args.get('parameters')
        task = args['task']

        if task == 'text-classification':
            nlp = NLP(api_key, endpoint)
            result_df = nlp.text_classification_in_df(
                df,
                input_column,
                options,
                model_name,
            )
            # Fix: labels_map is absent when the model was created from a task
            # only (create() never set it) — fall back to an empty map instead
            # of crashing on None.get().
            labels_map = args.get('labels_map') or {}
            result_df['predictions'] = result_df['predictions'].apply(lambda x: labels_map.get(x, x))

        elif task == 'fill-mask':
            nlp = NLP(api_key, endpoint)
            result_df = nlp.fill_mask_in_df(
                df,
                input_column,
                options,
                model_name
            )

        elif task == 'summarization':
            nlp = NLP(api_key, endpoint)
            result_df = nlp.summarization_in_df(
                df,
                input_column,
                parameters,
                options,
                model_name
            )

        elif task == 'text-generation':
            nlp = NLP(api_key, endpoint)
            result_df = nlp.text_generation_in_df(
                df,
                input_column,
                parameters,
                options,
                model_name
            )

        elif task == 'question-answering':
            nlp = NLP(api_key, endpoint)
            result_df = nlp.question_answering_in_df(
                df,
                input_column,
                args['context_column'],
                model_name
            )

        elif task == 'sentence-similarity':
            nlp = NLP(api_key, endpoint)
            result_df = nlp.sentence_similarity_in_df(
                df,
                input_column,
                args['input_column2'],
                options,
                model_name
            )

        elif task == 'zero-shot-classification':
            nlp = NLP(api_key, endpoint)
            result_df = nlp.zero_shot_classification_in_df(
                df,
                input_column,
                args['candidate_labels'],
                parameters,
                options,
                model_name
            )

        elif task == 'translation':
            lang_in = args['lang_input']
            lang_out = args['lang_output']

            input_origin = None
            if 'task_specific_params' in args:
                pair_task = f"translation_{lang_in}_to_{lang_out}"
                if pair_task in args['task_specific_params'] and 'prefix' in args['task_specific_params'][pair_task]:
                    # Inject the model's required prefix into the input column,
                    # remembering the original values so we can restore them.
                    prefix = args['task_specific_params'][pair_task]['prefix']
                    input_origin = df[input_column]
                    df[input_column] = prefix + input_origin
                    # Don't let hugging_py_face pick a model from the language pair.
                    lang_in = lang_out = None

            nlp = NLP(api_key, endpoint)
            result_df = nlp.translation_in_df(
                df,
                input_column,
                lang_in,
                lang_out,
                options,
                model_name
            )
            if input_origin is not None:
                # Undo the in-place prefix mutation of the caller's DataFrame.
                df[input_column] = input_origin

        elif task == 'image-classification':
            cp = ComputerVision(api_key, endpoint)
            result_df = cp.image_classification_in_df(
                df,
                input_column,
                model_name
            )

        elif task == 'object-detection':
            cp = ComputerVision(api_key, endpoint)
            result_df = cp.object_detection_in_df(
                df,
                input_column,
                model_name
            )

        elif task == 'automatic-speech-recognition':
            ap = AudioProcessing(api_key, endpoint)
            result_df = ap.automatic_speech_recognition_in_df(
                df,
                input_column,
                model_name
            )

        elif task == 'audio-classification':
            ap = AudioProcessing(api_key, endpoint)
            result_df = ap.audio_classification_in_df(
                df,
                input_column,
                model_name
            )

        else:
            # Fix: without this branch, an unexpected task caused a NameError
            # on result_df below instead of a meaningful error.
            raise UnsupportedTaskException(f'The task {task} is not supported by the Hugging Face Inference API engine.')

        result_df = result_df.rename(columns={'predictions': args['target']})
        return result_df