Coverage for mindsdb / integrations / handlers / monkeylearn_handler / monkeylearn_handler.py: 0%
63 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
1from typing import Optional, Dict
2import pandas as pd
3import requests
5from mindsdb.integrations.libs.base import BaseMLEngine
6from mindsdb.integrations.utilities.handler_utils import get_api_key
class monkeylearnHandler(BaseMLEngine):
    """ML engine that delegates text classification to MonkeyLearn classifiers.

    The model arguments (``monkeylearn_api_key``, ``model_id``,
    ``input_column``) are validated in ``create_validations``, persisted by
    ``create`` and read back by ``predict`` and ``describe``.
    """

    name = "monkeylearn"

    # Root of the classifier endpoints; used as-is for listing classifiers.
    _CLASSIFIERS_URL = 'https://api.monkeylearn.com/v3/classifiers/'
    # Seconds to wait for the API, so an unresponsive server cannot hang a query.
    _REQUEST_TIMEOUT = 30
    # Largest number of rows accepted by a single ``predict`` call.
    _MAX_ROWS = 500

    @staticmethod
    def _auth_headers(api_key):
        """Return the token-based Authorization header for ``api_key``."""
        return {'Authorization': 'Token {}'.format(api_key)}

    def _resolve_api_key(self, args):
        """Look up the MonkeyLearn API key for the stored model arguments.

        ``args`` is the dict saved by ``create``; it has already been
        unwrapped from its ``"using"`` envelope, so it is passed to
        ``get_api_key`` directly instead of being indexed with ``"using"``.
        """
        return get_api_key('monkeylearn', args, self.engine_storage, strict=False)

    @staticmethod
    def create_validations(self, args=None, **kwargs):
        """Validate the model arguments before the model is created.

        NOTE(review): because of ``@staticmethod`` no instance is bound, so
        ``self`` is simply whatever the caller passes first. It is therefore
        not used here; the name is kept only so the signature is unchanged.
        Confirm against the ``BaseMLEngine.create_validations`` contract.

        Args:
            self: First positional argument supplied by the caller (unused).
            args: Model arguments, optionally wrapped in a ``"using"`` key.
            **kwargs: Ignored.

        Raises:
            Exception: If the API key or ``model_id`` is missing, the model is
                not a classifier, the listing request does not return HTTP
                200, or ``model_id`` is not among the listed classifiers.
        """
        args = args or {}
        if "using" in args:
            args = args["using"]

        if "monkeylearn_api_key" not in args:
            raise Exception("monkeylearn_api_key not found")
        # The guard above guarantees the key is present in the arguments.
        api_key = args["monkeylearn_api_key"]

        if "model_id" not in args:
            raise Exception("Enter the model_id of model you want use")
        model_id = args["model_id"]
        if "cl_" not in model_id:
            raise Exception("Classifier tasks are only supported currently")

        # Check whether the model_id given by the user exists in the user
        # account or among the MonkeyLearn pre-trained models.
        response = requests.get(
            monkeylearnHandler._CLASSIFIERS_URL,
            headers=monkeylearnHandler._auth_headers(api_key),
            timeout=monkeylearnHandler._REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            raise Exception(f"Server response {response.status_code}")

        models_list = [model['id'] for model in response.json()]
        if model_id not in models_list:
            raise Exception(f"Model_id {model_id} not found in MonkeyLearn pre-trained models")

    def create(self, target: str, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None:
        """Persist the model arguments; no training happens locally.

        Args:
            target: Name of the target column (unused here).
            df: Training data (unused here).
            args: Model arguments, optionally wrapped in a ``"using"`` key.
                ``None`` is stored as an empty dict.
        """
        args = args or {}
        if "using" in args:
            args = args['using']

        self.model_storage.json_set('args', args)

    def predict(self, df, args=None):
        """Classify every value of the configured input column.

        One request is sent per row to the classifier's ``classify/``
        endpoint (path assumed from the MonkeyLearn v3 REST API - verify),
        authenticated the same way as the listing call in
        ``create_validations``.

        Args:
            df: Input frame; must contain the stored ``input_column``.
            args: Ignored; the arguments saved by ``create`` are used.

        Returns:
            A DataFrame with one row per input row and the columns
            ``classification`` (the raw classification list) and ``tag``
            (the first tag name, or ``None`` when the list is empty).

        Raises:
            RuntimeError: If the input column is missing from ``df``.
            Exception: If more than 500 rows are given, a request does not
                return HTTP 200, or the API reports an error for a text.
        """
        args = self.model_storage.json_get('args')
        input_column = args['input_column']
        if input_column not in df.columns:
            raise RuntimeError(f"input columns {input_column} not found ")

        input_list = df[input_column]
        if len(input_list) > self._MAX_ROWS:
            raise Exception("Classifier only supports 500 data elements in list")

        headers = self._auth_headers(self._resolve_api_key(args))
        classify_url = f"{self._CLASSIFIERS_URL}{args['model_id']}/classify/"

        rows = []
        for text in input_list:
            response = requests.post(
                classify_url,
                json={'data': [text]},
                headers=headers,
                timeout=self._REQUEST_TIMEOUT,
            )
            if response.status_code != 200:
                raise Exception(f"Server response {response.status_code}")

            pred_dict = {}
            for res_dict in response.json():
                if res_dict.get("error") is True:
                    raise Exception(res_dict["error_detail"])
                classifications = res_dict['classifications']
                pred_dict['classification'] = classifications
                # Guard against an empty classification list.
                pred_dict['tag'] = classifications[0]['tag_name'] if classifications else None
            rows.append(pred_dict)

        # A single construction yields a clean 0..n-1 index and also copes
        # with an empty input frame.
        return pd.DataFrame(rows, columns=['classification', 'tag'])

    def describe(self, attribute: Optional[str] = None) -> pd.DataFrame:
        """Return descriptive metadata about the configured classifier.

        The detail is fetched from the classifier's own endpoint (path
        assumed from the MonkeyLearn v3 REST API - verify).

        Args:
            attribute: Unused.

        Returns:
            A one-row DataFrame with the columns ``name``, ``model_version``,
            ``date_created`` and ``industries``.

        Raises:
            Exception: If the request does not return HTTP 200.
        """
        args = self.model_storage.json_get('args')
        response = requests.get(
            f"{self._CLASSIFIERS_URL}{args['model_id']}/",
            headers=self._auth_headers(self._resolve_api_key(args)),
            timeout=self._REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            raise Exception(f"Server response {response.status_code}")

        body = response.json()
        description = {
            'name': body['name'],
            'model_version': body['model_version'],
            'date_created': body['created'],
            # pre-trained MonkeyLearn models state which industries they suit
            'industries': body['industries'],
        }
        return pd.DataFrame([description])