Coverage for mindsdb / integrations / handlers / monkeylearn_handler / monkeylearn_handler.py: 0%

63 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-21 00:36 +0000

1from typing import Optional, Dict 

2import pandas as pd 

3import requests 

4 

5from mindsdb.integrations.libs.base import BaseMLEngine 

6from mindsdb.integrations.utilities.handler_utils import get_api_key 

7 

8 

class monkeylearnHandler(BaseMLEngine):
    """ML engine that classifies text with a MonkeyLearn classifier model.

    The model arguments (``monkeylearn_api_key``, ``model_id``,
    ``input_column``) are stored at creation time and read back for every
    prediction / description request.  All remote calls go through the
    MonkeyLearn REST API using ``requests`` and token authentication, the
    same way the model-listing call in ``create_validations`` does.
    """

    name = "monkeylearn"

    # Base URL shared by every classifier endpoint used below.
    _CLASSIFIERS_URL = 'https://api.monkeylearn.com/v3/classifiers/'
    # Largest number of texts accepted by a single predict call.
    _MAX_TEXTS = 500
    # Seconds to wait for the remote API before giving up instead of hanging.
    _REQUEST_TIMEOUT = 30

    @staticmethod
    def _auth_headers(api_key):
        """Return the HTTP headers carrying the MonkeyLearn token."""
        return {'Authorization': 'Token {}'.format(api_key)}

    @staticmethod
    def _json_or_raise(response):
        """Return the decoded JSON body, raising on any non-200 status."""
        if response.status_code != 200:
            raise Exception(f"Server response {response.status_code}")
        return response.json()

    def _stored_args_and_key(self):
        """Load the stored model arguments and resolve the API key for them."""
        args = self.model_storage.json_get('args')
        # `create` stores the already-unwrapped "using" dict, so the dict is
        # handed over as-is; indexing it with "using" again raised KeyError.
        api_key = get_api_key('monkeylearn', args, self.engine_storage, strict=False)
        return args, api_key

    @staticmethod
    def create_validations(self, args=None, **kwargs):
        """Validate the model arguments before a model is created.

        Args:
            self: first positional argument.  Because this is a static method
                it is NOT bound automatically; it is only used as an optional
                source of ``engine_storage``.
            args: model arguments, either flat or wrapped under ``"using"``.
            **kwargs: accepted for interface compatibility; unused.

        Raises:
            Exception: when the API key or ``model_id`` is missing, the model
                is not a classifier, the API answers with a non-200 status, or
                the model id is not among the listed classifiers.
        """
        args = args or {}
        if "using" in args:
            args = args["using"]

        if "monkeylearn_api_key" not in args:
            raise Exception("monkeylearn_api_key not found")
        # `self` may be anything the caller passed (static method), so do not
        # assume it carries engine storage.
        # NOTE(review): assumes get_api_key accepts None as engine storage - confirm.
        engine_storage = getattr(self, "engine_storage", None)
        api_key = get_api_key('monkeylearn', args, engine_storage, strict=False)

        if "model_id" not in args:
            raise Exception("Enter the model_id of model you want use")
        model_id = args["model_id"]
        # Classifier ids carry the "cl_" prefix; a substring test would also
        # accept ids that merely contain "cl_" somewhere in the middle.
        if not str(model_id).startswith("cl_"):
            raise Exception("Classifier tasks are only supported currently")

        # Check whether the model_id given by the user is among the classifiers the API lists.
        # NOTE(review): the listing may be paginated, in which case only the
        # first page is inspected here - confirm against the API docs.
        response = requests.get(
            monkeylearnHandler._CLASSIFIERS_URL,
            headers=monkeylearnHandler._auth_headers(api_key),
            timeout=monkeylearnHandler._REQUEST_TIMEOUT,
        )
        models = monkeylearnHandler._json_or_raise(response)
        models_list = [model['id'] for model in models]

        if model_id not in models_list:
            raise Exception(f"Model_id {args['model_id']} not found in MonkeyLearn pre-trained models")

    def create(self, target: str, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None:
        """Persist the model arguments; no training happens locally.

        Args:
            target: name of the target column (unused here).
            df: training data (unused here).
            args: model arguments, either flat or wrapped under ``"using"``.
                ``None`` is stored as an empty dict.
        """
        args = args or {}
        if "using" in args:
            args = args['using']

        self.model_storage.json_set('args', args)

    def predict(self, df, args=None):
        """Classify every value of the configured input column.

        Args:
            df: DataFrame holding the column named by the stored
                ``input_column`` argument.
            args: ignored; the arguments stored by ``create`` are used.

        Returns:
            DataFrame with one row per classified text and the columns
            ``classification`` (all returned classifications) and ``tag``
            (name of the first classification, ``None`` when there is none).

        Raises:
            RuntimeError: when the input column is missing from ``df``.
            Exception: when more than 500 texts are supplied, the API answers
                with a non-200 status, or an individual item reports an error.
        """
        args, api_key = self._stored_args_and_key()
        input_column = args['input_column']
        if input_column not in df.columns:
            raise RuntimeError(f"input columns {input_column} not found ")
        texts = df[input_column].tolist()
        if len(texts) > self._MAX_TEXTS:
            raise Exception("Classifier only supports 500 data elements in list")

        results = []
        if texts:
            # All texts fit into one request because of the size check above.
            # NOTE(review): endpoint and payload shape follow the MonkeyLearn
            # v3 REST API ("<classifiers>/<model_id>/classify/") - confirm.
            response = requests.post(
                f"{self._CLASSIFIERS_URL}{args['model_id']}/classify/",
                json={'data': texts},
                headers=self._auth_headers(api_key),
                timeout=self._REQUEST_TIMEOUT,
            )
            results = self._json_or_raise(response)

        rows = []
        for res_dict in results:
            if res_dict.get("error") is True:
                raise Exception(res_dict["error_detail"])
            classifications = res_dict['classifications']
            rows.append({
                'classification': classifications,
                # An empty classification list means no tag was assigned.
                'tag': classifications[0]['tag_name'] if classifications else None,
            })
        # Building the frame from rows also works for empty input, where
        # concatenating an empty list of frames would raise.
        return pd.DataFrame(rows, columns=['classification', 'tag'])

    def describe(self, attribute: Optional[str] = None) -> pd.DataFrame:
        """Return a one-row description of the configured classifier.

        Args:
            attribute: accepted for interface compatibility; unused.

        Returns:
            DataFrame with the columns ``name``, ``model_version``,
            ``date_created`` and ``industries`` taken from the classifier
            detail returned by the API.

        Raises:
            Exception: when the API answers with a non-200 status.
        """
        args, api_key = self._stored_args_and_key()
        # NOTE(review): detail endpoint assumed to be "<classifiers>/<model_id>/" - confirm.
        response = requests.get(
            f"{self._CLASSIFIERS_URL}{args['model_id']}/",
            headers=self._auth_headers(api_key),
            timeout=self._REQUEST_TIMEOUT,
        )
        body = self._json_or_raise(response)
        description = {
            'name': body['name'],
            'model_version': body['model_version'],
            'date_created': body['created'],
            # pre-trained monkeylearn models state which industries they suit
            'industries': body['industries'],
        }
        return pd.DataFrame([description])
78 return des_df