Coverage for mindsdb / integrations / handlers / pycaret_handler / test / test_pycaret.py: 0%

78 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-21 00:36 +0000

1import importlib 

2import time 

3from unittest.mock import patch 

4import pandas as pd 

5import pytest 

6 

7from mindsdb_sql_parser import parse_sql 

8from tests.unit.executor_test_base import BaseExecutorTest 

9 

# Probe for the optional pycaret dependency once at import time; the result
# drives the class-level skip marker below.
try:
    importlib.import_module("pycaret")
except ImportError:
    PYCARET_INSTALLED = False
else:
    PYCARET_INSTALLED = True

15 

16 

@pytest.mark.skipif(not PYCARET_INSTALLED, reason="pycaret is not installed")
class TestPyCaret(BaseExecutorTest):
    """Executor-level tests for models created with ``engine = 'pycaret'``.

    Every test hands a small in-memory DataFrame to ``set_handler`` under the
    name ``pg``, creates a project and a model through SQL, waits for training
    to finish and checks the first predicted value. The whole class is skipped
    when ``pycaret`` cannot be imported.
    """

    def wait_predictor(self, project, name, attempts=200, delay=0.5):
        """Poll ``{project}.models`` until model ``name`` finishes training.

        Args:
            project: Project whose ``models`` table is queried.
            name: Name of the model to wait for.
            attempts: Maximum number of polls before giving up.
            delay: Seconds to sleep between two polls.

        Raises:
            RuntimeError: If the model reports status ``'error'`` or does not
                reach status ``'complete'`` within ``attempts`` polls.
        """
        status = None
        for _ in range(attempts):
            ret = self.run_sql(
                f"select * from {project}.models where name='{name}'"
            )
            # run_sql returns None when the executor produced no data, so
            # treat that like "row not there yet" and retry.
            if ret is not None and not ret.empty:
                status = ret['STATUS'].iloc[0]
                if status == 'complete':
                    return
                if status == 'error':
                    # NOTE(review): assumes the models table may expose an
                    # ERROR column with the failure text - guarded because
                    # that is not visible from this file.
                    detail = ret['ERROR'].iloc[0] if 'ERROR' in ret.columns else None
                    raise RuntimeError(
                        f"predictor wasn't created: training failed ({detail})"
                    )
            time.sleep(delay)
        raise RuntimeError(
            f"predictor wasn't created: timed out (last status: {status!r})"
        )

    def run_sql(self, sql):
        """Parse ``sql``, execute it and return the result as a DataFrame.

        Args:
            sql: SQL text handed to ``parse_sql``.

        Returns:
            ``ret.data.to_df()`` when the executor returned data, else ``None``.

        Raises:
            AssertionError: If the executor reports a non-``None`` error code.
        """
        ret = self.command_executor.execute_command(
            parse_sql(sql)
        )
        assert ret.error_code is None, (
            f"query failed with error_code={ret.error_code}: {sql}"
        )
        if ret.data is None:
            return None
        return ret.data.to_df()

    @patch('mindsdb.integrations.handlers.postgres_handler.Handler')
    def test_classifier(self, mock_handler):
        """Train an xgboost classifier on 12 iris rows and check the first label."""
        df = pd.DataFrame({
            'sepal_length': [5.1, 4.9, 4.7, 4.6, 6.4, 6.9, 5.5, 6.5, 7.7, 6.3, 6.7, 7.2],
            'sepal_width': [3.5, 3.0, 3.2, 3.1, 3.2, 3.1, 2.3, 2.8, 2.8, 2.7, 3.3, 3.2],
            'petal_length': [1.4, 4.0, 1.3, 1.5, 4.5, 4.9, 4.0, 4.6, 6.7, 4.9, 5.7, 6.0],
            'petal_width': [0.2, 0.2, 0.2, 0.2, 1.5, 1.5, 1.3, 1.5, 2.0, 1.8, 2.1, 1.8],
            'species': [
                'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
                'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
                'Iris-virginica', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
            ],
        })
        self.set_handler(mock_handler, name='pg', tables={'iris': df})

        # create project
        self.run_sql('create database proj;')

        # create predictor
        self.run_sql('''
            CREATE MODEL proj.my_pycaret_class_model
            FROM pg
            (SELECT sepal_length, sepal_width, petal_length, petal_width, species FROM iris)
            PREDICT species
            USING
            engine = 'pycaret',
            model_type = 'classification',
            model_name = 'xgboost',
            setup_session_id = 123,
            setup_fold = 2;
        ''')
        self.wait_predictor('proj', 'my_pycaret_class_model')

        # run predict
        ret = self.run_sql('''
            SELECT prediction_label
            FROM pg.iris as t
            JOIN proj.my_pycaret_class_model AS m;
        ''')

        assert ret['prediction_label'].iloc[0] == 'Iris-setosa'

    @patch('mindsdb.integrations.handlers.postgres_handler.Handler')
    def test_regression(self, mock_handler):
        """Train an xgboost regressor on 8 insurance rows and check the first value."""
        df = pd.DataFrame({
            'age': [19, 18, 28, 33, 32, 31, 46, 37],
            'sex': ['female', 'male', 'male', 'male', 'male', 'female', 'female', 'female'],
            'bmi': [27.9, 33.77, 33, 22.705, 28.88, 25.74, 33.44, 27.74],
            'children': [0, 1, 3, 0, 0, 0, 1, 3],
            'smoker': ['yes', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
            'region': [
                'southwest', 'southeast', 'southeast', 'northwest',
                'northwest', 'southeast', 'southeast', 'northwest',
            ],
            'charges': [
                16884.924, 1725.5523, 4449.462, 21984.47061,
                3866.8552, 3756.6216, 8240.5896, 7281.5056,
            ],
        })

        self.set_handler(mock_handler, name='pg', tables={'insurance': df})

        # create project
        self.run_sql('create database proj;')

        # create predictor
        self.run_sql('''
            CREATE MODEL proj.my_pycaret_regr_model
            FROM pg
            (SELECT age, sex, bmi, children, smoker, region, charges FROM insurance)
            PREDICT charges
            USING
            engine = 'pycaret',
            model_type = 'regression',
            model_name = 'xgboost',
            setup_session_id = 123,
            setup_fold = 2;
        ''')
        self.wait_predictor('proj', 'my_pycaret_regr_model')

        # run predict
        ret = self.run_sql('''
            SELECT prediction_label
            FROM pg.insurance as t
            JOIN proj.my_pycaret_regr_model AS m;
        ''')

        # Only the integer part is compared.
        assert int(ret['prediction_label'].iloc[0]) == 3822

    @patch('mindsdb.integrations.handlers.postgres_handler.Handler')
    @pytest.mark.skip(reason="MindsDB recognizes 'Anomaly' as a keyword so it fails to fetch Anomaly column")
    def test_anomaly(self, mock_handler):
        """Train an iforest anomaly model on 10 rows (currently skipped)."""
        df = pd.DataFrame({
            'Col1': [0.263995357, 0.764928588, 0.13842355, 0.935242061, 0.605866573, 0.518789697, 0.912225161, 0.608234451, 0.723781923, 0.73359095],
            'Col2': [0.546092303, 0.65397459, 0.065575135, 0.227771913, 0.845269445, 0.837065879, 0.272378939, 0.331678698, 0.429296975, 0.367422001],
            'Col3': [0.336714104, 0.538842451, 0.192801069, 0.553562822, 0.074514511, 0.332993162, 0.365792205, 0.861309323, 0.899016587, 0.088600152],
            'Col4': [0.092107835, 0.995016662, 0.014465045, 0.176370646, 0.241530075, 0.514723634, 0.562208164, 0.158963258, 0.073715215, 0.208463224],
            'Col5': [0.325261175, 0.805967636, 0.957033424, 0.331664957, 0.307923366, 0.355314772, 0.50189852, 0.558449452, 0.885169295, 0.182754409],
            'Col6': [0.212464853, 0.780304761, 0.458443656, 0.634508561, 0.373030452, 0.465650668, 0.413997158, 0.013080054, 0.570250227, 0.736672363],
            'Col7': [0.258565714, 0.437317789, 0.559647989, 0.109202597, 0.994553306, 0.896994183, 0.488468506, 0.251942977, 0.017265143, 0.538513303],
            'Col8': [0.869236755, 0.277978893, 0.42307639, 0.11247202, 0.183727053, 0.034959735, 0.111113968, 0.249329646, 0.550683376, 0.049843054],
            'Col9': [0.197077957, 0.843918225, 0.24339588, 0.281278233, 0.329148141, 0.73458152, 0.191947043, 0.927804425, 0.71326865, 0.891548497],
            'Col10': [0.292984504, 0.70343162, 0.43962138, 0.107867968, 0.922947409, 0.25345779, 0.29565178, 0.355286799, 0.980911322, 0.308864217],
        })

        self.set_handler(mock_handler, name='pg', tables={'anomaly': df})

        # create project
        self.run_sql('create database proj;')

        # create predictor
        self.run_sql('''
            CREATE MODEL proj.my_pycaret_anom_model
            FROM pg
            (SELECT Col1, Col2, Col3, Col4, Col5, Col6, Col7, Col8, Col9, Col10 FROM anomaly)
            PREDICT Col10
            USING
            engine = 'pycaret',
            model_type = 'anomaly',
            model_name = 'iforest',
            setup_session_id = 123,
            setup_fold = 2;
        ''')
        self.wait_predictor('proj', 'my_pycaret_anom_model')

        # run predict
        # TODO: is there a workaround for this? (it works when ran in web UI)
        ret = self.run_sql('''
            SELECT m.Anomaly
            FROM pg.anomaly as t
            JOIN proj.my_pycaret_anom_model AS m;
        ''')

        assert int(ret['Anomaly'].iloc[0]) == 0

    @patch('mindsdb.integrations.handlers.postgres_handler.Handler')
    def test_cluster(self, mock_handler):
        """Train a kmeans clustering model on 8 rows and check the first cluster."""
        df = pd.DataFrame({
            'Age': [58, 59, 62, 59, 87, 29, 54, 87],
            'Income': [77769, 81799, 74751, 74373, 17760, 13157, 76500, 42592],
            'SpendingScore': [0.7913287771988531, 0.7910820467274178, 0.7026569520102857, 0.7656795619984281, 0.3487775484305076, 0.8470341025128374, 0.7851978501165687, 0.3552896820382753],
            'Savings': [6559.8299230048315, 5417.661426197439, 9258.992965034067, 7346.334503537976, 16869.507130301474, 3535.5143522162816, 6878.884248553975, 18086.287157859304],
        })

        self.set_handler(mock_handler, name='pg', tables={'jewellery': df})

        # create project
        self.run_sql('create database proj;')

        # create predictor
        self.run_sql('''
            CREATE MODEL proj.my_pycaret_cluster_model
            FROM pg
            (SELECT Age, Income, SpendingScore, Savings FROM jewellery)
            PREDICT Savings
            USING
            engine = 'pycaret',
            model_type = 'clustering',
            model_name = 'kmeans',
            setup_session_id = 123;
        ''')
        self.wait_predictor('proj', 'my_pycaret_cluster_model')

        # run predict
        ret = self.run_sql('''
            SELECT m.Cluster
            FROM pg.jewellery as t
            JOIN proj.my_pycaret_cluster_model AS m;
        ''')

        assert ret['Cluster'].iloc[0] == "Cluster 0"

    @patch('mindsdb.integrations.handlers.postgres_handler.Handler')
    def test_timeseries(self, mock_handler):
        """Train a naive time-series model on 20 monthly rows and check the first forecast."""
        df = pd.DataFrame({
            'Year': [1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950],
            'Month': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8],
            'Passengers': [112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118, 115, 126, 141, 135, 125, 149, 170, 170],
        })

        self.set_handler(mock_handler, name='pg', tables={'airline': df})

        # create project
        self.run_sql('create database proj;')

        # create predictor
        self.run_sql('''
            CREATE MODEL proj.my_pycaret_timeseries_model
            FROM pg
            (SELECT Year, Month, Passengers FROM airline)
            PREDICT Passengers
            USING
            engine = 'pycaret',
            model_type = 'time_series',
            model_name = 'naive',
            setup_fh = 3,
            predict_fh = 36,
            setup_session_id = 123;
        ''')
        self.wait_predictor('proj', 'my_pycaret_timeseries_model')

        # run predict
        ret = self.run_sql('''
            SELECT m.y_pred
            FROM pg.airline as t
            JOIN proj.my_pycaret_timeseries_model AS m;
        ''')

        # Only the integer part is compared.
        assert int(ret['y_pred'].iloc[0]) == 125

240 ''') 

241 

242 assert int(ret['y_pred'].iloc[0]) == 125