Coverage for mindsdb/integrations/handlers/pycaret_handler/test/test_pycaret.py: 0%
78 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
1import importlib
2import time
3from unittest.mock import patch
4import pandas as pd
5import pytest
7from mindsdb_sql_parser import parse_sql
8from tests.unit.executor_test_base import BaseExecutorTest
# Probe for the optional ``pycaret`` dependency once at import time so the
# test class below can be skipped when it is unavailable.
try:
    importlib.import_module("pycaret")
except ImportError:
    PYCARET_INSTALLED = False
else:
    # Only reached when the import succeeded; keeps the try body minimal.
    PYCARET_INSTALLED = True
@pytest.mark.skipif(not PYCARET_INSTALLED, reason="pycaret is not installed")
class TestPyCaret(BaseExecutorTest):
    """Executor-level tests for models created with ``engine = 'pycaret'``.

    Every test follows the same steps: hand a small in-memory table to
    ``self.set_handler`` under the name ``pg``, create the ``proj`` database,
    issue a ``CREATE MODEL`` statement, wait for training to finish and then
    check the first row returned by a prediction query.
    """

    def wait_predictor(self, project, name):
        """Block until model ``name`` in ``project`` reports ``complete``.

        Polls ``<project>.models`` up to 200 times, sleeping 0.5 seconds
        between polls.

        Args:
            project: name of the project that owns the model.
            name: name of the model to wait for.

        Raises:
            RuntimeError: if the model reports status ``error`` or has not
                reached ``complete`` after the last poll. The message carries
                the last observed status and, when the ``ERROR`` column is
                present, its value.
        """
        status = None
        failure = None
        for _ in range(200):
            ret = self.run_sql(
                f"select * from {project}.models where name='{name}'"
            )
            if ret is not None and not ret.empty:
                # Positional access, consistent with the assertions in the
                # tests below; does not depend on the frame's index labels.
                status = ret['STATUS'].iloc[0]
                if status == 'complete':
                    return
                if status == 'error':
                    if 'ERROR' in ret.columns:
                        failure = ret['ERROR'].iloc[0]
                    # Training failed; further polling cannot succeed.
                    break
            time.sleep(0.5)

        raise RuntimeError(
            f"predictor wasn't created: {project}.{name} "
            f"(last status: {status!r}, error: {failure!r})"
        )

    def run_sql(self, sql):
        """Parse and execute ``sql`` through ``self.command_executor``.

        Args:
            sql: the SQL statement to run.

        Returns:
            The result converted with ``to_df()`` when the executor returned
            data, otherwise ``None``.

        Raises:
            AssertionError: if the executor reports a non-``None`` error code.
        """
        ret = self.command_executor.execute_command(parse_sql(sql))
        assert ret.error_code is None, (
            f"query failed with error code {ret.error_code}: {sql}"
        )
        if ret.data is None:
            return None
        return ret.data.to_df()

    @patch('mindsdb.integrations.handlers.postgres_handler.Handler')
    def test_classifier(self, mock_handler):
        """Train an xgboost classifier on iris rows and check the first label."""
        df = pd.DataFrame({
            'sepal_length': [5.1, 4.9, 4.7, 4.6, 6.4, 6.9, 5.5, 6.5, 7.7, 6.3, 6.7, 7.2],
            'sepal_width': [3.5, 3.0, 3.2, 3.1, 3.2, 3.1, 2.3, 2.8, 2.8, 2.7, 3.3, 3.2],
            'petal_length': [1.4, 4.0, 1.3, 1.5, 4.5, 4.9, 4.0, 4.6, 6.7, 4.9, 5.7, 6.0],
            'petal_width': [0.2, 0.2, 0.2, 0.2, 1.5, 1.5, 1.3, 1.5, 2.0, 1.8, 2.1, 1.8],
            'species': ['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica']
        })
        self.set_handler(mock_handler, name='pg', tables={'iris': df})

        # create project
        self.run_sql('create database proj;')

        # create predictor
        self.run_sql('''
            CREATE MODEL proj.my_pycaret_class_model
            FROM pg
            (SELECT sepal_length, sepal_width, petal_length, petal_width, species FROM iris)
            PREDICT species
            USING
              engine = 'pycaret',
              model_type = 'classification',
              model_name = 'xgboost',
              setup_session_id = 123,
              setup_fold = 2;
        ''')
        self.wait_predictor('proj', 'my_pycaret_class_model')

        # run predict
        ret = self.run_sql('''
            SELECT prediction_label
            FROM pg.iris as t
            JOIN proj.my_pycaret_class_model AS m;
        ''')

        assert ret['prediction_label'].iloc[0] == 'Iris-setosa'

    @patch('mindsdb.integrations.handlers.postgres_handler.Handler')
    def test_regression(self, mock_handler):
        """Train an xgboost regressor on insurance rows and check the first value."""
        df = pd.DataFrame({
            'age': [19, 18, 28, 33, 32, 31, 46, 37],
            'sex': ['female', 'male', 'male', 'male', 'male', 'female', 'female', 'female'],
            'bmi': [27.9, 33.77, 33, 22.705, 28.88, 25.74, 33.44, 27.74],
            'children': [0, 1, 3, 0, 0, 0, 1, 3],
            'smoker': ['yes', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
            'region': ['southwest', 'southeast', 'southeast', 'northwest', 'northwest', 'southeast', 'southeast', 'northwest'],
            'charges': [16884.924, 1725.5523, 4449.462, 21984.47061, 3866.8552, 3756.6216, 8240.5896, 7281.5056]
        })

        self.set_handler(mock_handler, name='pg', tables={'insurance': df})

        # create project
        self.run_sql('create database proj;')

        # create predictor
        self.run_sql('''
            CREATE MODEL proj.my_pycaret_regr_model
            FROM pg
            (SELECT age, sex, bmi, children, smoker, region, charges FROM insurance)
            PREDICT charges
            USING
              engine = 'pycaret',
              model_type = 'regression',
              model_name = 'xgboost',
              setup_session_id = 123,
              setup_fold = 2;
        ''')
        self.wait_predictor('proj', 'my_pycaret_regr_model')

        # run predict
        ret = self.run_sql('''
            SELECT prediction_label
            FROM pg.insurance as t
            JOIN proj.my_pycaret_regr_model AS m;
        ''')

        # Only the integer part is compared.
        assert int(ret['prediction_label'].iloc[0]) == 3822

    @patch('mindsdb.integrations.handlers.postgres_handler.Handler')
    @pytest.mark.skip(reason="MindsDB recognizes 'Anomaly' as a keyword so it fails to fetch Anomaly column")
    def test_anomaly(self, mock_handler):
        """Train an iforest anomaly model and check the first flag (currently skipped)."""
        df = pd.DataFrame({
            'Col1': [0.263995357, 0.764928588, 0.13842355, 0.935242061, 0.605866573, 0.518789697, 0.912225161, 0.608234451, 0.723781923, 0.73359095],
            'Col2': [0.546092303, 0.65397459, 0.065575135, 0.227771913, 0.845269445, 0.837065879, 0.272378939, 0.331678698, 0.429296975, 0.367422001],
            'Col3': [0.336714104, 0.538842451, 0.192801069, 0.553562822, 0.074514511, 0.332993162, 0.365792205, 0.861309323, 0.899016587, 0.088600152],
            'Col4': [0.092107835, 0.995016662, 0.014465045, 0.176370646, 0.241530075, 0.514723634, 0.562208164, 0.158963258, 0.073715215, 0.208463224],
            'Col5': [0.325261175, 0.805967636, 0.957033424, 0.331664957, 0.307923366, 0.355314772, 0.50189852, 0.558449452, 0.885169295, 0.182754409],
            'Col6': [0.212464853, 0.780304761, 0.458443656, 0.634508561, 0.373030452, 0.465650668, 0.413997158, 0.013080054, 0.570250227, 0.736672363],
            'Col7': [0.258565714, 0.437317789, 0.559647989, 0.109202597, 0.994553306, 0.896994183, 0.488468506, 0.251942977, 0.017265143, 0.538513303],
            'Col8': [0.869236755, 0.277978893, 0.42307639, 0.11247202, 0.183727053, 0.034959735, 0.111113968, 0.249329646, 0.550683376, 0.049843054],
            'Col9': [0.197077957, 0.843918225, 0.24339588, 0.281278233, 0.329148141, 0.73458152, 0.191947043, 0.927804425, 0.71326865, 0.891548497],
            'Col10': [0.292984504, 0.70343162, 0.43962138, 0.107867968, 0.922947409, 0.25345779, 0.29565178, 0.355286799, 0.980911322, 0.308864217]
        })

        self.set_handler(mock_handler, name='pg', tables={'anomaly': df})

        # create project
        self.run_sql('create database proj;')

        # create predictor
        self.run_sql('''
            CREATE MODEL proj.my_pycaret_anom_model
            FROM pg
            (SELECT Col1, Col2, Col3, Col4, Col5, Col6, Col7, Col8, Col9, Col10 FROM anomaly)
            PREDICT Col10
            USING
              engine = 'pycaret',
              model_type = 'anomaly',
              model_name = 'iforest',
              setup_session_id = 123,
              setup_fold = 2;
        ''')
        self.wait_predictor('proj', 'my_pycaret_anom_model')

        # run predict
        # TODO: is there a workaround for this? (it works when ran in web UI)
        ret = self.run_sql('''
            SELECT m.Anomaly
            FROM pg.anomaly as t
            JOIN proj.my_pycaret_anom_model AS m;
        ''')

        assert int(ret['Anomaly'].iloc[0]) == 0

    @patch('mindsdb.integrations.handlers.postgres_handler.Handler')
    def test_cluster(self, mock_handler):
        """Train a kmeans clustering model and check the first cluster label."""
        df = pd.DataFrame({
            'Age': [58, 59, 62, 59, 87, 29, 54, 87],
            'Income': [77769, 81799, 74751, 74373, 17760, 13157, 76500, 42592],
            'SpendingScore': [0.7913287771988531, 0.7910820467274178, 0.7026569520102857, 0.7656795619984281, 0.3487775484305076, 0.8470341025128374, 0.7851978501165687, 0.3552896820382753],
            'Savings': [6559.8299230048315, 5417.661426197439, 9258.992965034067, 7346.334503537976, 16869.507130301474, 3535.5143522162816, 6878.884248553975, 18086.287157859304]
        })

        self.set_handler(mock_handler, name='pg', tables={'jewellery': df})

        # create project
        self.run_sql('create database proj;')

        # create predictor
        self.run_sql('''
            CREATE MODEL proj.my_pycaret_cluster_model
            FROM pg
            (SELECT Age, Income, SpendingScore, Savings FROM jewellery)
            PREDICT Savings
            USING
              engine = 'pycaret',
              model_type = 'clustering',
              model_name = 'kmeans',
              setup_session_id = 123;
        ''')
        self.wait_predictor('proj', 'my_pycaret_cluster_model')

        # run predict
        ret = self.run_sql('''
            SELECT m.Cluster
            FROM pg.jewellery as t
            JOIN proj.my_pycaret_cluster_model AS m;
        ''')

        assert ret['Cluster'].iloc[0] == "Cluster 0"

    @patch('mindsdb.integrations.handlers.postgres_handler.Handler')
    def test_timeseries(self, mock_handler):
        """Train a naive time-series model on airline rows and check the first forecast."""
        df = pd.DataFrame({
            'Year': [1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950],
            'Month': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8],
            'Passengers': [112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118, 115, 126, 141, 135, 125, 149, 170, 170]
        })

        self.set_handler(mock_handler, name='pg', tables={'airline': df})

        # create project
        self.run_sql('create database proj;')

        # create predictor
        self.run_sql('''
            CREATE MODEL proj.my_pycaret_timeseries_model
            FROM pg
            (SELECT Year, Month, Passengers FROM airline)
            PREDICT Passengers
            USING
              engine = 'pycaret',
              model_type = 'time_series',
              model_name = 'naive',
              setup_fh = 3,
              predict_fh = 36,
              setup_session_id = 123;
        ''')
        self.wait_predictor('proj', 'my_pycaret_timeseries_model')

        # run predict
        ret = self.run_sql('''
            SELECT m.y_pred
            FROM pg.airline as t
            JOIN proj.my_pycaret_timeseries_model AS m;
        ''')

        # Only the integer part is compared.
        assert int(ret['y_pred'].iloc[0]) == 125