Coverage for mindsdb/integrations/handlers/pycaret_handler/test/test_pycaret.py

1import importlib

2import time

3from unittest.mock import patch

4import pandas as pd

5import pytest

7from mindsdb_sql_parser import parse_sql

8from tests.unit.executor_test_base import BaseExecutorTest

# Probe for the optional pycaret package once at import time; the flag is
# consumed by the skipif marker on the test class so the suite is skipped
# (instead of erroring) when pycaret is unavailable.
try:
    importlib.import_module("pycaret")
except ImportError:
    PYCARET_INSTALLED = False
else:
    PYCARET_INSTALLED = True

@pytest.mark.skipif(not PYCARET_INSTALLED, reason="pycaret is not installed")
class TestPyCaret(BaseExecutorTest):
    """Executor-level tests for models created with ``engine = 'pycaret'``.

    Each test registers an in-memory DataFrame as a table of a mocked ``pg``
    data source, creates a model in project ``proj`` through SQL, waits for
    training to finish and checks the first predicted value.
    """

    def wait_predictor(self, project, name, *, max_attempts=200, poll_interval=0.5):
        """Block until the model ``project.name`` reports status ``complete``.

        The ``models`` table of the project is polled up to ``max_attempts``
        times, sleeping ``poll_interval`` seconds between polls.

        Args:
            project: Name of the project that owns the model.
            name: Name of the model to wait for.
            max_attempts: Maximum number of status polls (default 200).
            poll_interval: Seconds to sleep between polls (default 0.5).

        Raises:
            RuntimeError: If the model reports status ``error`` or does not
                reach ``complete`` within ``max_attempts`` polls.
        """
        status_query = f"select * from {project}.models where name='{name}'"
        for _ in range(max_attempts):
            ret = self.run_sql(status_query)
            # run_sql returns None when the executor produced no data set.
            if ret is not None and not ret.empty:
                status = ret['STATUS'][0]
                if status == 'complete':
                    return
                if status == 'error':
                    # Fail fast and surface the training error instead of the
                    # generic timeout message. The ERROR column is looked up
                    # defensively because its presence is not guaranteed here.
                    error = ret['ERROR'][0] if 'ERROR' in ret.columns else 'unknown error'
                    raise RuntimeError(
                        f"predictor wasn't created: training of {project}.{name} failed: {error}"
                    )
            time.sleep(poll_interval)
        raise RuntimeError(
            f"predictor wasn't created: {project}.{name} did not complete "
            f"after {max_attempts} status checks"
        )

    def run_sql(self, sql):
        """Parse and execute ``sql`` through the command executor.

        Args:
            sql: SQL statement text.

        Returns:
            The result converted with ``to_df()`` when the executor returned
            data, otherwise ``None``.

        Raises:
            AssertionError: If the executor reports an error code.
        """
        ret = self.command_executor.execute_command(
            parse_sql(sql)
        )
        assert ret.error_code is None, (
            f"SQL failed with error code {ret.error_code}: {sql}"
        )
        if ret.data is None:
            return None
        return ret.data.to_df()

    @patch('mindsdb.integrations.handlers.postgres_handler.Handler')
    def test_classifier(self, mock_handler):
        """Train an xgboost classifier on iris rows and check the first label."""
        df = pd.DataFrame({
            'sepal_length': [5.1, 4.9, 4.7, 4.6, 6.4, 6.9, 5.5, 6.5, 7.7, 6.3, 6.7, 7.2],
            'sepal_width': [3.5, 3.0, 3.2, 3.1, 3.2, 3.1, 2.3, 2.8, 2.8, 2.7, 3.3, 3.2],
            'petal_length': [1.4, 4.0, 1.3, 1.5, 4.5, 4.9, 4.0, 4.6, 6.7, 4.9, 5.7, 6.0],
            'petal_width': [0.2, 0.2, 0.2, 0.2, 1.5, 1.5, 1.3, 1.5, 2.0, 1.8, 2.1, 1.8],
            'species': [
                'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
                'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
                'Iris-virginica', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
            ]
        })
        self.set_handler(mock_handler, name='pg', tables={'iris': df})

        # create project
        self.run_sql('create database proj;')

        # create predictor
        self.run_sql('''
            CREATE MODEL proj.my_pycaret_class_model
            FROM pg
            (SELECT sepal_length, sepal_width, petal_length, petal_width, species FROM iris)
            PREDICT species
            USING
            engine = 'pycaret',
            model_type = 'classification',
            model_name = 'xgboost',
            setup_session_id = 123,
            setup_fold = 2;
        ''')
        self.wait_predictor('proj', 'my_pycaret_class_model')

        # run predict
        ret = self.run_sql('''
            SELECT prediction_label
            FROM pg.iris as t
            JOIN proj.my_pycaret_class_model AS m;
        ''')

        assert ret['prediction_label'].iloc[0] == 'Iris-setosa'

    @patch('mindsdb.integrations.handlers.postgres_handler.Handler')
    def test_regression(self, mock_handler):
        """Train an xgboost regressor on insurance rows and check the first value."""
        df = pd.DataFrame({
            'age': [19, 18, 28, 33, 32, 31, 46, 37],
            'sex': ['female', 'male', 'male', 'male', 'male', 'female', 'female', 'female'],
            'bmi': [27.9, 33.77, 33, 22.705, 28.88, 25.74, 33.44, 27.74],
            'children': [0, 1, 3, 0, 0, 0, 1, 3],
            'smoker': ['yes', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
            'region': ['southwest', 'southeast', 'southeast', 'northwest', 'northwest', 'southeast', 'southeast', 'northwest'],
            'charges': [16884.924, 1725.5523, 4449.462, 21984.47061, 3866.8552, 3756.6216, 8240.5896, 7281.5056]
        })

        self.set_handler(mock_handler, name='pg', tables={'insurance': df})

        # create project
        self.run_sql('create database proj;')

        # create predictor
        self.run_sql('''
            CREATE MODEL proj.my_pycaret_regr_model
            FROM pg
            (SELECT age, sex, bmi, children, smoker, region, charges FROM insurance)
            PREDICT charges
            USING
            engine = 'pycaret',
            model_type = 'regression',
            model_name = 'xgboost',
            setup_session_id = 123,
            setup_fold = 2;
        ''')
        self.wait_predictor('proj', 'my_pycaret_regr_model')

        # run predict
        ret = self.run_sql('''
            SELECT prediction_label
            FROM pg.insurance as t
            JOIN proj.my_pycaret_regr_model AS m;
        ''')

        # Compare the integer part only; the expected value is pinned by
        # setup_session_id = 123 in the model definition above.
        assert int(ret['prediction_label'].iloc[0]) == 3822

    @patch('mindsdb.integrations.handlers.postgres_handler.Handler')
    @pytest.mark.skip(reason="MindsDB recognizes 'Anomaly' as a keyword so it fails to fetch Anomaly column")
    def test_anomaly(self, mock_handler):
        """Train an iforest anomaly model and check the first Anomaly flag (skipped)."""
        df = pd.DataFrame({
            'Col1': [0.263995357, 0.764928588, 0.13842355, 0.935242061, 0.605866573, 0.518789697, 0.912225161, 0.608234451, 0.723781923, 0.73359095],
            'Col2': [0.546092303, 0.65397459, 0.065575135, 0.227771913, 0.845269445, 0.837065879, 0.272378939, 0.331678698, 0.429296975, 0.367422001],
            'Col3': [0.336714104, 0.538842451, 0.192801069, 0.553562822, 0.074514511, 0.332993162, 0.365792205, 0.861309323, 0.899016587, 0.088600152],
            'Col4': [0.092107835, 0.995016662, 0.014465045, 0.176370646, 0.241530075, 0.514723634, 0.562208164, 0.158963258, 0.073715215, 0.208463224],
            'Col5': [0.325261175, 0.805967636, 0.957033424, 0.331664957, 0.307923366, 0.355314772, 0.50189852, 0.558449452, 0.885169295, 0.182754409],
            'Col6': [0.212464853, 0.780304761, 0.458443656, 0.634508561, 0.373030452, 0.465650668, 0.413997158, 0.013080054, 0.570250227, 0.736672363],
            'Col7': [0.258565714, 0.437317789, 0.559647989, 0.109202597, 0.994553306, 0.896994183, 0.488468506, 0.251942977, 0.017265143, 0.538513303],
            'Col8': [0.869236755, 0.277978893, 0.42307639, 0.11247202, 0.183727053, 0.034959735, 0.111113968, 0.249329646, 0.550683376, 0.049843054],
            'Col9': [0.197077957, 0.843918225, 0.24339588, 0.281278233, 0.329148141, 0.73458152, 0.191947043, 0.927804425, 0.71326865, 0.891548497],
            'Col10': [0.292984504, 0.70343162, 0.43962138, 0.107867968, 0.922947409, 0.25345779, 0.29565178, 0.355286799, 0.980911322, 0.308864217]
        })

        self.set_handler(mock_handler, name='pg', tables={'anomaly': df})

        # create project
        self.run_sql('create database proj;')

        # create predictor
        self.run_sql('''
            CREATE MODEL proj.my_pycaret_anom_model
            FROM pg
            (SELECT Col1, Col2, Col3, Col4, Col5, Col6, Col7, Col8, Col9, Col10 FROM anomaly)
            PREDICT Col10
            USING
            engine = 'pycaret',
            model_type = 'anomaly',
            model_name = 'iforest',
            setup_session_id = 123,
            setup_fold = 2;
        ''')
        self.wait_predictor('proj', 'my_pycaret_anom_model')

        # run predict
        # TODO: is there a workaround for this? (it works when ran in web UI)
        ret = self.run_sql('''
            SELECT m.Anomaly
            FROM pg.anomaly as t
            JOIN proj.my_pycaret_anom_model AS m;
        ''')

        assert int(ret['Anomaly'].iloc[0]) == 0

    @patch('mindsdb.integrations.handlers.postgres_handler.Handler')
    def test_cluster(self, mock_handler):
        """Train a kmeans clustering model and check the first cluster label."""
        df = pd.DataFrame({
            'Age': [58, 59, 62, 59, 87, 29, 54, 87],
            'Income': [77769, 81799, 74751, 74373, 17760, 13157, 76500, 42592],
            'SpendingScore': [0.7913287771988531, 0.7910820467274178, 0.7026569520102857, 0.7656795619984281, 0.3487775484305076, 0.8470341025128374, 0.7851978501165687, 0.3552896820382753],
            'Savings': [6559.8299230048315, 5417.661426197439, 9258.992965034067, 7346.334503537976, 16869.507130301474, 3535.5143522162816, 6878.884248553975, 18086.287157859304]
        })

        self.set_handler(mock_handler, name='pg', tables={'jewellery': df})

        # create project
        self.run_sql('create database proj;')

        # create predictor
        self.run_sql('''
            CREATE MODEL proj.my_pycaret_cluster_model
            FROM pg
            (SELECT Age, Income, SpendingScore, Savings FROM jewellery)
            PREDICT Savings
            USING
            engine = 'pycaret',
            model_type = 'clustering',
            model_name = 'kmeans',
            setup_session_id = 123;
        ''')
        self.wait_predictor('proj', 'my_pycaret_cluster_model')

        # run predict
        ret = self.run_sql('''
            SELECT m.Cluster
            FROM pg.jewellery as t
            JOIN proj.my_pycaret_cluster_model AS m;
        ''')

        assert ret['Cluster'].iloc[0] == "Cluster 0"

    @patch('mindsdb.integrations.handlers.postgres_handler.Handler')
    def test_timeseries(self, mock_handler):
        """Train a naive time-series model on airline rows and check the first forecast."""
        df = pd.DataFrame({
            'Year': [1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950],
            'Month': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8],
            'Passengers': [112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118, 115, 126, 141, 135, 125, 149, 170, 170]
        })

        self.set_handler(mock_handler, name='pg', tables={'airline': df})

        # create project
        self.run_sql('create database proj;')

        # create predictor
        self.run_sql('''
            CREATE MODEL proj.my_pycaret_timeseries_model
            FROM pg
            (SELECT Year, Month, Passengers FROM airline)
            PREDICT Passengers
            USING
            engine = 'pycaret',
            model_type = 'time_series',
            model_name = 'naive',
            setup_fh = 3,
            predict_fh = 36,
            setup_session_id = 123;
        ''')
        self.wait_predictor('proj', 'my_pycaret_timeseries_model')

        # run predict
        ret = self.run_sql('''
            SELECT m.y_pred
            FROM pg.airline as t
            JOIN proj.my_pycaret_timeseries_model AS m;
        ''')

        assert int(ret['y_pred'].iloc[0]) == 125

Coverage for mindsdb / integrations / handlers / pycaret_handler / test / test_pycaret.py: 0%

78 statements