# Integration tests for the OpenAI handler.
# (Recovered from a coverage.py v7.13.1 report dump: 108 statements, 0% covered,
#  created at 2026-01-21 00:36 +0000 —
#  mindsdb/integrations/handlers/openai_handler/tests/test_openai_handler.py)

import os
from unittest.mock import patch

import pandas as pd
import pytest

from tests.unit.ml_handlers.base_ml_test import BaseMLAPITest

7 

8 

@pytest.mark.skipif(os.environ.get('MDB_TEST_MDB_OPENAI_API_KEY') is None, reason='Missing API key!')
class TestOpenAI(BaseMLAPITest):
    """
    Integration tests for the OpenAI handler.

    Each test creates a model via SQL in a test project and verifies
    predictions for one of the handler's modes: default (question column or
    prompt template), embedding, image, conversational, and
    conversational-full. Bulk-prediction tests mock a postgres handler to
    supply input rows. Requires the MDB_TEST_MDB_OPENAI_API_KEY environment
    variable (tests are skipped otherwise) and call the live OpenAI API.
    """

    def setup_method(self):
        """
        Setup test environment by creating a project and an OpenAI engine.
        """
        super().setup_method()
        self.run_sql("CREATE DATABASE proj")
        self.run_sql(
            f"""
            CREATE ML_ENGINE openai_engine
            FROM openai
            USING
            openai_api_key = '{self.get_api_key('MDB_TEST_MDB_OPENAI_API_KEY')}';
            """
        )

    def test_create_model_with_unsupported_model_raises_exception(self):
        """
        Test if CREATE MODEL raises an exception with an unsupported model.
        """
        self.run_sql(
            """
            CREATE MODEL proj.test_openaai_unsupported_model_model
            PREDICT answer
            USING
                engine='openai_engine',
                model_name='this-model-does-not-exist',
                prompt_template='dummy_prompt_template';
            """
        )
        # The invalid model name is only detected asynchronously, so the
        # error surfaces while waiting for the predictor, not at CREATE time.
        with pytest.raises(Exception) as excinfo:
            self.wait_predictor("proj", "test_openaai_unsupported_model_model")

        assert "Invalid model name." in str(excinfo.value)

    def test_full_flow_in_default_mode_with_question_column_for_single_prediction_runs_no_errors(self):
        """
        Test the full flow in default mode with a question column for a single prediction.
        """
        self.run_sql(
            """
            CREATE MODEL proj.test_openai_single_full_flow_default_mode_question_column
            PREDICT answer
            USING
                engine='openai_engine',
                question_column='question';
            """
        )

        self.wait_predictor("proj", "test_openai_single_full_flow_default_mode_question_column")

        result_df = self.run_sql(
            """
            SELECT answer
            FROM proj.test_openai_single_full_flow_default_mode_question_column
            WHERE question='What is the capital of Sweden?'
            """
        )
        assert "stockholm" in result_df["answer"].iloc[0].lower()

    @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
    def test_full_flow_in_default_mode_with_question_column_for_bulk_predictions_runs_no_errors(self, mock_handler):
        """
        Test the full flow in default mode with a question column for bulk predictions.
        """
        df = pd.DataFrame.from_dict({"question": [
            "What is the capital of Sweden?",
            "What is the second planet of the solar system?"
        ]})
        self.set_handler(mock_handler, name="pg", tables={"df": df})

        self.run_sql(
            """
            CREATE MODEL proj.test_openai_bulk_full_flow_default_mode_question_column
            PREDICT answer
            USING
                engine='openai_engine',
                question_column='question';
            """
        )

        self.wait_predictor("proj", "test_openai_bulk_full_flow_default_mode_question_column")

        result_df = self.run_sql(
            """
            SELECT p.answer
            FROM pg.df as t
            JOIN proj.test_openai_bulk_full_flow_default_mode_question_column as p;
            """
        )
        assert "stockholm" in result_df["answer"].iloc[0].lower()
        assert "venus" in result_df["answer"].iloc[1].lower()

    def test_full_flow_in_default_mode_with_prompt_template_for_single_prediction_runs_no_errors(self):
        """
        Test the full flow in default mode with a prompt template for a single prediction.
        """
        self.run_sql(
            """
            CREATE MODEL proj.test_openai_single_full_flow_default_mode_prompt_template
            PREDICT answer
            USING
                engine='openai_engine',
                prompt_template='Answer this question and add "Boom!" to the end of the answer: {{{{question}}}}';
            """
        )

        self.wait_predictor("proj", "test_openai_single_full_flow_default_mode_prompt_template")

        result_df = self.run_sql(
            """
            SELECT answer
            FROM proj.test_openai_single_full_flow_default_mode_prompt_template
            WHERE question='What is the capital of Sweden?'
            """
        )
        # "Boom!" proves the custom prompt template was actually applied.
        assert "stockholm" in result_df["answer"].iloc[0].lower()
        assert "boom!" in result_df["answer"].iloc[0].lower()

    @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
    def test_full_flow_in_default_mode_with_prompt_template_for_bulk_predictions_runs_no_errors(self, mock_handler):
        """
        Test the full flow in default mode with a prompt template for bulk predictions.
        """
        df = pd.DataFrame.from_dict({"question": [
            "What is the capital of Sweden?",
            "What is the second planet of the solar system?"
        ]})
        self.set_handler(mock_handler, name="pg", tables={"df": df})

        self.run_sql(
            """
            CREATE MODEL proj.test_openai_bulk_full_flow_default_mode_prompt_template
            PREDICT answer
            USING
                engine='openai_engine',
                prompt_template='Answer this question and add "Boom!" to the end of the answer: {{{{question}}}}';
            """
        )

        self.wait_predictor("proj", "test_openai_bulk_full_flow_default_mode_prompt_template")

        result_df = self.run_sql(
            """
            SELECT p.answer
            FROM pg.df as t
            JOIN proj.test_openai_bulk_full_flow_default_mode_prompt_template as p;
            """
        )
        assert "stockholm" in result_df["answer"].iloc[0].lower()
        assert "boom!" in result_df["answer"].iloc[0].lower()
        assert "venus" in result_df["answer"].iloc[1].lower()
        assert "boom!" in result_df["answer"].iloc[1].lower()

    def test_full_flow_in_embedding_mode_for_single_prediction_runs_no_errors(self):
        """
        Test the full flow in embedding mode for a single prediction.
        """
        self.run_sql(
            """
            CREATE MODEL proj.test_openai_single_full_flow_embedding_mode
            PREDICT answer
            USING
                engine='openai_engine',
                mode='embedding',
                model_name = 'text-embedding-ada-002',
                question_column = 'text';
            """
        )

        self.wait_predictor("proj", "test_openai_single_full_flow_embedding_mode")

        result_df = self.run_sql(
            """
            SELECT answer
            FROM proj.test_openai_single_full_flow_embedding_mode
            WHERE text='Sweden'
            """
        )
        # Embedding mode returns a vector (list of floats) per input row.
        assert isinstance(result_df["answer"].iloc[0], list)
        assert isinstance(result_df["answer"].iloc[0][0], float)

    @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
    def test_full_flow_in_embedding_mode_for_bulk_predictions_runs_no_errors(self, mock_handler):
        """
        Test the full flow in embedding mode for bulk predictions.
        """
        df = pd.DataFrame.from_dict({"text": [
            "Sweden",
            "Venus"
        ]})
        self.set_handler(mock_handler, name="pg", tables={"df": df})

        self.run_sql(
            """
            CREATE MODEL proj.test_openai_bulk_full_flow_embedding_mode
            PREDICT answer
            USING
                engine='openai_engine',
                mode='embedding',
                model_name = 'text-embedding-ada-002',
                question_column = 'text';
            """
        )

        self.wait_predictor("proj", "test_openai_bulk_full_flow_embedding_mode")

        result_df = self.run_sql(
            """
            SELECT p.answer
            FROM pg.df as t
            JOIN proj.test_openai_bulk_full_flow_embedding_mode as p;
            """
        )
        # One embedding vector per input row.
        assert isinstance(result_df["answer"].iloc[0], list)
        assert isinstance(result_df["answer"].iloc[0][0], float)
        assert isinstance(result_df["answer"].iloc[1], list)
        assert isinstance(result_df["answer"].iloc[1][0], float)

    def test_full_flow_in_image_mode_for_single_prediction_runs_no_errors(self):
        """
        Test the full flow in image mode for a single prediction.
        """
        self.run_sql(
            """
            CREATE MODEL proj.test_openai_single_full_flow_image_mode
            PREDICT answer
            USING
                engine='openai_engine',
                mode='image',
                prompt_template='Generate an image for: {{{{text}}}}'
            """
        )

        self.wait_predictor("proj", "test_openai_single_full_flow_image_mode")

        result_df = self.run_sql(
            """
            SELECT answer
            FROM proj.test_openai_single_full_flow_image_mode
            WHERE text='Leopard clubs playing in the jungle'
            """
        )
        # Image mode returns a string (presumably an image URL or base64
        # payload — only str-ness is asserted here).
        assert isinstance(result_df["answer"].iloc[0], str)

    @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
    def test_full_flow_in_image_mode_for_bulk_predictions_runs_no_errors(self, mock_handler):
        """
        Test the full flow in image mode for bulk predictions.
        """
        df = pd.DataFrame.from_dict({"text": [
            "Leopard clubs playing in the jungle",
            "A beautiful sunset over the ocean"
        ]})
        self.set_handler(mock_handler, name="pg", tables={"df": df})

        self.run_sql(
            """
            CREATE MODEL proj.test_openai_bulk_full_flow_image_mode
            PREDICT answer
            USING
                engine='openai_engine',
                mode='image',
                prompt_template='Generate an image for: {{{{text}}}}'
            """
        )

        self.wait_predictor("proj", "test_openai_bulk_full_flow_image_mode")

        result_df = self.run_sql(
            """
            SELECT p.answer
            FROM pg.df as t
            JOIN proj.test_openai_bulk_full_flow_image_mode as p;
            """
        )
        assert isinstance(result_df["answer"].iloc[0], str)
        assert isinstance(result_df["answer"].iloc[1], str)

    def test_full_flow_in_conversational_for_single_prediction_mode_runs_no_errors(self):
        """
        Test the full flow in conversational mode for a single prediction.
        """
        self.run_sql(
            """
            CREATE MODEL proj.test_openai_single_full_flow_conversational_mode
            PREDICT answer
            USING
                engine='openai_engine',
                mode='conversational',
                user_column='question',
                prompt='you are a helpful assistant',
                assistant_column='answer';
            """
        )

        self.wait_predictor("proj", "test_openai_single_full_flow_conversational_mode")

        result_df = self.run_sql(
            """
            SELECT answer
            FROM proj.test_openai_single_full_flow_conversational_mode
            WHERE question='What is the capital of Sweden?'
            """
        )
        assert "stockholm" in result_df["answer"].iloc[0].lower()

    @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
    def test_full_flow_in_conversational_mode_for_bulk_predictions_runs_no_errors(self, mock_handler):
        """
        Test the full flow in conversational mode for bulk predictions.
        """
        # The second question refers back to the first ("there"), exercising
        # conversational context carry-over.
        df = pd.DataFrame.from_dict({"question": [
            "What is the capital of Sweden?",
            "What are some cool places to visit there?"
        ]})
        self.set_handler(mock_handler, name="pg", tables={"df": df})

        self.run_sql(
            """
            CREATE MODEL proj.test_openai_bulk_full_flow_conversational_mode
            PREDICT answer
            USING
                engine='openai_engine',
                mode='conversational',
                user_column='question',
                prompt='you are a helpful assistant',
                assistant_column='answer';
            """
        )

        self.wait_predictor("proj", "test_openai_bulk_full_flow_conversational_mode")

        result_df = self.run_sql(
            """
            SELECT p.answer
            FROM pg.df as t
            JOIN proj.test_openai_bulk_full_flow_conversational_mode as p;
            """
        )
        # NOTE(review): in 'conversational' mode only the final turn appears
        # to be answered (earlier rows come back empty) — confirm against the
        # handler's conversational-mode semantics.
        assert result_df["answer"].iloc[0] == ""
        assert "gamla stan" in result_df["answer"].iloc[1].lower()

    def test_full_flow_in_conversational_full_mode_for_single_prediction_runs_no_errors(self):
        """
        Test the full flow in conversational-full mode for a single prediction.
        """
        self.run_sql(
            """
            CREATE MODEL proj.test_openai_single_full_flow_conversational_full_mode
            PREDICT answer
            USING
                engine='openai_engine',
                mode='conversational-full',
                user_column='question',
                prompt='you are a helpful assistant',
                assistant_column='answer';
            """
        )

        self.wait_predictor("proj", "test_openai_single_full_flow_conversational_full_mode")

        result_df = self.run_sql(
            """
            SELECT answer
            FROM proj.test_openai_single_full_flow_conversational_full_mode
            WHERE question='What is the capital of Sweden?'
            """
        )
        assert "stockholm" in result_df["answer"].iloc[0].lower()

    @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
    def test_full_flow_in_conversational_full_mode_for_bulk_predictions_runs_no_errors(self, mock_handler):
        """
        Test the full flow in conversational-full mode for bulk predictions.
        """
        df = pd.DataFrame.from_dict({"question": [
            "What is the capital of Sweden?",
            "What are some cool places to visit there?"
        ]})
        self.set_handler(mock_handler, name="pg", tables={"df": df})

        self.run_sql(
            """
            CREATE MODEL proj.test_openai_bulk_full_flow_conversational_full_mode
            PREDICT answer
            USING
                engine='openai_engine',
                mode='conversational-full',
                user_column='question',
                prompt='you are a helpful assistant',
                assistant_column='answer';
            """
        )

        self.wait_predictor("proj", "test_openai_bulk_full_flow_conversational_full_mode")

        result_df = self.run_sql(
            """
            SELECT p.answer
            FROM pg.df as t
            JOIN proj.test_openai_bulk_full_flow_conversational_full_mode as p;
            """
        )
        # Unlike plain 'conversational' mode, every turn gets an answer here.
        assert "stockholm" in result_df["answer"].iloc[0].lower()
        assert "gamla stan" in result_df["answer"].iloc[1].lower()

    # TODO: Fix this test for fine-tuning
    # @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
    # def test_full_flow_finetune_runs_no_errors(self, mock_handler):
    #     """
    #     Test the full flow for finetuning a model and making a prediction.
    #     """
    #     df = pd.DataFrame.from_dict(
    #         {
    #             "prompt": [
    #                 "What is the SQL syntax to connect a database to MindsDB?",
    #                 "What is the SQL command to connect to the demo postgres database for MindsDB learning hub examples?",
    #                 "What is the SQL syntax to create a MindsDB machine learning model?",
    #                 "What is the SQL syntax to join input data with predictions from a MindsDB machine learning model?"
    #             ],
    #             "completion": [
    #                 """
    #                 CREATE DATABASE datasource_name
    #                 [WITH] [ENGINE [=] engine_name] [,]
    #                 [PARAMETERS [=] {
    #                     "key": "value",
    #                     ...
    #                 }];
    #                 """,
    #                 """
    #                 CREATE DATABASE example_db
    #                 WITH
    #                 ENGINE = "postgres",
    #                 PARAMETERS = {
    #                     "user": "demo_user",
    #                     "password": "demo_password",
    #                     "host": "samples.mindsdb.com",
    #                     "port": "5432",
    #                     "database": "demo"
    #                 };
    #                 """,
    #                 """
    #                 CREATE MODEL
    #                 mindsdb.home_rentals_model
    #                 FROM example_db
    #                 (SELECT * FROM demo_data.home_rentals)
    #                 PREDICT rental_price;
    #                 """,
    #                 """
    #                 SELECT t.column_name, p.column_name, ...
    #                 FROM integration_name.table_name [AS] t
    #                 JOIN project_name.model_name [AS] p;
    #                 """
    #             ]
    #         }
    #     )
    #     self.set_handler(mock_handler, name="pg", tables={"df": df})

    #     self.run_sql(
    #         f"""
    #         CREATE MODEL proj.test_openai_full_flow_finetune
    #         PREDICT completion
    #         USING
    #             engine = 'openai_engine',
    #             model_name = 'davinci-002',
    #             prompt_template = 'Return a valid SQL string for the following question about MindsDB in-database machine learning: {{{{prompt}}}}';
    #         """
    #     )

    #     self.wait_predictor("proj", "test_openai_full_flow_finetune")

    #     self.run_sql(
    #         """
    #         FINETUNE proj.test_openai_full_flow_finetune
    #         FROM pg
    #         (SELECT prompt, completion FROM df);
    #         """
    #     )

    #     self.wait_predictor("proj", "test_openai_full_flow_finetune", finetune=True)

    #     result_df = self.run_sql(
    #         """
    #         SELECT prompt, completion
    #         FROM proj.test_openai_full_flow_finetune
    #         WHERE prompt = 'What is the SQL syntax to join input data with predictions from a MindsDB machine learning model?'
    #         USING max_tokens=400;
    #         """
    #     )
    #     assert "SELECT t.column_name, p.column_name, ..." in result_df["completion"].iloc[0].lower()

# Allow invoking this module directly; delegates to pytest's own runner.
if __name__ == "__main__":
    pytest.main([__file__])