# Integration tests for the OpenAI handler.
# (Recovered from a coverage.py v7.13.1 report dump: 108 statements, 0% covered,
#  created at 2026-01-21 00:36 +0000 —
#  mindsdb/integrations/handlers/openai_handler/tests/test_openai_handler.py)

import os
from unittest.mock import patch

import pandas as pd
import pytest

from tests.unit.ml_handlers.base_ml_test import BaseMLAPITest

7 

8 

@pytest.mark.skipif(os.environ.get('MDB_TEST_MDB_OPENAI_API_KEY') is None, reason='Missing API key!')
class TestOpenAI(BaseMLAPITest):
    """
    Integration tests for the OpenAI handler.

    Each test creates a model via SQL in a test project and verifies
    predictions for one of the handler's modes: default (question column or
    prompt template), embedding, image, conversational, and
    conversational-full. Bulk-prediction tests mock a postgres handler to
    supply input rows. Requires the MDB_TEST_MDB_OPENAI_API_KEY environment
    variable (tests are skipped otherwise) and call the live OpenAI API.
    """

    def setup_method(self):
        """
        Setup test environment by creating a project and an OpenAI engine.
        """
        super().setup_method()
        self.run_sql("CREATE DATABASE proj")
        self.run_sql(
            f"""
            CREATE ML_ENGINE openai_engine
            FROM openai
            USING
            openai_api_key = '{self.get_api_key('MDB_TEST_MDB_OPENAI_API_KEY')}';
            """
        )

    def test_create_model_with_unsupported_model_raises_exception(self):
        """
        Test if CREATE MODEL raises an exception with an unsupported model.
        """
        self.run_sql(
            """
            CREATE MODEL proj.test_openaai_unsupported_model_model
            PREDICT answer
            USING
                engine='openai_engine',
                model_name='this-model-does-not-exist',
                prompt_template='dummy_prompt_template';
            """
        )
        # The invalid model name is only detected asynchronously, so the
        # error surfaces while waiting for the predictor, not at CREATE time.
        with pytest.raises(Exception) as excinfo:
            self.wait_predictor("proj", "test_openaai_unsupported_model_model")

        assert "Invalid model name." in str(excinfo.value)

    def test_full_flow_in_default_mode_with_question_column_for_single_prediction_runs_no_errors(self):
        """
        Test the full flow in default mode with a question column for a single prediction.
        """
        self.run_sql(
            """
            CREATE MODEL proj.test_openai_single_full_flow_default_mode_question_column
            PREDICT answer
            USING
                engine='openai_engine',
                question_column='question';
            """
        )

        self.wait_predictor("proj", "test_openai_single_full_flow_default_mode_question_column")

        result_df = self.run_sql(
            """
            SELECT answer
            FROM proj.test_openai_single_full_flow_default_mode_question_column
            WHERE question='What is the capital of Sweden?'
            """
        )
        assert "stockholm" in result_df["answer"].iloc[0].lower()

    @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
    def test_full_flow_in_default_mode_with_question_column_for_bulk_predictions_runs_no_errors(self, mock_handler):
        """
        Test the full flow in default mode with a question column for bulk predictions.
        """
        df = pd.DataFrame.from_dict({"question": [
            "What is the capital of Sweden?",
            "What is the second planet of the solar system?"
        ]})
        self.set_handler(mock_handler, name="pg", tables={"df": df})

        self.run_sql(
            """
            CREATE MODEL proj.test_openai_bulk_full_flow_default_mode_question_column
            PREDICT answer
            USING
                engine='openai_engine',
                question_column='question';
            """
        )

        self.wait_predictor("proj", "test_openai_bulk_full_flow_default_mode_question_column")

        result_df = self.run_sql(
            """
            SELECT p.answer
            FROM pg.df as t
            JOIN proj.test_openai_bulk_full_flow_default_mode_question_column as p;
            """
        )
        assert "stockholm" in result_df["answer"].iloc[0].lower()
        assert "venus" in result_df["answer"].iloc[1].lower()

    def test_full_flow_in_default_mode_with_prompt_template_for_single_prediction_runs_no_errors(self):
        """
        Test the full flow in default mode with a prompt template for a single prediction.
        """
        self.run_sql(
            """
            CREATE MODEL proj.test_openai_single_full_flow_default_mode_prompt_template
            PREDICT answer
            USING
                engine='openai_engine',
                prompt_template='Answer this question and add "Boom!" to the end of the answer: {{{{question}}}}';
            """
        )

        self.wait_predictor("proj", "test_openai_single_full_flow_default_mode_prompt_template")

        result_df = self.run_sql(
            """
            SELECT answer
            FROM proj.test_openai_single_full_flow_default_mode_prompt_template
            WHERE question='What is the capital of Sweden?'
            """
        )
        # "Boom!" proves the custom prompt template was actually applied.
        assert "stockholm" in result_df["answer"].iloc[0].lower()
        assert "boom!" in result_df["answer"].iloc[0].lower()

    @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
    def test_full_flow_in_default_mode_with_prompt_template_for_bulk_predictions_runs_no_errors(self, mock_handler):
        """
        Test the full flow in default mode with a prompt template for bulk predictions.
        """
        df = pd.DataFrame.from_dict({"question": [
            "What is the capital of Sweden?",
            "What is the second planet of the solar system?"
        ]})
        self.set_handler(mock_handler, name="pg", tables={"df": df})

        self.run_sql(
            """
            CREATE MODEL proj.test_openai_bulk_full_flow_default_mode_prompt_template
            PREDICT answer
            USING
                engine='openai_engine',
                prompt_template='Answer this question and add "Boom!" to the end of the answer: {{{{question}}}}';
            """
        )

        self.wait_predictor("proj", "test_openai_bulk_full_flow_default_mode_prompt_template")

        result_df = self.run_sql(
            """
            SELECT p.answer
            FROM pg.df as t
            JOIN proj.test_openai_bulk_full_flow_default_mode_prompt_template as p;
            """
        )
        assert "stockholm" in result_df["answer"].iloc[0].lower()
        assert "boom!" in result_df["answer"].iloc[0].lower()
        assert "venus" in result_df["answer"].iloc[1].lower()
        assert "boom!" in result_df["answer"].iloc[1].lower()

    def test_full_flow_in_embedding_mode_for_single_prediction_runs_no_errors(self):
        """
        Test the full flow in embedding mode for a single prediction.
        """
        self.run_sql(
            """
            CREATE MODEL proj.test_openai_single_full_flow_embedding_mode
            PREDICT answer
            USING
                engine='openai_engine',
                mode='embedding',
                model_name = 'text-embedding-ada-002',
                question_column = 'text';
            """
        )

        self.wait_predictor("proj", "test_openai_single_full_flow_embedding_mode")

        result_df = self.run_sql(
            """
            SELECT answer
            FROM proj.test_openai_single_full_flow_embedding_mode
            WHERE text='Sweden'
            """
        )
        # Embedding mode returns a vector (list of floats) per input row.
        assert isinstance(result_df["answer"].iloc[0], list)
        assert isinstance(result_df["answer"].iloc[0][0], float)

    @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
    def test_full_flow_in_embedding_mode_for_bulk_predictions_runs_no_errors(self, mock_handler):
        """
        Test the full flow in embedding mode for bulk predictions.
        """
        df = pd.DataFrame.from_dict({"text": [
            "Sweden",
            "Venus"
        ]})
        self.set_handler(mock_handler, name="pg", tables={"df": df})

        self.run_sql(
            """
            CREATE MODEL proj.test_openai_bulk_full_flow_embedding_mode
            PREDICT answer
            USING
                engine='openai_engine',
                mode='embedding',
                model_name = 'text-embedding-ada-002',
                question_column = 'text';
            """
        )

        self.wait_predictor("proj", "test_openai_bulk_full_flow_embedding_mode")

        result_df = self.run_sql(
            """
            SELECT p.answer
            FROM pg.df as t
            JOIN proj.test_openai_bulk_full_flow_embedding_mode as p;
            """
        )
        # One embedding vector per input row.
        assert isinstance(result_df["answer"].iloc[0], list)
        assert isinstance(result_df["answer"].iloc[0][0], float)
        assert isinstance(result_df["answer"].iloc[1], list)
        assert isinstance(result_df["answer"].iloc[1][0], float)

    def test_full_flow_in_image_mode_for_single_prediction_runs_no_errors(self):
        """
        Test the full flow in image mode for a single prediction.
        """
        self.run_sql(
            """
            CREATE MODEL proj.test_openai_single_full_flow_image_mode
            PREDICT answer
            USING
                engine='openai_engine',
                mode='image',
                prompt_template='Generate an image for: {{{{text}}}}'
            """
        )

        self.wait_predictor("proj", "test_openai_single_full_flow_image_mode")

        result_df = self.run_sql(
            """
            SELECT answer
            FROM proj.test_openai_single_full_flow_image_mode
            WHERE text='Leopard clubs playing in the jungle'
            """
        )
        # Image mode returns a string (presumably an image URL or base64
        # payload — only str-ness is asserted here).
        assert isinstance(result_df["answer"].iloc[0], str)

    @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
    def test_full_flow_in_image_mode_for_bulk_predictions_runs_no_errors(self, mock_handler):
        """
        Test the full flow in image mode for bulk predictions.
        """
        df = pd.DataFrame.from_dict({"text": [
            "Leopard clubs playing in the jungle",
            "A beautiful sunset over the ocean"
        ]})
        self.set_handler(mock_handler, name="pg", tables={"df": df})

        self.run_sql(
            """
            CREATE MODEL proj.test_openai_bulk_full_flow_image_mode
            PREDICT answer
            USING
                engine='openai_engine',
                mode='image',
                prompt_template='Generate an image for: {{{{text}}}}'
            """
        )

        self.wait_predictor("proj", "test_openai_bulk_full_flow_image_mode")

        result_df = self.run_sql(
            """
            SELECT p.answer
            FROM pg.df as t
            JOIN proj.test_openai_bulk_full_flow_image_mode as p;
            """
        )
        assert isinstance(result_df["answer"].iloc[0], str)
        assert isinstance(result_df["answer"].iloc[1], str)

    def test_full_flow_in_conversational_for_single_prediction_mode_runs_no_errors(self):
        """
        Test the full flow in conversational mode for a single prediction.
        """
        self.run_sql(
            """
            CREATE MODEL proj.test_openai_single_full_flow_conversational_mode
            PREDICT answer
            USING
                engine='openai_engine',
                mode='conversational',
                user_column='question',
                prompt='you are a helpful assistant',
                assistant_column='answer';
            """
        )

        self.wait_predictor("proj", "test_openai_single_full_flow_conversational_mode")

        result_df = self.run_sql(
            """
            SELECT answer
            FROM proj.test_openai_single_full_flow_conversational_mode
            WHERE question='What is the capital of Sweden?'
            """
        )
        assert "stockholm" in result_df["answer"].iloc[0].lower()

    @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
    def test_full_flow_in_conversational_mode_for_bulk_predictions_runs_no_errors(self, mock_handler):
        """
        Test the full flow in conversational mode for bulk predictions.
        """
        # The second question refers back to the first ("there"), exercising
        # conversational context carry-over.
        df = pd.DataFrame.from_dict({"question": [
            "What is the capital of Sweden?",
            "What are some cool places to visit there?"
        ]})
        self.set_handler(mock_handler, name="pg", tables={"df": df})

        self.run_sql(
            """
            CREATE MODEL proj.test_openai_bulk_full_flow_conversational_mode
            PREDICT answer
            USING
                engine='openai_engine',
                mode='conversational',
                user_column='question',
                prompt='you are a helpful assistant',
                assistant_column='answer';
            """
        )

        self.wait_predictor("proj", "test_openai_bulk_full_flow_conversational_mode")

        result_df = self.run_sql(
            """
            SELECT p.answer
            FROM pg.df as t
            JOIN proj.test_openai_bulk_full_flow_conversational_mode as p;
            """
        )
        # NOTE(review): in 'conversational' mode only the final turn appears
        # to be answered (earlier rows come back empty) — confirm against the
        # handler's conversational-mode semantics.
        assert result_df["answer"].iloc[0] == ""
        assert "gamla stan" in result_df["answer"].iloc[1].lower()

    def test_full_flow_in_conversational_full_mode_for_single_prediction_runs_no_errors(self):
        """
        Test the full flow in conversational-full mode for a single prediction.
        """
        self.run_sql(
            """
            CREATE MODEL proj.test_openai_single_full_flow_conversational_full_mode
            PREDICT answer
            USING
                engine='openai_engine',
                mode='conversational-full',
                user_column='question',
                prompt='you are a helpful assistant',
                assistant_column='answer';
            """
        )

        self.wait_predictor("proj", "test_openai_single_full_flow_conversational_full_mode")

        result_df = self.run_sql(
            """
            SELECT answer
            FROM proj.test_openai_single_full_flow_conversational_full_mode
            WHERE question='What is the capital of Sweden?'
            """
        )
        assert "stockholm" in result_df["answer"].iloc[0].lower()

    @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
    def test_full_flow_in_conversational_full_mode_for_bulk_predictions_runs_no_errors(self, mock_handler):
        """
        Test the full flow in conversational-full mode for bulk predictions.
        """
        df = pd.DataFrame.from_dict({"question": [
            "What is the capital of Sweden?",
            "What are some cool places to visit there?"
        ]})
        self.set_handler(mock_handler, name="pg", tables={"df": df})

        self.run_sql(
            """
            CREATE MODEL proj.test_openai_bulk_full_flow_conversational_full_mode
            PREDICT answer
            USING
                engine='openai_engine',
                mode='conversational-full',
                user_column='question',
                prompt='you are a helpful assistant',
                assistant_column='answer';
            """
        )

        self.wait_predictor("proj", "test_openai_bulk_full_flow_conversational_full_mode")

        result_df = self.run_sql(
            """
            SELECT p.answer
            FROM pg.df as t
            JOIN proj.test_openai_bulk_full_flow_conversational_full_mode as p;
            """
        )
        # Unlike plain 'conversational' mode, every turn gets an answer here.
        assert "stockholm" in result_df["answer"].iloc[0].lower()
        assert "gamla stan" in result_df["answer"].iloc[1].lower()

    # TODO: Fix this test for fine-tuning
    # @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
    # def test_full_flow_finetune_runs_no_errors(self, mock_handler):
    #     """
    #     Test the full flow for finetuning a model and making a prediction.
    #     """
    #     df = pd.DataFrame.from_dict(
    #         {
    #             "prompt": [
    #                 "What is the SQL syntax to connect a database to MindsDB?",
    #                 "What is the SQL command to connect to the demo postgres database for MindsDB learning hub examples?",
    #                 "What is the SQL syntax to create a MindsDB machine learning model?",
    #                 "What is the SQL syntax to join input data with predictions from a MindsDB machine learning model?"
    #             ],
    #             "completion": [
    #                 """
    #                 CREATE DATABASE datasource_name
    #                 [WITH] [ENGINE [=] engine_name] [,]
    #                 [PARAMETERS [=] {
    #                     "key": "value",
    #                     ...
    #                 }];
    #                 """,
    #                 """
    #                 CREATE DATABASE example_db
    #                 WITH
    #                 ENGINE = "postgres",
    #                 PARAMETERS = {
    #                     "user": "demo_user",
    #                     "password": "demo_password",
    #                     "host": "samples.mindsdb.com",
    #                     "port": "5432",
    #                     "database": "demo"
    #                 };
    #                 """,
    #                 """
    #                 CREATE MODEL
    #                 mindsdb.home_rentals_model
    #                 FROM example_db
    #                 (SELECT * FROM demo_data.home_rentals)
    #                 PREDICT rental_price;
    #                 """,
    #                 """
    #                 SELECT t.column_name, p.column_name, ...
    #                 FROM integration_name.table_name [AS] t
    #                 JOIN project_name.model_name [AS] p;
    #                 """
    #             ]
    #         }
    #     )
    #     self.set_handler(mock_handler, name="pg", tables={"df": df})

    #     self.run_sql(
    #         f"""
    #         CREATE MODEL proj.test_openai_full_flow_finetune
    #         PREDICT completion
    #         USING
    #             engine = 'openai_engine',
    #             model_name = 'davinci-002',
    #             prompt_template = 'Return a valid SQL string for the following question about MindsDB in-database machine learning: {{{{prompt}}}}';
    #         """
    #     )

    #     self.wait_predictor("proj", "test_openai_full_flow_finetune")

    #     self.run_sql(
    #         """
    #         FINETUNE proj.test_openai_full_flow_finetune
    #         FROM pg
    #         (SELECT prompt, completion FROM df);
    #         """
    #     )

    #     self.wait_predictor("proj", "test_openai_full_flow_finetune", finetune=True)

    #     result_df = self.run_sql(
    #         """
    #         SELECT prompt, completion
    #         FROM proj.test_openai_full_flow_finetune
    #         WHERE prompt = 'What is the SQL syntax to join input data with predictions from a MindsDB machine learning model?'
    #         USING max_tokens=400;
    #         """
    #     )
    #     assert "SELECT t.column_name, p.column_name, ..." in result_df["completion"].iloc[0].lower()

# Allow invoking this module directly; delegates to pytest's own runner.
if __name__ == "__main__":
    pytest.main([__file__])