Coverage for mindsdb / interfaces / skills / custom / text2sql / mindsdb_sql_toolkit.py: 0%
31 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
1from typing import List
2from textwrap import dedent
3from datetime import datetime
5from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
6from langchain_community.tools import ListSQLDatabaseTool, InfoSQLDatabaseTool, QuerySQLDataBaseTool
7from langchain_core.tools import BaseTool
9from mindsdb.interfaces.skills.custom.text2sql.mindsdb_sql_tool import MindsDBSQLParserTool
10from mindsdb.interfaces.skills.custom.text2sql.mindsdb_kb_tools import (
11 KnowledgeBaseListTool,
12 KnowledgeBaseInfoTool,
13 KnowledgeBaseQueryTool,
14)
17class MindsDBSQLToolkit(SQLDatabaseToolkit):
18 include_tables_tools: bool = True
19 include_knowledge_base_tools: bool = True
21 def get_tools(self, prefix="") -> List[BaseTool]:
22 current_date_time = datetime.now().strftime("%Y-%m-%d %H:%M")
24 """Get the tools in the toolkit."""
25 list_sql_database_tool = ListSQLDatabaseTool(
26 name=f"sql_db_list_tables{prefix}",
27 db=self.db,
28 description=dedent(
29 """\n
30 Input is an empty string, output is a comma-separated list of tables in the database. Each table name is escaped using backticks.
31 Each table name in the list may be in one of two formats: database_name.`table_name` or database_name.schema_name.`table_name`.
32 Table names in response to the user must be escaped using backticks.
33 """
34 ),
35 )
37 info_sql_database_tool_description = (
38 "Input: A comma-separated list of tables enclosed between the symbols $START$ and $STOP$. The tables names itself must be escaped using backticks.\n"
39 "Output: Schema and sample rows for those tables. \n"
40 "Use this tool to investigate table schemas for needed columns. "
41 f"Ensure tables exist by calling {list_sql_database_tool.name} first. "
42 # "The names of tables, schemas, and databases must be escaped using backticks. "
43 # "Always enclose the names of tables, schemas, and databases in backticks. "
44 "Get sample data with 'SELECT * FROM `database`.`table` LIMIT 3' before answering questions. \n"
45 "Example of correct Input:\n $START$ `database`.`table1`, `database`.`table2`, `database`.`table3` $STOP$\n"
46 " $START$ `table1` `table2` `table3` $STOP$\n"
47 "Example of wrong Input:\n $START$ `database.table1`, `database.table2`, `database.table3` $STOP$\n"
48 " $START$ table1 table2 table3 $STOP$\n"
49 )
50 info_sql_database_tool = InfoSQLDatabaseTool(
51 name=f"sql_db_schema{prefix}", db=self.db, description=info_sql_database_tool_description
52 )
54 query_sql_database_tool_description = dedent(
55 f"""\
56 Input: A detailed and well-structured SQL query. The query must be enclosed between the symbols $START$ and $STOP$.
57 Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields.
58 This system is a highly intelligent and reliable SQL skill designed to work with databases.
59 Follow these instructions with utmost precision:
60 1. Final Response Format:
61 - Assume the frontend fully supports Markdown unless the user specifies otherwise.
62 - When the response contains data that fits a table format, present it as a properly formatted Markdown table
63 - Ensure clarity and proper structure for easy readability.
64 2. Sample Data:
65 - Before answering a question, if you don't have sample data about a table, **always** get sample data using `SELECT * FROM table LIMIT 3` from the tables you believe are relevant to formulating your answers.
66 3. Categorical Data:
67 - Whenever working with a column where values seem categorical, especially when filtering with `WHERE col = 'value'`, `WHERE col IN (list of values)`, or `WHERE col NOT IN (list of values)`, **always** retrieve the distinct values first.
68 - Before writing your main query, always run `SELECT DISTINCT col` to fetch a list of unique values from that column. This step is mandatory to ensure accurate queries and responses.
69 4. Result Limiting and Counting:
70 - Unless instructed otherwise by the user, always run a count on the final query first using `SELECT COUNT(*)`.
71 - If the count is greater than 10, limit the query to return only 10 results initially.
72 - **Always** inform the user of the total number of results available and specify that you are providing the first 10 results.
73 - Let the user know they can request additional results and/or specify how they would like the results ordered or grouped.
74 5. Date Handling:
75 - **System current date and time: {current_date_time} (UTC or local timezone based on server settings).**
76 - **Always** use `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date.
77 - For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..`
78 - Do not compare date values without casting columns to date.
79 - For date interval operations, use Interval units as keywords. You can use keywords to specify units like days, hours, months, years, etc., directly without quotes. Examples:
80 SELECT NOW() + INTERVAL 5 DAY;
81 SELECT NOW() - INTERVAL 3 HOUR;
82 SELECT NOW() + INTERVAL 2 MONTH + INTERVAL 3 DAY;
83 SELECT NOW() - INTERVAL 1 YEAR;
84 - Always run SELECT NOW() to retrieve the current date when answering current or relative to current date-related questions.
85 6. Query Best Practices:
86 - Always send only one query at a time.
87 - Always enclose the names of tables, schemas, and databases in backticks.
88 - The input SQL query must end with a semicolon.
89 - Query only necessary columns, not all.
90 - Use only existing column names from correct tables.
91 - Use database-specific syntax for date operations.
92 7. Error Handling:
93 - For errors, rewrite and retry the query.
94 - For 'Unknown column' errors, check table fields using info_sql_database_tool.
95 8. Identity and Purpose:
96 - When asked about yourself or your maker, state that you are a Data-Mind, created by MindsDB to help answer data questions.
97 - When asked about your purpose or how you can help, explore the available data sources and then explain that you can answer questions based on the connected data. Provide a few relevant example questions that you could answer for the user about their data.
98 9. Important: you can use only mysql quoting rules to compose queries: backticks (`) for identifiers, and single quotes (') for constants
100 Adhere to these guidelines for all queries and responses. Ask for clarification if needed.
101 """
102 )
104 query_sql_database_tool = QuerySQLDataBaseTool(
105 name=f"sql_db_query{prefix}", db=self.db, description=query_sql_database_tool_description
106 )
108 mindsdb_sql_parser_tool_description = (
109 "Use this tool to ensure a SQL query passes the MindsDB SQL parser. "
110 "If the query is not correct, it will be corrected and returned. Use the new query. "
111 "If the query can't be corrected, an error is returned. In this case, rewrite and retry. "
112 "If the query is correct, it will be parsed and returned. "
113 f"ALWAYS run this tool before executing a query with {query_sql_database_tool.name}. "
114 )
115 mindsdb_sql_parser_tool = MindsDBSQLParserTool( # noqa: F841
116 name=f"mindsdb_sql_parser_tool{prefix}", description=mindsdb_sql_parser_tool_description
117 )
119 sql_tools = [
120 query_sql_database_tool,
121 info_sql_database_tool,
122 list_sql_database_tool,
123 # mindsdb_sql_parser_tool,
124 ]
125 if not self.include_knowledge_base_tools:
126 return sql_tools
128 # Knowledge base tools
129 kb_list_tool = KnowledgeBaseListTool(
130 name=f"kb_list_tool{prefix}",
131 db=self.db,
132 description=dedent(
133 """\
134 Lists all available knowledge bases that can be queried.
135 Input: No input required, just call the tool directly.
136 Output: A table of all available knowledge bases with their names and creation dates.
138 Use this tool first when answering factual questions to see what knowledge bases are available.
139 Each knowledge base name is escaped using backticks.
141 Example usage: kb_list_tool()
142 """
143 ),
144 )
146 kb_info_tool = KnowledgeBaseInfoTool(
147 name=f"kb_info_tool{prefix}",
148 db=self.db,
149 description=dedent(
150 f"""\
151 Gets detailed information about specific knowledge bases including their structure and metadata fields.
153 Input: A knowledge base name as a simple string.
154 Output: Schema, metadata columns, and sample rows for the specified knowledge base.
156 Use this after kb_list_tool to understand what information is contained in the knowledge base
157 and what metadata fields are available for filtering.
159 Example usage: kb_info_tool("kb_name")
161 Make sure the knowledge base exists by calling {kb_list_tool.name} first.
162 """
163 ),
164 )
166 kb_query_tool = KnowledgeBaseQueryTool(
167 name=f"kb_query_tool{prefix}",
168 db=self.db,
169 description=dedent(
170 f"""\
171 Queries knowledge bases using SQL syntax to retrieve relevant information.
173 Input: A SQL query string that targets a knowledge base.
174 Output: Knowledge base search results or error message.
176 This tool is designed for semantic search and metadata filtering in MindsDB knowledge bases.
178 Query Types and Examples:
179 1. Basic semantic search:
180 kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'your search query';")
182 2. Metadata filtering:
183 kb_query_tool("SELECT * FROM kb_name WHERE metadata_field = 'value';")
185 3. Combined search:
186 kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'query' AND metadata_field = 'value';")
188 4. Setting relevance threshold:
189 kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'query' AND relevance_threshold = 0.7;")
191 5. Limiting results:
192 kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'query' LIMIT 5;")
194 6. Getting sample data:
195 kb_query_tool("SELECT * FROM kb_name LIMIT 3;")
197 Result Format:
198 - Results include: id, chunk_id, chunk_content, metadata, distance, and relevance columns
199 - The metadata column contains a JSON object with all metadata fields
201 Best Practices:
202 - Always check available knowledge bases with {kb_list_tool.name} first
203 - Use {kb_info_tool.name} to understand the structure and metadata fields
204 - Always include a semicolon at the end of your SQL query
206 For factual questions, use this tool to retrieve information rather than relying on the model's knowledge.
207 """
208 ),
209 )
211 # Return standard SQL tools and knowledge base tools
212 kb_tools = [
213 kb_list_tool,
214 kb_info_tool,
215 kb_query_tool,
216 ]
218 if not self.include_tables_tools:
219 return kb_tools
220 else:
221 return sql_tools + kb_tools