Coverage for mindsdb / interfaces / skills / custom / text2sql / mindsdb_sql_toolkit.py: 0%

31 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-21 00:36 +0000

1from typing import List 

2from textwrap import dedent 

3from datetime import datetime 

4 

5from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit 

6from langchain_community.tools import ListSQLDatabaseTool, InfoSQLDatabaseTool, QuerySQLDataBaseTool 

7from langchain_core.tools import BaseTool 

8 

9from mindsdb.interfaces.skills.custom.text2sql.mindsdb_sql_tool import MindsDBSQLParserTool 

10from mindsdb.interfaces.skills.custom.text2sql.mindsdb_kb_tools import ( 

11 KnowledgeBaseListTool, 

12 KnowledgeBaseInfoTool, 

13 KnowledgeBaseQueryTool, 

14) 

15 

16 

17class MindsDBSQLToolkit(SQLDatabaseToolkit): 

18 include_tables_tools: bool = True 

19 include_knowledge_base_tools: bool = True 

20 

21 def get_tools(self, prefix="") -> List[BaseTool]: 

22 current_date_time = datetime.now().strftime("%Y-%m-%d %H:%M") 

23 

24 """Get the tools in the toolkit.""" 

25 list_sql_database_tool = ListSQLDatabaseTool( 

26 name=f"sql_db_list_tables{prefix}", 

27 db=self.db, 

28 description=dedent( 

29 """\n 

30 Input is an empty string, output is a comma-separated list of tables in the database. Each table name is escaped using backticks. 

31 Each table name in the list may be in one of two formats: database_name.`table_name` or database_name.schema_name.`table_name`. 

32 Table names in response to the user must be escaped using backticks. 

33 """ 

34 ), 

35 ) 

36 

37 info_sql_database_tool_description = ( 

38 "Input: A comma-separated list of tables enclosed between the symbols $START$ and $STOP$. The tables names itself must be escaped using backticks.\n" 

39 "Output: Schema and sample rows for those tables. \n" 

40 "Use this tool to investigate table schemas for needed columns. " 

41 f"Ensure tables exist by calling {list_sql_database_tool.name} first. " 

42 # "The names of tables, schemas, and databases must be escaped using backticks. " 

43 # "Always enclose the names of tables, schemas, and databases in backticks. " 

44 "Get sample data with 'SELECT * FROM `database`.`table` LIMIT 3' before answering questions. \n" 

45 "Example of correct Input:\n $START$ `database`.`table1`, `database`.`table2`, `database`.`table3` $STOP$\n" 

46 " $START$ `table1` `table2` `table3` $STOP$\n" 

47 "Example of wrong Input:\n $START$ `database.table1`, `database.table2`, `database.table3` $STOP$\n" 

48 " $START$ table1 table2 table3 $STOP$\n" 

49 ) 

50 info_sql_database_tool = InfoSQLDatabaseTool( 

51 name=f"sql_db_schema{prefix}", db=self.db, description=info_sql_database_tool_description 

52 ) 

53 

54 query_sql_database_tool_description = dedent( 

55 f"""\ 

56 Input: A detailed and well-structured SQL query. The query must be enclosed between the symbols $START$ and $STOP$. 

57 Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields. 

58 This system is a highly intelligent and reliable SQL skill designed to work with databases. 

59 Follow these instructions with utmost precision: 

60 1. Final Response Format: 

61 - Assume the frontend fully supports Markdown unless the user specifies otherwise. 

62 - When the response contains data that fits a table format, present it as a properly formatted Markdown table 

63 - Ensure clarity and proper structure for easy readability. 

64 2. Sample Data: 

65 - Before answering a question, if you don't have sample data about a table, **always** get sample data using `SELECT * FROM table LIMIT 3` from the tables you believe are relevant to formulating your answers. 

66 3. Categorical Data: 

67 - Whenever working with a column where values seem categorical, especially when filtering with `WHERE col = 'value'`, `WHERE col IN (list of values)`, or `WHERE col NOT IN (list of values)`, **always** retrieve the distinct values first. 

68 - Before writing your main query, always run `SELECT DISTINCT col` to fetch a list of unique values from that column. This step is mandatory to ensure accurate queries and responses. 

69 4. Result Limiting and Counting: 

70 - Unless instructed otherwise by the user, always run a count on the final query first using `SELECT COUNT(*)`. 

71 - If the count is greater than 10, limit the query to return only 10 results initially. 

72 - **Always** inform the user of the total number of results available and specify that you are providing the first 10 results. 

73 - Let the user know they can request additional results and/or specify how they would like the results ordered or grouped. 

74 5. Date Handling: 

75 - **System current date and time: {current_date_time} (UTC or local timezone based on server settings).** 

76 - **Always** use `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date. 

77 - For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..` 

78 - Do not compare date values without casting columns to date. 

79 - For date interval operations, use Interval units as keywords. You can use keywords to specify units like days, hours, months, years, etc., directly without quotes. Examples: 

80 SELECT NOW() + INTERVAL 5 DAY; 

81 SELECT NOW() - INTERVAL 3 HOUR; 

82 SELECT NOW() + INTERVAL 2 MONTH + INTERVAL 3 DAY; 

83 SELECT NOW() - INTERVAL 1 YEAR; 

84 - Always run SELECT NOW() to retrieve the current date when answering current or relative to current date-related questions. 

85 6. Query Best Practices: 

86 - Always send only one query at a time. 

87 - Always enclose the names of tables, schemas, and databases in backticks. 

88 - The input SQL query must end with a semicolon. 

89 - Query only necessary columns, not all. 

90 - Use only existing column names from correct tables. 

91 - Use database-specific syntax for date operations. 

92 7. Error Handling: 

93 - For errors, rewrite and retry the query. 

94 - For 'Unknown column' errors, check table fields using info_sql_database_tool. 

95 8. Identity and Purpose: 

96 - When asked about yourself or your maker, state that you are a Data-Mind, created by MindsDB to help answer data questions. 

97 - When asked about your purpose or how you can help, explore the available data sources and then explain that you can answer questions based on the connected data. Provide a few relevant example questions that you could answer for the user about their data. 

98 9. Important: you can use only mysql quoting rules to compose queries: backticks (`) for identifiers, and single quotes (') for constants 

99 

100 Adhere to these guidelines for all queries and responses. Ask for clarification if needed. 

101 """ 

102 ) 

103 

104 query_sql_database_tool = QuerySQLDataBaseTool( 

105 name=f"sql_db_query{prefix}", db=self.db, description=query_sql_database_tool_description 

106 ) 

107 

108 mindsdb_sql_parser_tool_description = ( 

109 "Use this tool to ensure a SQL query passes the MindsDB SQL parser. " 

110 "If the query is not correct, it will be corrected and returned. Use the new query. " 

111 "If the query can't be corrected, an error is returned. In this case, rewrite and retry. " 

112 "If the query is correct, it will be parsed and returned. " 

113 f"ALWAYS run this tool before executing a query with {query_sql_database_tool.name}. " 

114 ) 

115 mindsdb_sql_parser_tool = MindsDBSQLParserTool( # noqa: F841 

116 name=f"mindsdb_sql_parser_tool{prefix}", description=mindsdb_sql_parser_tool_description 

117 ) 

118 

119 sql_tools = [ 

120 query_sql_database_tool, 

121 info_sql_database_tool, 

122 list_sql_database_tool, 

123 # mindsdb_sql_parser_tool, 

124 ] 

125 if not self.include_knowledge_base_tools: 

126 return sql_tools 

127 

128 # Knowledge base tools 

129 kb_list_tool = KnowledgeBaseListTool( 

130 name=f"kb_list_tool{prefix}", 

131 db=self.db, 

132 description=dedent( 

133 """\ 

134 Lists all available knowledge bases that can be queried. 

135 Input: No input required, just call the tool directly. 

136 Output: A table of all available knowledge bases with their names and creation dates. 

137 

138 Use this tool first when answering factual questions to see what knowledge bases are available. 

139 Each knowledge base name is escaped using backticks. 

140 

141 Example usage: kb_list_tool() 

142 """ 

143 ), 

144 ) 

145 

146 kb_info_tool = KnowledgeBaseInfoTool( 

147 name=f"kb_info_tool{prefix}", 

148 db=self.db, 

149 description=dedent( 

150 f"""\ 

151 Gets detailed information about specific knowledge bases including their structure and metadata fields. 

152 

153 Input: A knowledge base name as a simple string. 

154 Output: Schema, metadata columns, and sample rows for the specified knowledge base. 

155 

156 Use this after kb_list_tool to understand what information is contained in the knowledge base 

157 and what metadata fields are available for filtering. 

158 

159 Example usage: kb_info_tool("kb_name") 

160 

161 Make sure the knowledge base exists by calling {kb_list_tool.name} first. 

162 """ 

163 ), 

164 ) 

165 

166 kb_query_tool = KnowledgeBaseQueryTool( 

167 name=f"kb_query_tool{prefix}", 

168 db=self.db, 

169 description=dedent( 

170 f"""\ 

171 Queries knowledge bases using SQL syntax to retrieve relevant information. 

172 

173 Input: A SQL query string that targets a knowledge base. 

174 Output: Knowledge base search results or error message. 

175 

176 This tool is designed for semantic search and metadata filtering in MindsDB knowledge bases. 

177 

178 Query Types and Examples: 

179 1. Basic semantic search: 

180 kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'your search query';") 

181 

182 2. Metadata filtering: 

183 kb_query_tool("SELECT * FROM kb_name WHERE metadata_field = 'value';") 

184 

185 3. Combined search: 

186 kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'query' AND metadata_field = 'value';") 

187 

188 4. Setting relevance threshold: 

189 kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'query' AND relevance_threshold = 0.7;") 

190 

191 5. Limiting results: 

192 kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'query' LIMIT 5;") 

193 

194 6. Getting sample data: 

195 kb_query_tool("SELECT * FROM kb_name LIMIT 3;") 

196 

197 Result Format: 

198 - Results include: id, chunk_id, chunk_content, metadata, distance, and relevance columns 

199 - The metadata column contains a JSON object with all metadata fields 

200 

201 Best Practices: 

202 - Always check available knowledge bases with {kb_list_tool.name} first 

203 - Use {kb_info_tool.name} to understand the structure and metadata fields 

204 - Always include a semicolon at the end of your SQL query 

205 

206 For factual questions, use this tool to retrieve information rather than relying on the model's knowledge. 

207 """ 

208 ), 

209 ) 

210 

211 # Return standard SQL tools and knowledge base tools 

212 kb_tools = [ 

213 kb_list_tool, 

214 kb_info_tool, 

215 kb_query_tool, 

216 ] 

217 

218 if not self.include_tables_tools: 

219 return kb_tools 

220 else: 

221 return sql_tools + kb_tools