Coverage for mindsdb / integrations / utilities / pydantic_utils.py: 0%
89 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
1import pprint
# Prompt template that explains how to read a Pydantic JSON schema.
# It carries two replacement fields, `{schema}` and `{example}`, and contains
# no other braces, so it can be filled in with
# `pydantic_schema_description.format(schema=..., example=...)`.
pydantic_schema_description = """## Understanding Pydantic Schemas for JSON Formatting

Pydantic schemas provide a framework for defining the structure and validation rules for JSON output. Below is an overview of key components commonly found in a Pydantic schema:

### Key Components

Each object in the schema represents a Pydantic model in JSON format. Typical fields in a Pydantic model description include:

- **`anyOf`**:
  - A list describing possible values for a Pydantic model field.

- **`additionalProperties`**:
  - Describes the values of a dictionary. Keys are always of type `string` due to this being a JSON Pydantic schema. The corresponding value types supported by Pydantic are:
    - `string`: a text string
    - `integer`: an integer number
    - `number`: a floating-point number

- **`items`**:
  - Describes the items contained within an `array` (list).

- **`type`**:
  - Specifies the Pydantic type assigned to the field, defining the expected data type. Common types include:
    - `string`: a text string
    - `integer`: an integer number
    - `number`: a floating-point number
    - `array`: a list
    - `object`: a dictionary
    - `null`: the python null value None. Indicates the field is optional.

- **`description`**:
  - Provides a textual narrative explaining the purpose and details of the output JSON field.

- **`title`**:
  - A Pydantic-generated, human-readable title for the field.

- **`default`**:
  - The default value for this field if no value is provided by the user.

### Schema

Below is the Pydantic schema:

{schema}

### Examples

Below is an example of well-formed output adhering to this schema.

- Dummy text strings are represented as "lorem ipsum."

{example}
"""
def get_dummy_value(field_value):
    """Return a placeholder value for a scalar/object JSON-schema field.

    Args:
        field_value: A field description containing a ``"type"`` entry
            (a JSON-schema type name such as ``"string"`` or ``"integer"``).

    Returns:
        A dummy value matching the declared type, or ``None`` when the type is
        ``"null"`` or is not one of the types known to this function.

    Raises:
        KeyError: If ``field_value`` has no ``"type"`` entry.
    """
    type_str = field_value["type"]

    # Built on every call so that callers receive a fresh (mutable) dict for
    # the "object" case rather than a shared instance.
    example_dict = {
        "string": "lorem ipsum",
        # JSON schema names the integer type "integer"; the original table only
        # had "int", so integer fields silently produced None.
        "integer": 3,
        "int": 3,  # kept for backward compatibility with the old key
        "number": 42.0,
        "boolean": True,
        "null": None,
        "object": {"lorem ipsum": "lorem_ipsum"},
    }

    return example_dict.get(type_str)
def get_dummy_array(field_value):
    """Return a two-element dummy list for an ``array`` JSON-schema field.

    The element is generated once from ``field_value["items"]`` and repeated
    twice, so both list entries refer to the same object.

    Args:
        field_value: A field description containing an ``"items"`` entry that
            describes the array elements.

    Returns:
        A list of length two holding the dummy element.

    Raises:
        KeyError: If ``field_value`` has no ``"items"`` entry.
    """
    items = field_value["items"]

    if "type" in items:
        item_type = items["type"]
        if item_type == "null":
            # An array of nulls: previously this branch left the element
            # unbound and the return statement raised UnboundLocalError.
            array_value = None
        elif item_type == "array":  # nested array
            array_value = get_dummy_array(items)
        elif item_type == "object" and "additionalProperties" in items:  # dict
            array_value = get_dummy_dict(items)
        else:  # regular scalar value
            array_value = get_dummy_value(items)
    elif "anyOf" in items:
        # JSON schema spells this key "anyOf"; the previous "AnyOf" check never
        # matched, so union-typed items fell through to the class branch.
        array_value = get_any_of(items)
    else:  # no type and no union: treat as an embedded pydantic class
        array_value = example_generator(items)

    return [array_value for _ in range(2)]
def get_dummy_dict(field_value):
    """Return a dummy dictionary for a dict-typed Pydantic model field.

    This simply delegates to ``get_dummy_value``, so the result is determined
    by ``field_value["type"]`` alone.
    """
    dummy = get_dummy_value(field_value)
    return dummy
def get_any_of(field_value):
    """Return a dummy value for the first non-null option of an ``anyOf`` field.

    Options are examined in order. ``null`` options are skipped; the first
    remaining option decides the result. If every option is ``null`` (or the
    list is empty) the function returns ``None``.
    """
    for candidate in field_value["anyOf"]:
        # No explicit type: treat the option as an embedded pydantic class.
        if "type" not in candidate:
            return example_generator(candidate)

        kind = candidate["type"]
        if kind == "null":
            continue
        if kind == "array":
            return get_dummy_array(candidate)
        if kind == "object" and "additionalProperties" in candidate:
            return get_dummy_dict(candidate)
        # Anything else is handled as a regular scalar value.
        return get_dummy_value(candidate)

    return None
def example_generator(pydantic_json_schema):
    """Build an example dict illustrating the layout of a Pydantic schema.

    ``pydantic_json_schema`` maps schema names to their field descriptions.
    The fields of every schema are merged into a single result dict keyed by
    field name, each mapped to a generated dummy value.
    """
    generated = {}

    for fields in pydantic_json_schema.values():
        for name, spec in fields.items():
            if "type" not in spec:
                if "anyOf" in spec:
                    # Union of several possible types.
                    generated[name] = get_any_of(spec)
                else:
                    # Neither typed nor a union: an embedded pydantic class.
                    generated[name] = example_generator(spec)
                continue

            kind = spec["type"]
            if kind == "array":
                builder = get_dummy_array
            elif kind == "object" and "additionalProperties" in spec:
                builder = get_dummy_dict
            else:
                builder = get_dummy_value
            generated[name] = builder(spec)

    return generated
def search_and_replace_refs(schema, defs, ref_skip=None, n=0):
    """Substitute ``$ref`` entries in a schema with the referenced properties.

    The schema is walked recursively and modified in place. A ``$ref`` is
    replaced by ``{definition_name: defs[definition_name]["properties"]}``, or
    by ``{"type": "null"}`` when the definition name is listed in ``ref_skip``.
    Substituted definitions are not themselves scanned for further references.

    Args:
        schema: Mapping to scan; mutated in place.
        defs: Mapping of definition name to its schema (must contain
            ``"properties"`` for every definition that gets substituted).
        ref_skip: Container of keys / definition names to leave out. Keys of
            ``schema`` found in it are not visited at all. Defaults to empty.
        n: Recursion depth; passed along incremented but otherwise unused.

    Raises:
        KeyError: If a referenced definition is missing from ``defs`` or has
            no ``"properties"`` entry.
    """
    if ref_skip is None:
        ref_skip = {}

    def resolve(node):
        """Return the replacement for a dict that holds a ``$ref``."""
        definition_key = node["$ref"].split("/")[-1]
        if definition_key in ref_skip:
            return {"type": "null"}
        return {definition_key: defs[definition_key]["properties"]}

    for key, value in schema.items():
        if key in ref_skip:
            continue
        if isinstance(value, dict):
            if "$ref" in value:
                # Re-assigning an existing key is safe while iterating.
                schema[key] = resolve(value)
            else:
                search_and_replace_refs(value, defs, ref_skip, n + 1)
        elif isinstance(value, list):
            for index, element in enumerate(value):
                # Lists may hold plain values (e.g. a list-valued default);
                # only dict elements can carry references.
                if not isinstance(element, dict):
                    continue
                if "$ref" in element:
                    # e.g. {"anyOf": [{"$ref": ...}, {"type": "null"}]}
                    value[index] = resolve(element)
                else:
                    search_and_replace_refs(element, defs, ref_skip, n + 1)
def remove_extraneous_fields(schema, ref_skip):
    """Strip skipped properties and bookkeeping keys from a schema.

    Works on ``schema["properties"]`` in place: every name in ``ref_skip`` is
    removed from the properties, and the ``title``, ``$defs`` and ``required``
    entries are deleted from each remaining property description. The
    (mutated) properties mapping is returned.
    """
    properties = schema["properties"]

    # Drop whole properties the caller asked to skip.
    for skipped in ref_skip.keys():
        properties.pop(skipped, None)

    # Remove per-field entries that add nothing to the description.
    noise_keys = ("title", "$defs", "required")
    for field_spec in properties.values():
        for noise in noise_keys:
            if noise in field_spec:
                del field_spec[noise]

    return properties
def format_for_prompt(pydantic_object, ref_skip=None):
    """Format a Pydantic object description for prompting an LLM.

    Args:
        pydantic_object: Object exposing a ``schema()`` method that returns a
            JSON-schema mapping with ``"properties"`` and ``"title"`` entries.
        ref_skip: Mapping whose keys name the properties / definitions to leave
            out of the result. Defaults to an empty mapping.

    Returns:
        A ``(text, reduced_schema)`` tuple: ``reduced_schema`` is
        ``{title: properties}`` with references substituted and extraneous
        fields removed, and ``text`` is its ``pprint.pformat`` rendering.
    """
    if ref_skip is None:
        ref_skip = {}

    # Shallow copy: the nested "properties" mapping is still shared with the
    # dict returned by schema() and is modified in place below.
    schema = dict(pydantic_object.schema())

    # A model without nested sub-models has no definitions section, so
    # indexing "$defs" directly raised KeyError. "definitions" is the name
    # older Pydantic releases use for the same section.
    defs = schema.get("$defs") or schema.get("definitions") or {}

    search_and_replace_refs(
        schema=schema["properties"], defs=defs, ref_skip=ref_skip, n=0
    )

    reduced_schema = remove_extraneous_fields(schema, ref_skip)
    reduced_schema = {schema["title"]: reduced_schema}

    out = pprint.pformat(reduced_schema)

    return out, reduced_schema