Coverage for mindsdb / integrations / utilities / pydantic_utils.py: 0%

89 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-21 00:36 +0000

1import pprint 

2 

3pydantic_schema_description = """## Understanding Pydantic Schemas for JSON Formatting 

4 

5Pydantic schemas provide a framework for defining the structure and validation rules for JSON output. Below is an overview of key components commonly found in a Pydantic schema: 

6 

7### Key Components 

8 

9Each object in the schema represents a Pydantic model in JSON format. Typical fields in a Pydantic model description include: 

10 

11- **`anyOf`**: 

12 - A list describing possible values for a Pydantic model field. 

13 

14- **`additionalProperties`**: 

15 - Describes the keys of a dictionary. Keys are always of type `string` due to this being a JSON Pydantic schema. The corresponding key types supported by Pydantic are: 

16 - `string`: a text string 

17 - `integer`: an integer number 

18 - `number`: a floating-point number 

19 

20- **`items`**: 

21 - Describes the items contained within an `array` (list). 

22 

23- **`type`**: 

24 - Specifies the Pydantic type assigned to the field, defining the expected data type. Common types include: 

25 - `string`: a text string 

26 - `integer`: an integer number 

27 - `number`: a floating-point number 

28 - `array`: a list 

29 - `object`: a dictionary 

30 - `null`: the python null value None. Indicates the field is optional. 

31 

32- **`description`**: 

33 - Provides a textual narrative explaining the purpose and details of the output JSON field. 

34 

35- **`title`**: 

36 - A Pydantic-generated, human-readable title for the field. 

37 

38- **`default`**: 

39 - The default value for this field if no value is provided by the user. 

40 

41### Schema 

42 

43Below is the Pydantic schema: 

44 

45{schema} 

46 

47### Examples 

48 

49Below is an example of well-formed output adhering to this schema. 

50 

51- Dummy text strings are represented as "lorem ipsum." 

52 

53{example} 

54""" 

55 

56 

def get_dummy_value(field_value):
    """Return a placeholder value for a scalar JSON-schema field.

    Args:
        field_value: Field description; its ``"type"`` entry selects the
            placeholder.

    Returns:
        The placeholder registered for the declared type, or ``None`` when the
        type is ``"null"`` or is not recognised.

    Raises:
        KeyError: If ``field_value`` has no ``"type"`` entry.
    """
    # Built on every call so the mutable "object" placeholder is never shared
    # between callers.
    example_dict = {
        "string": "lorem ipsum",
        # JSON Schema names the type "integer"; the previous "int" key never
        # matched a generated schema. It is kept so existing callers that pass
        # "int" still get the same answer.
        "integer": 3,
        "int": 3,
        "number": 42.0,
        "boolean": True,
        "null": None,
        "object": {"lorem ipsum": "lorem_ipsum"},
    }
    return example_dict.get(field_value["type"])

72 

73 

def get_dummy_array(field_value):
    """Return a two-element example list for an ``array`` field.

    The element example is chosen from the field's ``"items"`` description:
    nested arrays recurse, dictionaries and scalars use the dummy-value
    helpers, ``anyOf`` unions use the first viable alternative, and anything
    else is treated as a nested Pydantic class description.

    Args:
        field_value: Field description containing an ``"items"`` entry.

    Returns:
        A list holding the same element example twice.

    Raises:
        KeyError: If ``field_value`` has no ``"items"`` entry.
    """
    items = field_value["items"]

    if "type" in items:
        item_type = items["type"]
        if item_type == "null":
            # Previously this branch assigned nothing, so the return below
            # raised UnboundLocalError. An array of nulls is shown as Nones.
            array_value = None
        elif item_type == "array":  # nested array
            array_value = get_dummy_array(items)
        elif item_type == "object" and "additionalProperties" in items:  # dict
            array_value = get_dummy_dict(items)
        else:  # regular scalar value
            array_value = get_dummy_value(items)
    elif "anyOf" in items:
        # The JSON-schema keyword is "anyOf"; the former "AnyOf" test never
        # matched, so unions were wrongly sent to example_generator.
        array_value = get_any_of(items)
    else:  # a nested Pydantic class description
        array_value = example_generator(items)

    return [array_value for _ in range(2)]

98 

99 

def get_dummy_dict(field_value):
    """Build the placeholder for a dictionary-typed Pydantic model field.

    The field description is forwarded unchanged to ``get_dummy_value`` and
    whatever that helper returns is handed back.
    """
    placeholder = get_dummy_value(field_value)
    return placeholder

103 

104 

def get_any_of(field_value):
    """Produce an example for the first non-null alternative of an ``anyOf`` field.

    Alternatives are examined in order. ``"null"`` alternatives are skipped;
    the first remaining one decides the result. When every alternative is
    ``"null"`` (or the list is empty) the result is ``None``.
    """
    for candidate in field_value["anyOf"]:
        if "type" not in candidate:
            # No explicit type: handled as a nested Pydantic class description.
            return example_generator(candidate)

        kind = candidate["type"]
        if kind == "null":
            continue
        if kind == "array":
            return get_dummy_array(candidate)
        if kind == "object" and "additionalProperties" in candidate:
            return get_dummy_dict(candidate)
        # Anything else is a regular scalar value.
        return get_dummy_value(candidate)

    return None

125 

126 

def example_generator(pydantic_json_schema):
    """Walk a Pydantic schema mapping and build an example of its output format.

    ``pydantic_json_schema`` maps schema names to field mappings. The fields of
    every schema are merged into a single flat dictionary of
    ``field name -> example value``.
    """
    example = {}

    for fields in pydantic_json_schema.values():
        for name, spec in fields.items():
            if "type" not in spec:
                if "anyOf" in spec:
                    # Union field: use the first viable alternative.
                    example[name] = get_any_of(spec)
                else:
                    # Untyped entry: a nested Pydantic class description.
                    example[name] = example_generator(spec)
                continue

            kind = spec["type"]
            if kind == "array":
                example[name] = get_dummy_array(spec)
            elif kind == "object" and "additionalProperties" in spec:
                example[name] = get_dummy_dict(spec)
            else:
                example[name] = get_dummy_value(spec)

    return example

154 

155 

def _resolve_ref(ref_value, defs, ref_skip):
    """Return the replacement for a single ``{"$ref": ...}`` entry."""
    definition_key = ref_value["$ref"].split("/")[-1]
    if definition_key in ref_skip:
        return {"type": "null"}

    definition = defs[definition_key]
    if "properties" in definition:
        return {definition_key: definition["properties"]}
    # Definitions without "properties" (e.g. enums) have no fields to inline;
    # substitute a copy of the definition itself instead of raising KeyError.
    return dict(definition)


def search_and_replace_refs(schema, defs, ref_skip=None, n=0):
    """Substitute ``$ref`` entries in a Pydantic schema with their definitions.

    ``schema`` is modified in place. A dictionary containing ``"$ref"`` is
    replaced by ``{definition name: definition properties}``, or by
    ``{"type": "null"}`` when the definition name is listed in ``ref_skip``.
    Other nested dictionaries, and dictionaries inside lists, are searched
    recursively. Inlined definitions are not themselves searched again.

    Args:
        schema: Mapping to rewrite in place.
        defs: Mapping of definition name to definition.
        ref_skip: Container of keys and definition names to leave alone;
            defaults to nothing skipped.
        n: Current recursion depth, passed along on each nested call.

    Raises:
        KeyError: If a reference names a definition missing from ``defs``.
    """
    if ref_skip is None:
        ref_skip = {}

    for key, value in schema.items():
        if key in ref_skip:
            continue

        if isinstance(value, dict):
            if "$ref" in value:
                # Values are replaced, never added or removed, so rewriting
                # during iteration is safe.
                schema[key] = _resolve_ref(value, defs, ref_skip)
            else:
                search_and_replace_refs(value, defs, ref_skip, n + 1)
        elif isinstance(value, list):
            for index, element in enumerate(value):
                # Lists such as "enum" or "default" hold plain values that
                # have nothing to substitute and no .items() to walk.
                if not isinstance(element, dict):
                    continue
                if "$ref" in element:
                    # e.g. a sub-model reference listed under "anyOf"
                    value[index] = _resolve_ref(element, defs, ref_skip)
                else:
                    search_and_replace_refs(element, defs, ref_skip, n + 1)

173 

174 

def remove_extraneous_fields(schema, ref_skip):
    """Strip skipped fields and bookkeeping keys from a schema's properties.

    Works directly on ``schema["properties"]``: entries named in ``ref_skip``
    are dropped, and ``"title"``, ``"$defs"`` and ``"required"`` are removed
    from every remaining field description. The same (now reduced) properties
    mapping is returned.
    """
    properties = schema["properties"]

    for skipped in ref_skip.keys():
        properties.pop(skipped, None)

    for description in properties.values():
        for extraneous in ("title", "$defs", "required"):
            if extraneous in description:
                del description[extraneous]

    return properties

192 

193 

def format_for_prompt(pydantic_object, ref_skip=None):
    """Format a Pydantic object description for prompting an LLM.

    Args:
        pydantic_object: Object exposing a ``schema()`` method that returns its
            JSON schema as a dictionary.
        ref_skip: Mapping whose keys name the fields and definitions to leave
            out of the description; defaults to nothing skipped.

    Returns:
        A ``(text, reduced_schema)`` tuple: the pretty-printed reduced schema
        and the reduced schema itself, keyed by the schema's title.
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if ref_skip is None:
        ref_skip = {}

    # Shallow copy of the top level; nested values are still rewritten in place.
    schema = dict(pydantic_object.schema())

    search_and_replace_refs(
        schema=schema["properties"],
        # "$defs" may be absent (e.g. a model with no nested models); treat
        # that as "no definitions" rather than failing with KeyError.
        defs=schema.get("$defs", {}),
        ref_skip=ref_skip,
        n=0,
    )

    reduced_schema = remove_extraneous_fields(schema, ref_skip)
    reduced_schema = {schema["title"]: reduced_schema}

    return pprint.pformat(reduced_schema), reduced_schema