diff --git a/jsf/parser.py b/jsf/parser.py index 1514623..fcd4ff8 100644 --- a/jsf/parser.py +++ b/jsf/parser.py @@ -53,6 +53,7 @@ def __init__( ), initial_state: Dict[str, Any] = MappingProxyType({}), allow_none_optionals: confloat(ge=0.0, le=1.0) = 0.5, + max_recursive_depth: int = 10, ): """Initializes the JSF generator with the provided schema and configuration options. @@ -62,16 +63,19 @@ def __init__( context (Dict[str, Any], optional): A dictionary that provides additional utilities for handling the schema, such as a faker for generating fake data, a random number generator, and datetime utilities. It also includes an internal dictionary for handling List, Union, and Tuple types. Defaults to a dictionary with "faker", "random", "datetime", and "__internal__" keys. initial_state (Dict[str, Any], optional): A dictionary that represents the initial state of the parser. If you wish to extend the state so it can be accesses by your schema you can add any references in here. Defaults to an empty dictionary. allow_none_optionals (confloat, optional): A parameter that determines the probability of optional fields being set to None. Defaults to 0.5. + max_recursive_depth (int, optional): A parameter that determines the maximum depth when generating a recursive schema. Defaults to 10. """ self.root_schema = schema self.definitions = {} self.base_state = { "__counter__": count(start=1), "__all_json_paths__": [], + "__depth__": 0, **initial_state, } self.base_context = context self.allow_none_optionals = allow_none_optionals + self.max_recursive_depth = max_recursive_depth self.root = None self._parse(schema) @@ -89,6 +93,7 @@ def from_json( ), initial_state: Dict[str, Any] = MappingProxyType({}), allow_none_optionals: confloat(ge=0.0, le=1.0) = 0.5, + max_recursive_depth: int = 10, ) -> "JSF": """Initializes the JSF generator with the provided schema at the given path and configuration options. @@ -98,9 +103,12 @@ def from_json( context (Dict[str, Any], optional): A dictionary that provides additional utilities for handling the schema, such as a faker for generating fake data, a random number generator, and datetime utilities. It also includes an internal dictionary for handling List, Union, and Tuple types. Defaults to a dictionary with "faker", "random", "datetime", and "__internal__" keys. initial_state (Dict[str, Any], optional): A dictionary that represents the initial state of the parser. If you wish to extend the state so it can be accesses by your schema you can add any references in here. Defaults to an empty dictionary. allow_none_optionals (confloat, optional): A parameter that determines the probability of optional fields being set to None. Defaults to 0.5. + max_recursive_depth (int, optional): A parameter that determines the maximum depth when generating a recursive schema. Defaults to 10. """ with open(path) as f: - return JSF(json.load(f), context, initial_state, allow_none_optionals) + return JSF( + json.load(f), context, initial_state, allow_none_optionals, max_recursive_depth + ) def __parse_primitive(self, name: str, path: str, schema: Dict[str, Any]) -> PrimitiveTypes: item_type, is_nullable = self.__is_field_nullable(schema) @@ -111,11 +119,14 @@ def __parse_primitive(self, name: str, path: str, schema: Dict[str, Any]) -> Pri "path": path, "is_nullable": is_nullable, "allow_none_optionals": self.allow_none_optionals, + "max_recursive_depth": self.max_recursive_depth, **schema, } ) - def __parse_object(self, name: str, path: str, schema: Dict[str, Any]) -> Object: + def __parse_object( + self, name: str, path: str, schema: Dict[str, Any], root: Optional[AllTypes] = None + ) -> Object: _, is_nullable = self.__is_field_nullable(schema) model = Object.from_dict( { @@ -123,23 +134,29 @@ def __parse_object(self, name: str, path: str, schema: Dict[str, Any]) -> Object "path": path, "is_nullable": is_nullable, "allow_none_optionals": self.allow_none_optionals, + "max_recursive_depth": self.max_recursive_depth, **schema, } ) + root = model if root is None else root props = [] for _name, definition in schema.get("properties", {}).items(): - props.append(self.__parse_definition(_name, path=f"{path}/{_name}", schema=definition)) + props.append( + self.__parse_definition(_name, path=f"{path}/{_name}", schema=definition, root=root) + ) model.properties = props pattern_props = [] for _name, definition in schema.get("patternProperties", {}).items(): pattern_props.append( - self.__parse_definition(_name, path=f"{path}/{_name}", schema=definition) + self.__parse_definition(_name, path=f"{path}/{_name}", schema=definition, root=root) ) model.patternProperties = pattern_props return model - def __parse_array(self, name: str, path: str, schema: Dict[str, Any]) -> Array: + def __parse_array( + self, name: str, path: str, schema: Dict[str, Any], root: Optional[AllTypes] = None + ) -> Array: _, is_nullable = self.__is_field_nullable(schema) arr = Array.from_dict( { @@ -147,13 +164,17 @@ def __parse_array(self, name: str, path: str, schema: Dict[str, Any]) -> Array: "path": path, "is_nullable": is_nullable, "allow_none_optionals": self.allow_none_optionals, + "max_recursive_depth": self.max_recursive_depth, **schema, } ) - arr.items = self.__parse_definition(name, name, schema["items"]) + root = arr if root is None else root + arr.items = self.__parse_definition(name, f"{path}/items", schema["items"], root=root) return arr - def __parse_tuple(self, name: str, path: str, schema: Dict[str, Any]) -> JSFTuple: + def __parse_tuple( + self, name: str, path: str, schema: Dict[str, Any], root: Optional[AllTypes] = None + ) -> JSFTuple: _, is_nullable = self.__is_field_nullable(schema) arr = JSFTuple.from_dict( { @@ -161,12 +182,16 @@ def __parse_tuple(self, name: str, path: str, schema: Dict[str, Any]) -> JSFTupl "path": path, "is_nullable": is_nullable, "allow_none_optionals": self.allow_none_optionals, + "max_recursive_depth": self.max_recursive_depth, **schema, } ) + root = arr if root is None else root arr.items = [] for i, item in enumerate(schema["items"]): - arr.items.append(self.__parse_definition(name, path=f"{name}[{i}]", schema=item)) + arr.items.append( + self.__parse_definition(name, path=f"{path}/{name}[{i}]", schema=item, root=root) + ) return arr def __is_field_nullable(self, schema: Dict[str, Any]) -> Tuple[str, bool]: @@ -181,40 +206,55 @@ def __is_field_nullable(self, schema: Dict[str, Any]) -> Tuple[str, bool]: return random.choice(item_type_deep_copy), False return item_type, False - def __parse_anyOf(self, name: str, path: str, schema: Dict[str, Any]) -> AnyOf: + def __parse_anyOf( + self, name: str, path: str, schema: Dict[str, Any], root: Optional[AllTypes] = None + ) -> AnyOf: + model = AnyOf(name=name, path=path, max_recursive_depth=self.max_recursive_depth, **schema) + root = model if root is None else root schemas = [] for d in schema["anyOf"]: - schemas.append(self.__parse_definition(name, path, d)) - return AnyOf(name=name, path=path, schemas=schemas, **schema) + schemas.append(self.__parse_definition(name, path, d, root=root)) + model.schemas = schemas + return model - def __parse_allOf(self, name: str, path: str, schema: Dict[str, Any]) -> AllOf: + def __parse_allOf( + self, name: str, path: str, schema: Dict[str, Any], root: Optional[AllTypes] = None + ) -> AllOf: combined_schema = dict(ChainMap(*schema["allOf"])) - return AllOf( - name=name, - path=path, - combined_schema=self.__parse_definition(name, path, combined_schema), - **schema, - ) + model = AllOf(name=name, path=path, max_recursive_depth=self.max_recursive_depth, **schema) + root = model if root is None else root + model.combined_schema = self.__parse_definition(name, path, combined_schema, root=root) + return model - def __parse_oneOf(self, name: str, path: str, schema: Dict[str, Any]) -> OneOf: + def __parse_oneOf( + self, name: str, path: str, schema: Dict[str, Any], root: Optional[AllTypes] = None + ) -> OneOf: + model = OneOf(name=name, path=path, max_recursive_depth=self.max_recursive_depth, **schema) + root = model if root is None else root schemas = [] for d in schema["oneOf"]: - schemas.append(self.__parse_definition(name, path, d)) - return OneOf(name=name, path=path, schemas=schemas, **schema) + schemas.append(self.__parse_definition(name, path, d, root=root)) + model.schemas = schemas + return model - def __parse_named_definition(self, def_name: str) -> AllTypes: + def __parse_named_definition(self, path: str, def_name: str, root) -> AllTypes: schema = self.root_schema parsed_definition = None for def_tag in ("definitions", "$defs"): - for name, definition in schema.get(def_tag, {}).items(): - if name == def_name: - parsed_definition = self.__parse_definition( - name, path=f"#/{def_tag}", schema=definition - ) - self.definitions[f"#/{def_tag}/{name}"] = parsed_definition + if path.startswith(f"#/{def_tag}/{def_name}"): + root.is_recursive = True + return root + definition = schema.get(def_tag, {}).get(def_name) + if definition is not None: + parsed_definition = self.__parse_definition( + def_name, path=f"{path}/#/{def_tag}/{def_name}", schema=definition, root=root + ) + self.definitions[f"#/{def_tag}/{def_name}"] = parsed_definition return parsed_definition - def __parse_definition(self, name: str, path: str, schema: Dict[str, Any]) -> AllTypes: + def __parse_definition( + self, name: str, path: str, schema: Dict[str, Any], root: Optional[AllTypes] = None + ) -> AllTypes: self.base_state["__all_json_paths__"].append(path) item_type, is_nullable = self.__is_field_nullable(schema) if "const" in schema: @@ -232,25 +272,26 @@ def __parse_definition(self, name: str, path: str, schema: Dict[str, Any]) -> Al "path": path, "is_nullable": is_nullable, "allow_none_optionals": self.allow_none_optionals, + "max_recursive_depth": self.max_recursive_depth, **schema, } ) elif "type" in schema: if item_type == "object" and "properties" in schema: - return self.__parse_object(name, path, schema) + return self.__parse_object(name, path, schema, root) elif item_type == "object" and "anyOf" in schema: - return self.__parse_anyOf(name, path, schema) + return self.__parse_anyOf(name, path, schema, root) elif item_type == "object" and "allOf" in schema: - return self.__parse_allOf(name, path, schema) + return self.__parse_allOf(name, path, schema, root) elif item_type == "object" and "oneOf" in schema: - return self.__parse_oneOf(name, path, schema) + return self.__parse_oneOf(name, path, schema, root) elif item_type == "array": if (schema.get("contains") is not None) or isinstance(schema.get("items"), dict): - return self.__parse_array(name, path, schema) + return self.__parse_array(name, path, schema, root) if isinstance(schema.get("items"), list) and all( isinstance(x, dict) for x in schema.get("items", []) ): - return self.__parse_tuple(name, path, schema) + return self.__parse_tuple(name, path, schema, root) else: return self.__parse_primitive(name, path, schema) elif "$ref" in schema: @@ -261,20 +302,23 @@ def __parse_definition(self, name: str, path: str, schema: Dict[str, Any]) -> Al else: # parse referenced definition ref_name = frag.split("/")[-1] - cls = self.__parse_named_definition(ref_name) + cls = self.__parse_named_definition(path, ref_name, root) else: with s_open(ext, "r") as f: external_jsf = JSF(json.load(f)) cls = deepcopy(external_jsf.definitions.get(f"#{frag}")) - cls.name = name - cls.path = path + if path != "#" and cls == root: + cls.name = name + elif path != "#": + cls.name = name + cls.path = path return cls elif "anyOf" in schema: - return self.__parse_anyOf(name, path, schema) + return self.__parse_anyOf(name, path, schema, root) elif "allOf" in schema: - return self.__parse_allOf(name, path, schema) + return self.__parse_allOf(name, path, schema, root) elif "oneOf" in schema: - return self.__parse_oneOf(name, path, schema) + return self.__parse_oneOf(name, path, schema, root) else: raise ValueError(f"Cannot parse schema {repr(schema)}") # pragma: no cover @@ -282,7 +326,9 @@ def _parse(self, schema: Dict[str, Any]) -> AllTypes: for def_tag in ("definitions", "$defs"): for name, definition in schema.get(def_tag, {}).items(): if f"#/{def_tag}/{name}" not in self.definitions: - item = self.__parse_definition(name, path=f"#/{def_tag}", schema=definition) + item = self.__parse_definition( + name, path=f"#/{def_tag}/{name}", schema=definition + ) self.definitions[f"#/{def_tag}/{name}"] = item self.root = self.__parse_definition(name="root", path="#", schema=schema) diff --git a/jsf/schema_types/_tuple.py b/jsf/schema_types/_tuple.py index 27fc643..57998b4 100644 --- a/jsf/schema_types/_tuple.py +++ b/jsf/schema_types/_tuple.py @@ -24,7 +24,12 @@ def generate(self, context: Dict[str, Any]) -> Optional[List[Tuple]]: try: return super().generate(context) except ProviderNotSetException: - return tuple(item.generate(context) for item in self.items) + depth = context["state"]["__depth__"] + output = [] + for item in self.items: + output.append(item.generate(context)) + context["state"]["__depth__"] = depth + return tuple(output) def model(self, context: Dict[str, Any]) -> Tuple[Type, Any]: _type = eval( diff --git a/jsf/schema_types/anyof.py b/jsf/schema_types/anyof.py index 34043af..9790e5c 100644 --- a/jsf/schema_types/anyof.py +++ b/jsf/schema_types/anyof.py @@ -15,7 +15,10 @@ def generate(self, context: Dict[str, Any]) -> Optional[Any]: try: return super().generate(context) except ProviderNotSetException: - return random.choice(self.schemas).generate(context) + filtered_schemas = [] + if context["state"]["__depth__"] > self.max_recursive_depth: + filtered_schemas = [schema for schema in self.schemas if not schema.is_recursive] + return random.choice(filtered_schemas or self.schemas).generate(context) def model(self, context: Dict[str, Any]) -> None: pass diff --git a/jsf/schema_types/array.py b/jsf/schema_types/array.py index 9e18f19..8686c52 100644 --- a/jsf/schema_types/array.py +++ b/jsf/schema_types/array.py @@ -27,19 +27,22 @@ def generate(self, context: Dict[str, Any]) -> Optional[List[Any]]: elif isinstance(self.fixed, int): self.minItems = self.maxItems = self.fixed - output = [ - self.items.generate(context) - for _ in range(random.randint(int(self.minItems), int(self.maxItems))) - ] + depth = context["state"]["__depth__"] + output = [] + for _ in range(random.randint(int(self.minItems), int(self.maxItems))): + output.append(self.items.generate(context)) + context["state"]["__depth__"] = depth if self.uniqueItems and self.items.type == "object": output = [dict(s) for s in {frozenset(d.items()) for d in output}] while len(output) < self.minItems: output.append(self.items.generate(context)) output = [dict(s) for s in {frozenset(d.items()) for d in output}] + context["state"]["__depth__"] = depth elif self.uniqueItems: output = set(output) while len(output) < self.minItems: output.add(self.items.generate(context)) + context["state"]["__depth__"] = depth output = list(output) return output diff --git a/jsf/schema_types/base.py b/jsf/schema_types/base.py index 9e1d09e..85e5976 100644 --- a/jsf/schema_types/base.py +++ b/jsf/schema_types/base.py @@ -34,17 +34,25 @@ class BaseSchema(BaseModel): provider: Optional[str] = Field(None, alias="$provider") set_state: Optional[Dict[str, str]] = Field(None, alias="$state") is_nullable: bool = False + is_recursive: bool = False allow_none_optionals: float = Field(0.5, ge=0.0, le=1.0) + max_recursive_depth: int = 10 @classmethod def from_dict(cls, d: Dict[str, Any]) -> Self: raise NotImplementedError # pragma: no cover def generate(self, context: Dict[str, Any]) -> Any: + if self.is_recursive: + context["state"]["__depth__"] += 1 + if self.set_state is not None: context["state"][self.path] = {k: eval(v, context)() for k, v in self.set_state.items()} - if self.is_nullable and random.uniform(0, 1) < self.allow_none_optionals: + if self.is_nullable and ( + random.uniform(0, 1) < self.allow_none_optionals + or context["state"]["__depth__"] > self.max_recursive_depth + ): return None if self.provider is not None: return eval(self.provider, context)() diff --git a/jsf/schema_types/object.py b/jsf/schema_types/object.py index a77a988..7e3fcf6 100644 --- a/jsf/schema_types/object.py +++ b/jsf/schema_types/object.py @@ -32,23 +32,28 @@ class Object(BaseSchema): def from_dict(cls, d: Dict[str, Any]) -> "Object": return Object(**d) - def should_keep(self, property_name: str) -> bool: + def should_keep(self, property_name: str, context: Dict[str, Any]) -> bool: if isinstance(self.required, list) and property_name in self.required: return True - return random.uniform(0, 1) > self.allow_none_optionals + return ( + random.uniform(0, 1) > self.allow_none_optionals + and context["state"]["__depth__"] <= self.max_recursive_depth + ) def generate(self, context: Dict[str, Any]) -> Optional[Dict[str, Any]]: try: return super().generate(context) except ProviderNotSetException: explicit_properties = { - o.name: o.generate(context) for o in self.properties if self.should_keep(o.name) + o.name: o.generate(context) + for o in self.properties + if self.should_keep(o.name, context) } pattern_props = {} if self.patternProperties: for o in self.patternProperties: for _ in range(random.randint(0, 10)): - if self.should_keep(o.name): + if self.should_keep(o.name, context): pattern_props[rstr.xeger(o.name)] = o.generate(context) return {**pattern_props, **explicit_properties} diff --git a/jsf/schema_types/oneof.py b/jsf/schema_types/oneof.py index 7fea1af..3034a31 100644 --- a/jsf/schema_types/oneof.py +++ b/jsf/schema_types/oneof.py @@ -15,7 +15,10 @@ def generate(self, context: Dict[str, Any]) -> Optional[List[Any]]: try: return super().generate(context) except ProviderNotSetException: - return random.choice(self.schemas).generate(context) + filtered_schemas = [] + if context["state"]["__depth__"] > self.max_recursive_depth: + filtered_schemas = [schema for schema in self.schemas if not schema.is_recursive] + return random.choice(filtered_schemas or self.schemas).generate(context) def model(self, context: Dict[str, Any]) -> None: pass diff --git a/jsf/tests/data/complex_recursive.json b/jsf/tests/data/complex_recursive.json new file mode 100644 index 0000000..fc4782c --- /dev/null +++ b/jsf/tests/data/complex_recursive.json @@ -0,0 +1,45 @@ +{ + "$ref": "#/definitions/tree", + "definitions": { + "tree": { + "anyOf": [ + { + "$ref": "#/definitions/node" + }, + { + "type": "string" + } + ] + }, + "node": { + "type": "object", + "allOf": [ + { + "type": "object", + "oneOf": [ + { + "type": "object", + "properties": { + "value": { + "$ref": "#/definitions/tree" + } + }, + "required": [ + "value" + ] + }, + { + "type": "object", + "properties": { + "value": { + "type": "string" + } + }, + "required": ["value"] + } + ] + } + ] + } + } +} \ No newline at end of file diff --git a/jsf/tests/data/object_recursive.json b/jsf/tests/data/object_recursive.json new file mode 100644 index 0000000..f711c65 --- /dev/null +++ b/jsf/tests/data/object_recursive.json @@ -0,0 +1,34 @@ +{ + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "tree": { + "$ref": "#/definitions/tree" + } + }, + "required": [ + "id" + ], + "definitions": { + "tree": { + "type": "object", + "properties": { + "value": { + "type": "string" + }, + "branches": { + "type": "array", + "items": { + "$ref": "#/definitions/tree" + }, + "minItems": 1 + } + }, + "required": [ + "value" + ] + } + } +} \ No newline at end of file diff --git a/jsf/tests/data/oneof_recursive.json b/jsf/tests/data/oneof_recursive.json new file mode 100644 index 0000000..07f65cd --- /dev/null +++ b/jsf/tests/data/oneof_recursive.json @@ -0,0 +1,18 @@ +{ + "$ref": "#/definitions/tree", + "definitions": { + "tree": { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/definitions/tree" + }, + { + "type": "integer" + } + ] + } + } + } +} \ No newline at end of file diff --git a/jsf/tests/test_default_fake.py b/jsf/tests/test_default_fake.py index 89c203f..fcfd6d5 100644 --- a/jsf/tests/test_default_fake.py +++ b/jsf/tests/test_default_fake.py @@ -457,3 +457,44 @@ def test_non_required_are_not_none(TestData): assert fake_data["name"] is not None assert fake_data["credit_card"] is not None + + +def test_fake_object_recursive(TestData): + with open(TestData / "object_recursive.json") as file: + schema = json.load(file) + p = JSF(schema, allow_none_optionals=0.0, max_recursive_depth=2) + + fake_data = [p.generate() for _ in range(5)] + for d in fake_data: + assert isinstance(d, dict) + assert "tree" in d and "id" in d + assert "branches" in d["tree"] and "value" in d["tree"] + for subtree in d["tree"]["branches"]: + assert isinstance(subtree, dict) + assert "branches" in subtree and "value" in subtree + for leave in subtree["branches"]: + assert "branches" not in leave and "value" in leave + + +def test_fake_oneof_recursive(TestData): + with open(TestData / "oneof_recursive.json") as file: + schema = json.load(file) + p = JSF(schema, max_recursive_depth=2) + + fake_data = [p.generate() for _ in range(10)] + for d in fake_data: + assert isinstance(d, list) + for item in d: + assert isinstance(item, int) or isinstance(item, list) + + +def test_fake_complex_recursive(TestData): + with open(TestData / "complex_recursive.json") as file: + schema = json.load(file) + p = JSF(schema, max_recursive_depth=2) + + fake_data = [p.generate() for _ in range(10)] + for d in fake_data: + assert isinstance(d, str) or isinstance(d, dict) + if isinstance(d, dict): + assert "value" in d