@@ -432,3 +432,281 @@ def default(self, obj):
432432 if isinstance (obj , Missing ):
433433 return None
434434 return json .JSONEncoder .default (self , obj )
435+
436+
def check_dict(data_dict, select_dict, parent_path=()):
    """
    compare data_dict against the expected values in select_dict.

    returns a list of key-path tuples (each prefixed with parent_path),
    one for every entry of select_dict that is missing from data_dict or
    whose value differs. select_dict keys are visited in sorted order;
    dict and list values in select_dict are compared recursively.

    if data_dict is not a dict at all, the result is [parent_path].
    """
    if not isinstance(data_dict, dict):
        return [parent_path]

    mismatches = []
    for key, expected in sorted(select_dict.items()):
        if key not in data_dict:
            mismatches.append(parent_path + (key,))
            continue

        actual = data_dict[key]
        if isinstance(expected, dict):
            # descend: only the keys named in `expected` are compared
            mismatches += check_dict(actual, expected, parent_path + (key,))
        elif isinstance(expected, list):
            mismatches += check_list(actual, expected, parent_path + (key,))
        elif actual != expected:
            mismatches.append(parent_path + (key,))

    return mismatches
460+
461+
def check_list(data_list, select_list, parent_path=()):
    """
    compare data_list against the expected values in select_list,
    position by position.

    returns a list of key-path tuples (each prefixed with parent_path),
    one for every index of select_list that is past the end of data_list
    or whose value differs. dict and list values in select_list are
    compared recursively.

    if data_list is not a list at all, the result is [parent_path].
    """
    if not isinstance(data_list, list):
        return [parent_path]

    mismatches = []
    for position, expected in enumerate(select_list):
        if position >= len(data_list):
            # select_list is longer than the data being checked
            mismatches.append(parent_path + (position,))
            continue

        actual = data_list[position]
        if isinstance(expected, dict):
            mismatches += check_dict(
                actual, expected, parent_path + (position,))
        elif isinstance(expected, list):
            mismatches += check_list(
                actual, expected, parent_path + (position,))
        elif actual != expected:
            mismatches.append(parent_path + (position,))

    return mismatches
485+
486+
def resolve_string_key(data, string_key):
    """
    return (child, parent_path) if string_key is found in data
    raise DataError on incompatible types or key not found.

    string_key is split on '__' and each part is applied in turn:

    - on a dict the part is used as a key
    - on a list a part of 5 or more characters is treated as a partial
      id: the first element whose 'id' value starts with the part is
      selected. every element examined before a match must be a dict
      with a string-like 'id', otherwise DataError is raised
      e.g. `resources__1492a` would select the first matching resource
      with an id field matching "1492a..."
    - on a list a shorter part must be a non-negative integer index
      within range
    - on anything else DataError is raised

    parent_path is returned as a tuple of the dict keys and integer list
    indexes that were followed to reach child.
    """
    parent_path = []

    def unmatched(key):
        # error showing the path walked so far plus the failing part
        return DataError('Unmatched key %s' % '__'.join(
            str(p) for p in parent_path + [key]))

    current = data
    for k in string_key.split('__'):
        if isinstance(current, dict):
            if k not in current:
                raise unmatched(k)
            parent_path.append(k)
            current = current[k]
            continue

        if not isinstance(current, list):
            raise unmatched(k)

        if len(k) >= 5:
            # long parts are id prefixes, never numeric indexes
            for i, rec in enumerate(current):
                if not isinstance(rec, dict) or 'id' not in rec:
                    raise unmatched(k)
                try:
                    is_match = rec['id'].startswith(k)
                except (AttributeError, TypeError):
                    # an id that can't be prefix-matched (None, int,
                    # mismatched string type) is an incompatible type,
                    # so report it the same way as a missing id
                    raise unmatched(k)
                if is_match:
                    parent_path.append(i)
                    current = rec
                    break
            else:
                raise unmatched(k)
            continue

        try:
            index = int(k)
        except ValueError:
            raise unmatched(k)
        if not 0 <= index < len(current):
            raise unmatched(k)

        parent_path.append(index)
        current = current[index]

    return current, tuple(parent_path)
537+
538+
def check_string_key(data_dict, string_key, value):
    """
    look up string_key in data_dict and compare what is found to value.

    returns a list of key-path tuples that don't match: dict and list
    values are compared with check_dict / check_list, anything else by
    inequality (giving either an empty list or the single resolved path).

    DataError raised by resolve_string_key (key not found, incompatible
    types along the path) is not caught here.
    """
    found, path = resolve_string_key(data_dict, string_key)

    if isinstance(value, dict):
        return check_dict(found, value, path)
    if isinstance(value, list):
        return check_list(found, value, path)
    return [path] if found != value else []
555+
556+
def filter_glob_match(data_dict, glob_patterns):
    """
    remove keys and values from data_dict in-place based on glob patterns.

    each pattern is a string_key ('__'-separated parts) in which a '*'
    part matches everything at that level. a pattern starting with '+'
    marks values to protect from deletion, where the first matching
    pattern "wins"; any leading '-' and '+' characters are stripped
    before the pattern is split into parts.
    """
    parsed_globs = []
    for pattern in glob_patterns:
        keep = pattern.startswith('+')
        parts = pattern.lstrip('-+').split('__')
        parsed_globs.append((keep, parts))
    return _filter_glob_match(data_dict, parsed_globs)
568+
569+
570+ def _filter_glob_match (data , parsed_globs ):
571+ if isinstance (data , dict ):
572+ protected = {}
573+ children = {}
574+ for keep , globs in parsed_globs :
575+ head = globs [0 ]
576+ if head == '*' :
577+ if keep :
578+ protected .update (data )
579+ else :
580+ data .clear ()
581+ continue
582+ if head not in data :
583+ continue
584+
585+ if len (globs ) > 1 :
586+ children .setdefault (head , []).append ((keep , globs [1 :]))
587+ elif keep :
588+ protected [head ] = data [head ]
589+ else :
590+ del data [head ]
591+ data .update (protected )
592+
593+ for head in children :
594+ if head not in data :
595+ continue
596+ _filter_glob_match (data [head ], children [head ])
597+
598+ return
599+
600+ elif not isinstance (data , list ):
601+ return
602+
603+ protected = set ()
604+ removed = set ()
605+ children = {}
606+ for keep , globs in parsed_globs :
607+ head = globs [0 ]
608+ if head == '*' :
609+ if keep :
610+ protected .update (set (range (len (data ))) - removed )
611+ else :
612+ removed .update (set (range (len (data ))) - protected )
613+ continue
614+ try :
615+ child , (index ,) = resolve_string_key (data , head )
616+ except DataError :
617+ continue
618+
619+ if len (globs ) > 1 :
620+ children .setdefault (index , []).append ((keep , globs [1 :]))
621+ elif keep :
622+ if index not in removed :
623+ protected .add (index )
624+ else :
625+ if index not in protected :
626+ removed .add (index )
627+
628+ for head in children :
629+ if head not in removed - protected :
630+ _filter_glob_match (data [head ], children [head ])
631+
632+ data [:] = [e for i , e in enumerate (data ) if i not in removed - protected ]
633+
634+
def update_merge_dict(data_dict, update_dict, parent_path=()):
    """
    merge update_dict into data_dict in-place.

    keys whose existing value is a dict or a list are merged recursively
    with update_merge_dict / update_merge_list; every other key (new or
    existing) is simply set to the value from update_dict.

    raise DataError if update_dict is not a dict, e.g. when replacing
    a dict with a list. parent_path is only used to build that message.
    """
    if not isinstance(update_dict, dict):
        location = '__'.join(str(p) for p in parent_path)
        raise DataError('Expected dict for %s' % location)

    for key, incoming in update_dict.items():
        existing = data_dict[key] if key in data_dict else None
        if isinstance(existing, dict):
            update_merge_dict(existing, incoming, parent_path + (key,))
        elif isinstance(existing, list):
            update_merge_list(existing, incoming, parent_path + (key,))
        else:
            # new key, or an existing scalar value: overwrite
            data_dict[key] = incoming
654+
655+
def update_merge_list(data_list, update_list, parent_path=()):
    """
    merge update_list into data_list in-place, position by position.

    positions whose existing value is a dict or a list are merged
    recursively with update_merge_dict / update_merge_list, other
    existing positions are overwritten, and entries of update_list past
    the end of data_list are appended.

    raise DataError if update_list is not a list, e.g. when replacing
    a list with a dict. parent_path is only used to build that message.
    """
    if not isinstance(update_list, list):
        location = '__'.join(str(p) for p in parent_path)
        raise DataError('Expected list for %s' % location)

    for position, incoming in enumerate(update_list):
        if position >= len(data_list):
            data_list.append(incoming)
            continue

        existing = data_list[position]
        if isinstance(existing, dict):
            update_merge_dict(existing, incoming, parent_path + (position,))
        elif isinstance(existing, list):
            update_merge_list(existing, incoming, parent_path + (position,))
        else:
            data_list[position] = incoming
675+
676+
def update_merge_string_key(data_dict, string_key, value):
    """
    update data_dict in-place at the location named by string_key.

    the last '__'-separated part of string_key is the key or list entry
    to update and the parts before it (if any) select the container
    with resolve_string_key:

    - a dict container has {last part: value} merged in
    - a list container with last part `extend` is extended with value,
      which must be a list
    - otherwise the last part selects a list entry: a dict or list entry
      has value merged in, any other entry is replaced by value

    raise DataError on incompatible types such as replacing a dict with
    a list, or when the container is neither a dict nor a list.
    """
    segments = string_key.split('__')
    leaf = segments.pop()
    container_key = '__'.join(segments)

    if container_key:
        container, path = resolve_string_key(data_dict, container_key)
    else:
        # single-part key: operate directly on data_dict
        container, path = data_dict, ()

    if isinstance(container, dict):
        update_merge_dict(container, {leaf: value}, path)
        return

    if not isinstance(container, list):
        raise DataError('Expected list or dict for %s' % container_key)

    if leaf == 'extend':
        if not isinstance(value, list):
            raise DataError('Expected list for %s' % container_key)
        container.extend(value)
        return

    entry, (index,) = resolve_string_key(container, leaf)
    if isinstance(entry, dict):
        update_merge_dict(entry, value, path + (index,))
    elif isinstance(entry, list):
        update_merge_list(entry, value, path + (index,))
    else:
        container[index] = value
0 commit comments