first commit
This commit is contained in:
@@ -0,0 +1,415 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Calculate the difference between two dictionaries as:
|
||||
(1) items added
|
||||
(2) items removed
|
||||
(3) keys same in both but changed values
|
||||
(4) keys same in both and unchanged values
|
||||
|
||||
Originally posted at http://stackoverflow.com/questions/1165352/fast-comparison-between-two-python-dictionary/1165552#1165552
|
||||
Available at repository: https://github.com/hughdbrown/dictdiffer
|
||||
|
||||
Added the ability to recursively compare dictionaries
|
||||
"""
|
||||
import copy
from typing import Any, Dict, List, Optional, Set, Text

try:
    from collections.abc import Mapping
except ImportError:
    # Kept for interpreters that do not provide collections.abc; the
    # collections.Mapping alias was removed in Python 3.10.
    from collections import Mapping
|
||||
|
||||
|
||||
def diff(current_dict, past_dict):
    """
    Build a DictDiffer comparing ``current_dict`` against ``past_dict``.

    current_dict
        The newer dictionary.

    past_dict
        The older dictionary.
    """
    differ = DictDiffer(current_dict=current_dict, past_dict=past_dict)
    return differ
|
||||
|
||||
|
||||
class DictDiffer:
    """
    Calculate the difference between two dictionaries as:
    (1) items added
    (2) items removed
    (3) keys same in both but changed values
    (4) keys same in both and unchanged values

    Only top-level keys are compared; values are compared with ``==``/``!=``.
    """

    def __init__(self, current_dict: Dict, past_dict: Dict):
        """
        current_dict
            The newer dictionary.

        past_dict
            The older dictionary.
        """
        self.current_dict, self.past_dict = current_dict, past_dict
        # Iterating a dict yields its keys, so no intermediate list is needed.
        self.set_current, self.set_past = set(current_dict), set(past_dict)
        self.intersect = self.set_current.intersection(self.set_past)

    def added(self) -> Set:
        """Return the keys present in current_dict but not in past_dict."""
        return self.set_current - self.intersect

    def removed(self) -> Set:
        """Return the keys present in past_dict but not in current_dict."""
        return self.set_past - self.intersect

    def changed(self) -> Set:
        """Return the keys present in both dicts whose values differ."""
        return {
            key
            for key in self.intersect
            if self.past_dict[key] != self.current_dict[key]
        }

    def unchanged(self) -> Set:
        """Return the keys present in both dicts whose values are equal."""
        return {
            key
            for key in self.intersect
            if self.past_dict[key] == self.current_dict[key]
        }
|
||||
|
||||
|
||||
class RecursiveDictDiffer(DictDiffer):
    """
    Calculates a recursive diff between the current_dict and the past_dict
    creating a diff in the format

    {'new': new_value, 'old': old_value}

    It recursively searches differences in common keys whose values are
    dictionaries creating a diff dict in the format

    {'common_key' : {'new': new_value, 'old': old_value}}

    The class overrides all DictDiffer methods, returning sorted lists of keys
    and subkeys using the . notation (i.e 'common_key1.common_key2.changed_key')

    The class provides access to:
        (1) the added, removed, changed keys and subkeys (using the . notation)
               ``added``, ``removed``, ``changed`` methods
        (2) the diffs in the format above (diffs property)
                ``diffs`` property
        (3) a dict with the new changed values only (new_values property)
                ``new_values`` property
        (4) a dict with the old changed values only (old_values property)
                ``old_values`` property
        (5) a string representation of the changes in the format shown below:
                ``changes_str`` property

    Note:
        The <_null_> value is a reserved value; it marks the missing side of
        an added or removed key inside the diff dict.

    .. code-block:: text

        common_key1:
          common_key2:
            changed_key1 from '<old_str>' to '<new_str>'
            changed_key2 from '[<old_elem1>, ..]' to '[<new_elem1>, ..]'
        common_key3:
          changed_key3 from <old_int> to <new_int>

    """

    NONE_VALUE = "<_null_>"

    def __init__(self, past_dict: Dict, current_dict: Dict, ignore_missing_keys: bool):
        """
        past_dict
            Past dictionary.

        current_dict
            Current dictionary.

        ignore_missing_keys
            Flag specifying whether to ignore keys that no longer exist in the
            current_dict, but exist in the past_dict. If true, the diff will
            not contain the missing keys.
        """
        # Note the argument order: the base class takes (current, past).
        super().__init__(current_dict, past_dict)
        self._diffs = self._get_diffs(
            self.current_dict, self.past_dict, ignore_missing_keys
        )
        # Ignores unset values when assessing the changes
        self.ignore_unset_values = True

    @classmethod
    def _get_diffs(cls, dict1, dict2, ignore_missing_keys) -> Dict:
        """
        Returns a dict with the differences between dict1 and dict2

        Notes:
            Keys that only exist in dict2 are not included in the diff if
            ignore_missing_keys is True, otherwise they are
            Simple compares are done on lists
        """
        ret_dict = {}
        for key, value1 in dict1.items():
            if key not in dict2:
                # Key only exists in dict1: the old side is the reserved marker.
                ret_dict[key] = {"new": value1, "old": cls.NONE_VALUE}
                continue
            value2 = dict2[key]
            if value1 == value2:
                continue
            if isinstance(value1, dict) and isinstance(value2, dict):
                sub_diff_dict = cls._get_diffs(value1, value2, ignore_missing_keys)
                # An empty sub-diff (possible when missing keys are ignored)
                # is not recorded.
                if sub_diff_dict:
                    ret_dict[key] = sub_diff_dict
            else:
                ret_dict[key] = {"new": value1, "old": value2}
        if not ignore_missing_keys:
            for key, value2 in dict2.items():
                if key not in dict1:
                    ret_dict[key] = {"new": cls.NONE_VALUE, "old": value2}
        return ret_dict

    @classmethod
    def _get_values(cls, diff_dict: Dict, type_: str = "new") -> Dict:
        """
        Returns a dictionary with the 'new' or 'old' values in a diff dict.

        type_
            Which values to return, 'new' or 'old'
        """
        ret_dict = {}
        for key, entry in diff_dict.items():
            if type_ in entry:
                ret_dict[key] = entry[type_]
            else:
                # Nested diff dict: descend and collect the same side.
                ret_dict[key] = cls._get_values(entry, type_=type_)
        return ret_dict

    @classmethod
    def _format_value(cls, value: Any) -> Any:
        """
        Returns the representation of one side of a diff used in change
        messages: the reserved marker becomes ``nothing``, strings are
        quoted, lists are joined with ``, `` and quoted, anything else is
        returned unchanged.
        """
        # The marker is itself a string, so it must be tested first.
        if value == cls.NONE_VALUE:
            return "nothing"
        if isinstance(value, Text):
            return "'{0}'".format(value)
        if isinstance(value, list):
            # str() each element: str.join raises TypeError on non-strings.
            return "'{0}'".format(", ".join(str(item) for item in value))
        return value

    @classmethod
    def _get_changes(cls, diff_dict: Dict) -> List[str]:
        """
        Returns a list of string messages with the differences in a diff dict.

        Each inner difference is tabulated two spaces deeper
        """
        changes_strings = []
        for key in sorted(diff_dict.keys()):
            entry = diff_dict[key]
            if sorted(entry.keys()) == ["new", "old"]:
                changes_strings.append(
                    "{0} from {1} to {2}".format(
                        key,
                        cls._format_value(entry["old"]),
                        cls._format_value(entry["new"]),
                    )
                )
            else:
                sub_changes = cls._get_changes(entry)
                if sub_changes:
                    changes_strings.append("{0}:".format(key))
                    changes_strings.extend(
                        ["  {0}".format(change) for change in sub_changes]
                    )
        return changes_strings

    def added(self) -> List[str]:
        """
        Returns a sorted list of all keys that have been added.

        If the keys are in child dictionaries they will be represented with
        . notation
        """

        def _added(diffs, prefix):
            keys = []
            for key, value in diffs.items():
                path = "{0}{1}".format(prefix, key)
                if not isinstance(value, dict):
                    # Diff entries are always dicts, so a plain value is only
                    # met while walking the contents of a newly added
                    # dictionary: it is an added leaf.
                    keys.append(path)
                elif "old" not in value:
                    keys.extend(_added(value, prefix=path + "."))
                elif value["old"] == self.NONE_VALUE:
                    if isinstance(value["new"], dict):
                        keys.extend(_added(value["new"], prefix=path + "."))
                    else:
                        keys.append(path)
            return keys

        return sorted(_added(self._diffs, prefix=""))

    def removed(self) -> List[str]:
        """
        Returns a sorted list of all keys that have been removed.

        If the keys are in child dictionaries they will be represented with
        . notation
        """

        def _removed(diffs, prefix):
            keys = []
            for key, value in diffs.items():
                if not isinstance(value, dict):
                    # Plain value met while walking a raw 'new' dictionary:
                    # nothing was removed here.
                    continue
                path = "{0}{1}".format(prefix, key)
                if "old" not in value:
                    keys.extend(_removed(value, prefix=path + "."))
                elif value["new"] == self.NONE_VALUE:
                    keys.append(path)
                elif isinstance(value["new"], dict):
                    keys.extend(_removed(value["new"], prefix=path + "."))
            return keys

        return sorted(_removed(self._diffs, prefix=""))

    def changed(self) -> List[str]:
        """
        Returns a sorted list of all keys that have been changed.

        If the keys are in child dictionaries they will be represented with
        . notation

        While ``ignore_unset_values`` is true, entries whose old or new side
        is the reserved <_null_> marker are not reported as changed.
        """

        def _changed(diffs, prefix):
            keys = []
            for key, value in diffs.items():
                if not isinstance(value, dict):
                    continue
                path = "{0}{1}".format(prefix, key)
                if "old" not in value:
                    keys.extend(_changed(value, prefix=path + "."))
                    continue

                is_change = "new" in value
                if is_change and self.ignore_unset_values:
                    is_change = (
                        value["old"] != self.NONE_VALUE
                        and value["new"] != self.NONE_VALUE
                    )

                if not is_change:
                    keys.extend(_changed(value, prefix=path + "."))
                elif isinstance(value["new"], dict):
                    keys.extend(_changed(value["new"], prefix=path + "."))
                else:
                    keys.append(path)
            return keys

        return sorted(_changed(self._diffs, prefix=""))

    def unchanged(self) -> List[str]:
        """
        Returns a sorted list of all keys that have been unchanged.

        If the keys are in child dictionaries they will be represented with
        . notation
        """

        def _unchanged(current_dict, diffs, prefix):
            keys = []
            for key, value in current_dict.items():
                path = "{0}{1}".format(prefix, key)
                if key not in diffs:
                    keys.append(path)
                elif isinstance(value, dict):
                    if "new" in diffs[key]:
                        # There is a diff
                        continue
                    keys.extend(_unchanged(value, diffs[key], prefix=path + "."))
            return keys

        return sorted(_unchanged(self.current_dict, self._diffs, prefix=""))

    @property
    def diffs(self) -> Dict:
        """Returns a dict with the recursive diffs current_dict - past_dict"""
        return self._diffs

    @property
    def new_values(self) -> Dict:
        """Returns a dictionary with the new values"""
        return self._get_values(self._diffs, type_="new")

    @property
    def old_values(self) -> Dict:
        """Returns a dictionary with the old values"""
        return self._get_values(self._diffs, type_="old")

    @property
    def changes_str(self) -> str:
        """Returns a string describing the changes"""
        return "\n".join(self._get_changes(self._diffs))
|
||||
|
||||
|
||||
def deep_diff(old: Dict, new: Dict, ignore: Optional[List] = None) -> Dict[str, Any]:
    """
    Return the parts of ``old`` and ``new`` that differ.

    Both inputs are deep-copied, then every key whose value is equal on both
    sides is pruned, descending into values that are mappings on both sides.
    What is left of each copy is returned under ``"old"`` / ``"new"``; a side
    with nothing left is omitted, so identical inputs yield ``{}``.

    old
        The older dictionary. A falsy value (e.g. ``None``) is treated as an
        empty dictionary.

    new
        The newer dictionary. A falsy value (e.g. ``None``) is treated as an
        empty dictionary.

    ignore
        Keys to drop from both sides, at any nesting level, even when their
        values differ. Defaults to no keys.
    """
    ignore = ignore or []
    res = {}
    # Work on copies: the pruning below deletes keys in place.
    old = copy.deepcopy(old) or {}
    new = copy.deepcopy(new) or {}

    # Each entry is (old_subtree, new_subtree, reentrant). A subtree is pushed
    # again with reentrant=True below its children so that, once the children
    # have been pruned, nested dicts that became equal can be removed too.
    stack = [(old, new, False)]

    while stack:
        tmps = []
        tmp_old, tmp_new, reentrant = stack.pop()
        # The key union is materialised up front, so deleting while looping
        # is safe.
        for key in set(tmp_old) | set(tmp_new):
            if key in tmp_old and key in tmp_new and tmp_old[key] == tmp_new[key]:
                del tmp_old[key]
                del tmp_new[key]
                continue
            if not reentrant:
                if key in ignore:
                    tmp_old.pop(key, None)
                    tmp_new.pop(key, None)
                if isinstance(tmp_old.get(key), Mapping) and isinstance(
                    tmp_new.get(key), Mapping
                ):
                    tmps.append((tmp_old[key], tmp_new[key], False))
        if tmps:
            stack.extend([(tmp_old, tmp_new, True)] + tmps)

    if old:
        res["old"] = old
    if new:
        res["new"] = new
    return res
|
||||
|
||||
|
||||
def recursive_diff(
    past_dict: Dict, current_dict: Dict, ignore_missing_keys: bool = True
) -> RecursiveDictDiffer:
    """
    Build a RecursiveDictDiffer for the two given dictionaries.

    past_dict
        Past dictionary

    current_dict
        Current dictionary

    ignore_missing_keys
        When true, keys that exist in the past_dict but no longer exist in
        the current_dict are left out of the diff.
        Default is True.
    """
    differ = RecursiveDictDiffer(
        past_dict=past_dict,
        current_dict=current_dict,
        ignore_missing_keys=ignore_missing_keys,
    )
    return differ
|
||||
Reference in New Issue
Block a user