Multilevel JSON diff in python

26,035

Solution 1

solved it partially with following function

def diff(prev,lat):
    p=prev
    l=lat


    prevDiff = []
    latDiff = []

    for d1 in p[:]:
        flag = False
        for d2 in l:
            if len(set(d1.items()) ^ set(d2.items())) == 0:
                p.remove(d1)
                l.remove(d2)
                flag = True
                break
        if not flag:
            prevDiff.append(d1)
            p.remove(d1)

    prevDiff = prevDiff + p
    latDiff = latDiff + l

    resJSONdata=[]
    if len(prevDiff) != 0:
        resJSONdata.append({'prevCount':len(prevDiff)})
        resJSONdata.append({'prev':prevDiff})
    if len(latDiff) != 0:
        resJSONdata.append({'latestCount':len(latDiff)})
        resJSONdata.append({'latest':latDiff})

#     return json.dumps(resJSONdata,indent = 4,sort_keys=True)
    return resJSONdata

it's not doing it recursively into level into levels but for my purpose this solved the issue

Solution 2

Check out this python library jsondiff , that will help you to identify the diff's

import json

import jsondiff

json1 = json.loads(
    '{"isDynamic": false, "name": "", "value": "SID:<sid>", "description": "instance","argsOrder": 1,"isMultiSelect": false}')

json2 = json.loads(
    '{ "name": "", "value": "SID:<sid>","isDynamic": false, "description": "instance","argsOrder": 1,"isMultiSelect": false}')

res = jsondiff.diff(json1, json2)
if res:
    print("Diff found")
else:
    print("Same")

Solution 3

Yes! You can diff it with jycm which has a rendering tool out of the box.

It uses LCS, Edit distance and Kuhn–Munkres to diff arrays.

Here's an universal example with set in set and value changes in some set

from jycm.helper import make_ignore_order_func
from jycm.jycm import YouchamaJsonDiffer

left = {
    "set_in_set": [
        {
            "id": 1,
            "label": "label:1",
            "set": [
                1,
                5,
                3
            ]
        },
        {
            "id": 2,
            "label": "label:2",
            "set": [
                4,
                5,
                6
            ]
        }
    ]
}

right = {
    "set_in_set": [
        {
            "id": 2,
            "label": "label:2",
            "set": [
                6,
                5,
                4
            ]
        },
        {
            "id": 1,
            "label": "label:1111",
            "set": [
                3,
                2,
                1
            ]
        }
    ]
}

ycm = YouchamaJsonDiffer(left, right, ignore_order_func=make_ignore_order_func([
    "^set_in_set$",
    "^set_in_set->\\[\\d+\\]->set$"
]))

ycm.diff()

expected = {
    'list:add': [
        {'left': '__NON_EXIST__', 'right': 2, 'left_path': '', 'right_path': 'set_in_set->[1]->set->[1]'}
    ],
    'list:remove': [
        {'left': 5, 'right': '__NON_EXIST__', 'left_path': 'set_in_set->[0]->set->[1]', 'right_path': ''}
    ],
    'value_changes': [
        {'left': 'label:1', 'right': 'label:1111', 'left_path': 'set_in_set->[0]->label',
         'right_path': 'set_in_set->[1]->label', 'old': 'label:1', 'new': 'label:1111'}
    ]
}

assert ycm.to_dict(no_pairs=True) == expected

you can set no_pairs=False to get the all pairs. Here's a rendered example:

enter image description here

As for the example here, you can use it as:

from jycm.helper import make_ignore_order_func
from jycm.jycm import YouchamaJsonDiffer

left = {
    "data": [{"x": 1, "y": 2}, {"x": 3, "y": 4}]
}

right = {
    "data": [{"x": 3, "y": 4}, {"x": 1, "y": 2}]
}

ycm = YouchamaJsonDiffer(left, right, ignore_order_func=make_ignore_order_func([
    "^data",
]))

ycm.diff()

assert ycm.to_dict(no_pairs=True) == {}

Bonus, you the values are interrupted as coordinates on plain, you can even define a operator to determine whether two points should be matched!(Then comparing their values)

Here's the code:

from typing import Tuple

from jycm.helper import make_ignore_order_func
from jycm.jycm import YouchamaJsonDiffer
from jycm.operator import BaseOperator
import math

left = {
    "data": [
        {"x": 1, "y": 1},
        {"x": 10, "y": 10},
        {"x": 100, "y": 100}
    ]
}

right = {
    "data": [
        {"x": 150, "y": 150},
        {"x": 10, "y": 11},
        {"x": 2, "y": 3}
    ]
}


class L2DistanceOperator(BaseOperator):
    __operator_name__ = "operator:l2distance"
    __event__ = "operator:l2distance"

    def __init__(self, path_regex, distance_threshold):
        super().__init__(path_regex=path_regex)
        self.distance_threshold = distance_threshold

    def diff(self, level: 'TreeLevel', instance, drill: bool) -> Tuple[bool, float]:
        distance = math.sqrt(
            (level.left["x"] - level.right["x"]) ** 2 + (level.left["y"] - level.right["y"]) ** 2
        )
        info = {
            "distance": distance,
            "distance_threshold": self.distance_threshold,
            "pass": distance < self.distance_threshold
        }

        if not drill:
            instance.report(self.__event__, level, info)
            return False, 1 if info["pass"] else 0
        return True, 1 if info["pass"] else 0


ycm = YouchamaJsonDiffer(left, right, ignore_order_func=make_ignore_order_func([
    "^data$",
]), custom_operators=[
    L2DistanceOperator("^data->\\[.*\\]$", 10),
])

ycm.diff()

expected = {
    'just4vis:pairs': [
        {'left': 1, 'right': 2, 'left_path': 'data->[0]->x', 'right_path': 'data->[2]->x'},
        {'left': {'x': 1, 'y': 1}, 'right': {'x': 2, 'y': 3}, 'left_path': 'data->[0]',
         'right_path': 'data->[2]'},
        {'left': 1, 'right': 3, 'left_path': 'data->[0]->y', 'right_path': 'data->[2]->y'},
        {'left': {'x': 1, 'y': 1}, 'right': {'x': 2, 'y': 3}, 'left_path': 'data->[0]',
         'right_path': 'data->[2]'},
        {'left': {'x': 1, 'y': 1}, 'right': {'x': 2, 'y': 3}, 'left_path': 'data->[0]',
         'right_path': 'data->[2]'}
    ],
    'list:add': [
        {'left': '__NON_EXIST__', 'right': {'x': 150, 'y': 150}, 'left_path': '', 'right_path': 'data->[0]'}
    ],
    'list:remove': [
        {'left': {'x': 100, 'y': 100}, 'right': '__NON_EXIST__', 'left_path': 'data->[2]', 'right_path': ''}
    ],
    'operator:l2distance': [
        {'left': {'x': 1, 'y': 1}, 'right': {'x': 2, 'y': 3}, 'left_path': 'data->[0]',
         'right_path': 'data->[2]', 'distance': 2.23606797749979, 'distance_threshold': 10,
         'pass': True},
        {'left': {'x': 10, 'y': 10}, 'right': {'x': 10, 'y': 11}, 'left_path': 'data->[1]',
         'right_path': 'data->[1]', 'distance': 1.0, 'distance_threshold': 10,
         'pass': True}
    ],
    'value_changes': [
        {'left': 1, 'right': 2, 'left_path': 'data->[0]->x', 'right_path': 'data->[2]->x', 'old': 1, 'new': 2},
        {'left': 1, 'right': 3, 'left_path': 'data->[0]->y', 'right_path': 'data->[2]->y', 'old': 1, 'new': 3},
        {'left': 10, 'right': 11, 'left_path': 'data->[1]->y', 'right_path': 'data->[1]->y', 'old': 10, 'new': 11}
    ]
}
assert ycm.to_dict() == expected

As you can see jycm report addition and remove for points {'x': 150, 'y': 150} and {'x': 100, 'y': 100} for their distances are too far (more than 10) and value-change for the other two points.

P.S. RENDERER DEMO

gif-show

Share:
26,035
Admin
Author by

Admin

Updated on July 09, 2022

Comments

  • Admin
    Admin almost 2 years

    Please link me to answer if this has already been answered, my problem is i want to get diff of multilevel json which is unordered.

    x=json.loads('''[{"y":2,"x":1},{"x":3,"y":4}]''')
    y=json.loads('''[{"x":1,"y":2},{"x":3,"y":4}]''')
    z=json.loads('''[{"x":3,"y":4},{"x":1,"y":2}]''')
    
    import json_tools as jt
    import json_delta as jd
    
    
    print jt.diff(y,z)
    print jd.diff(y,z)
    print y==z
    print x==y
    

    output is

    [{'prev': 2, 'value': 4, 'replace': u'/0/y'}, {'prev': 1, 'value': 3, 'replace': u'/0/x'}, {'prev': 4, 'value': 2, 'replace': u'/1/y'}, {'prev': 3, 'value': 1, 'replace': u'/1/x'}]
    [[[2], {u'y': 2, u'x': 1}], [[0]]]
    False
    True
    

    my question is how can i get y and z to be equal or if there are actual differences depending on non-order of the JSON.

    kind of unordered List of dictionaries but i am looking for something which is level-proof that is list/dict of dictionaries of list/dictionaries ...