Thursday, June 18, 2015

Overriding Object.GetHashCode

Different objects have different memory address

namespace OverridingGetHashCode
{
    class Program
    {
        static void Main(string[] args)
        {
            var a = new Number() { Value = 1 };
            var b = new Number() { Value = 1 };

            // a,b contains equals semantic values but the Equals returns false
            // cause a and b object addresses are different.
            Debug.Assert(!a.Equals(b));
        }
    }

    public class Number
    {
        public double Value { get; set; }
    }

}

Overriding Object.Equals method

namespace OverridingGetHashCode
{
    class Program
    {
        static void Main(string[] args)
        {
            var a = new Number() { Value = 1 };
            var b = new Number() { Value = 1 };

            // a,b results equals cause the overridden Equals method checks
            // against semantic values of two different objects.
            Debug.Assert(a.Equals(b));
        }
    }

    public class Number
    {
        public double Value { get; set; }

        public override bool Equals(object obj)
        {
            return Value == ((Number)obj).Value;
        }
    }

}

Object.Equals may not enough for collections: use of GetHashCode

namespace OverridingGetHashCode
{
    class Program
    {
        static void Main(string[] args)
        {
            var hs = new HashSet<Number>();
            var a = new Number() { Value = 1 };
            var b = new Number() { Value = 1 };

            hs.Add(a);

            // the hashset contains a, but not the semantic equals object b
            // cause the GetHashCode of b is different from a by default
            // (different instances)
            Debug.Assert(hs.Contains(a));
            Debug.Assert(!hs.Contains(b));
        }
    }

    public class Number
    {
        public double Value { get; set; }

        public override bool Equals(object obj)
        {
            return Value == ((Number)obj).Value;
        }
    }

}

With the use of GetHashCode two different object instances have a chance to be Object.Equals

namespace OverridingGetHashCode
{
    class Program
    {
        static void Main(string[] args)
        {
            var hs = new HashSet<Number>();
            var a = new Number() { Value = 1 };
            var b = new Number() { Value = 1 };

            hs.Add(a);

            // hashset contains b from the semantic point of view cause equals a
            Debug.Assert(hs.Contains(a));
            Debug.Assert(hs.Contains(b));
        }
    }

    public class Number
    {
        public double Value { get; set; }

        public override bool Equals(object obj)
        {
            return Value == ((Number)obj).Value;
        }

        public override int GetHashCode()
        {
            return Value.GetHashCode();
        }
    }

}

Features and requirements to override GetHashCode method

  1. two semantic equals objects must have the same hash code number or they definitively considered different regardless the Equals result.
  2. two different semantic objects can have the same hash code number (collisions), but
  3. avoid that two different sematic object have the same hash code number, cause
  4. more collision results in more calls to the Equals method
  5. maintain minimal cpu overhead on gethashcode
  6. when composing the xor expression of fields in the hashcode include only those that are used against a simple equality check ( == ) in the Equals 

If not meet the requirements (1) then two semantic equals objects will be considered different regardless the Equals method simply cause the hash code, fast check, states the objects are definitively different.

If two different semantic objects share the same hash code (2) then an additional check using Equals will ensure with a detailed check if these two objects are the same at all. (3) This will results in a slower check cause the detailed check using Equals will span over more objects, so is best to avoid hashcode collision for really different objects.

Xor of GetHashCode members

Computing the xor value of more integers numbers is a fast way to retrieve a combination of these numbers.

When xor works

namespace OverridingGetHashCode
{
    class Program
    {
        static void Main(string[] args)
        {
            var hs = new HashSet<Number>();
            var a = new Number() { Value = 1, ValueB = 2 };
            var b = new Number() { Value = 1, ValueB = 2 };

            hs.Add(a);

            // hashset contains b from the sematic point of view cause equals a
            Debug.Assert(hs.Contains(a));
            Debug.Assert(hs.Contains(b));
        }
    }

    public class Number
    {
        public double Value { get; set; }
        public double ValueB { get; set; }

        public override bool Equals(object obj)
        {
            return Value == ((Number)obj).Value && ValueB == ((Number)obj).ValueB;
        }

        public override int GetHashCode()
        {
            return Value.GetHashCode() ^ ValueB.GetHashCode();
        }
    }

}

When xor not works

namespace OverridingGetHashCode
{
    class Program
    {
        static void Main(string[] args)
        {
            var hs = new HashSet<Number>();
            var a = new Number() { Value = 1, Tolerance = .1 };
            var b = new Number() { Value = 1.2, Tolerance = .5 };

            // a and b are object semantically equals
            Debug.Assert(a.Equals(b));

            hs.Add(a);
            
            Debug.Assert(hs.Contains(a));
            Debug.Assert(!hs.Contains(b)); // hash not find b in the set!
        }
    }

    public class Number
    {
        public double Value { get; set; }
        public double Tolerance { get; set; }

        public override bool Equals(object obj)
        {
            var other = (Number)obj;
            var diff = Math.Abs(Value - other.Value);
            var maxtol = Math.Max(Tolerance, other.Tolerance);

            return diff <= maxtol;
        }

        public override int GetHashCode()
        {
            return Value.GetHashCode() ^ Tolerance.GetHashCode(); // wrong
        }
    }

}

The example above shown that using xor of the field without reasoning is not a good practice: the override of GetHashCode is not an obvious task.

Inserting the xor with the Value and Tolerance fields in the GetHashCode does not comply with the first requirement "Two semantic equals objects must have the same hash code number or they definitively considered different regardless the Equals result".

In fact is easy to see that the objects

            var a = new Number() { Value = 1, Tolerance = .1 };
            var b = new Number() { Value = 1.2, Tolerance = .5 };

appears different at a simple view ( they have Value and Tolerance different ).

More it not comply with the 6) rule "when composing the xor expression of fields in the hashcode include only those that are used against a simple equality check ( == ) in the Equals", in fact the Equals contains complex operations over Value and Tolerance fields :

        public override bool Equals(object obj)
        {
            var other = (Number)obj;
            var diff = Math.Abs(Value - other.Value);
            var maxtol = Math.Max(Tolerance, other.Tolerance);

            return diff <= maxtol;
        }

A basic solution to let the HashSet works as expected would be the follow :

...

        public override int GetHashCode()
        {
            return 0;
        }
...

In other words we lost the behavior of an hashset and we are using effectively a List<Number> collection, cause to search an object the worst case is to check against all N objects in the list using the Equals method. After all, not all classes implements complex Equals methods.

Implements IEquatable<T>

When overriding Equals(Object) method is a good practice to mark the class as IEquatable<T> and implements the Equals(T) too. (see C# objects equality).


Creative Commons License
Overriding Object.GetHashCode by Lorenzo Delana is licensed under a Creative Commons Attribution 4.0 International License.

Tuesday, June 2, 2015

C# objects equality

Object Equality Schematics

Equals(Object) method override

  • override Equals(Object) method let you to check if two different object instances are the same object from their content point of view, using a.Equals(b).

Equality and inequality operators overload

  • when override Equals(Object) method is a good practice to override the equality == and inequality != operators to avoid mistake if never mind to use the Equals method.

IEquatable<T> interface implementation

  • when override Equals(Object) method is a good practice to implements the IEquatable<T> interface and call the Equals(T) method from the Equals(Object) to increment efficiency avoiding box-unboxing from Object to T type.

Collections HashSet and Dictionary

  • overriding of the Equals method in objects allow to achieve follow results for Equals objects :
    • HashSet avoid double insertion.
    • Dictionary find object key already present using Equals objects not inserted in the dictionary.

GetHashCode

  • when override Equals method for use of objects in collections :
    • must implement GetHashCode or as default it will declare two object different regardless they are equal using Equals method.
    • when two objects have the same hashcode then a second detailed check will ensure they equals using Equals method :
      • two different object can have the same hash code ( this of course decrease performance cause more detailed equals check will be done eg. key collisions ).
      • two equals object must have the same hash code or the equals method check will skipped and they are stated as different.

Sample unit test code

Without Equals(Object)

using System.Diagnostics;

namespace TestHashCode
{
    class Program
    {
        static void Main(string[] args)
        {
            var a = new Test() { value = 10 };
            var b = new Test() { value = 10 };

            Debug.Assert(!a.Equals(b));
        }
    }

    public class Test
    {
        public int value { get; set; }
    }

}

With Equals(Object) and without equality inequality operators

using System.Diagnostics;

namespace TestHashCode
{
    class Program
    {
        static void Main(string[] args)
        {
            var a = new Test() { value = 10 };
            var b = new Test() { value = 10 };

            Debug.Assert(!ReferenceEquals(a, b)); // different instances
            Debug.Assert(a != b); // operator not overloaded
            Debug.Assert(a.Equals(b)); // same equals object            
        }
    }

    public class Test
    {
        public int value { get; set; }
        
        public override bool Equals(object obj)
        {
            return value == ((Test)obj).value;
        }        
    }

}

With Equals(Object) and with equality inequality operators

using System.Diagnostics;

namespace TestHashCode
{
    class Program
    {
        static void Main(string[] args)
        {
            var a = new Test() { value = 10 };
            var b = new Test() { value = 10 };

            Debug.Assert(!ReferenceEquals(a, b)); // different instances
            Debug.Assert(a == b); // operator overloaded            
            Debug.Assert(a.Equals(b)); // same equals object            
        }
    }

    public class Test
    {
        public int value { get; set; }
        
        public override bool Equals(object obj)
        {
            return value == ((Test)obj).value;
        }

        public static bool operator ==(Test x, Test y)
        {
            return x.Equals(y);
        }

        public static bool operator !=(Test x, Test y)
        {
            return !x.Equals(y);
        }
    }

}

With Equals(T)

Test results (in Release mode) :
  • with Equals(Object) : ~5 sec
  • with Equals(T) : ~1 sec

using System;
using System.Diagnostics;

namespace TestHashCode
{
    class Program
    {
        static void Main(string[] args)
        {
            var a = new Test() { value = 10 };
            var b = new Test() { value = 10 };
            var c = 0;

            var sw = new Stopwatch();
            sw.Start();
            for (int i = 0; i < 1500000000; ++i)
            {
                if (a == b) ++c;
            }
            Console.WriteLine(sw.Elapsed);
        }
    }

    public class Test : IEquatable<Test>
    {
        public int value { get; set; }        
        
        public bool Equals(Test other)
        {
            return value == other.value;
        }
        
        public override bool Equals(object obj)
        {
            return Equals((Test)obj); // recall Equals(T)
        }              
        
        public static bool operator ==(Test x, Test y)
        {
            return x.Equals(y);
        }

        public static bool operator !=(Test x, Test y)
        {
            return !x.Equals(y);
        }
    }

}

Without GetHashCode

using System;
using System.Collections.Generic;
using System.Diagnostics;

namespace TestHashCode
{
    class Program
    {
        static void Main(string[] args)
        {
            var a = new Test() { value = 10 };
            var b = new Test() { value = 10 };

            var hs = new HashSet<Test>();
            hs.Add(a);
            // element b will added cause GetHashCode returns different values
            // for different objects as default
hs.Add(b); 
            
            Debug.Assert(hs.Count == 2);

            var dict = new Dictionary<Test, int>();
            dict.Add(a, a.value);
            Debug.Assert(!dict.ContainsKey(b));            
        }
    }

    public class Test : IEquatable<Test>
    {
        public int value { get; set; }        
        
        public bool Equals(Test other)
        {
            return value == other.value;
        }
        
        public override bool Equals(object obj)
        {
            return Equals((Test)obj); // recall Equals(T)
        }              
        
        public static bool operator ==(Test x, Test y)
        {
            return x.Equals(y);
        }

        public static bool operator !=(Test x, Test y)
        {
            return !x.Equals(y);
        }
    }

}

With GetHashCode

using System;
using System.Collections.Generic;
using System.Diagnostics;

namespace TestHashCode
{
    class Program
    {
        static void Main(string[] args)
        {
            var a = new Test() { value = 10 };
            var b = new Test() { value = 10 };

            var hs = new HashSet<Test>();
            hs.Add(a);
            // element b will not added despite its different instance
hs.Add(b); Debug.Assert(hs.Count == 1); var dict = new Dictionary<Test, int>(); dict.Add(a, a.value); Debug.Assert(dict.ContainsKey(b)); } } public class Test : IEquatable<Test> { public int value { get; set; } public bool Equals(Test other) { return value == other.value; } public override bool Equals(object obj) { return Equals((Test)obj); // recall Equals(T) } public static bool operator ==(Test x, Test y) { return x.Equals(y); } public static bool operator !=(Test x, Test y) { return !x.Equals(y); } public override int GetHashCode() { return value; } } }

Reference material


Creative Commons License
C# Objects Equality by Lorenzo Delana is licensed under a Creative Commons Attribution 4.0 International License.