From: <pa...@us...> - 2011-06-27 00:59:54
|
Revision: 5969 http://nhibernate.svn.sourceforge.net/nhibernate/?rev=5969&view=rev Author: patearl Date: 2011-06-27 00:59:46 +0000 (Mon, 27 Jun 2011) Log Message: ----------- Linq: Improved join detection in linq where clauses. Uses an easier to understand algorithm that corrects many cases. Also added some related tests. Modified Paths: -------------- trunk/nhibernate/src/NHibernate/Linq/Visitors/WhereJoinDetector.cs trunk/nhibernate/src/NHibernate/NHibernate.csproj trunk/nhibernate/src/NHibernate.Test/NHibernate.Test.csproj Added Paths: ----------- trunk/nhibernate/src/NHibernate/Linq/Visitors/PossibleValueSet.cs trunk/nhibernate/src/NHibernate.Test/NHSpecificTest/NH2583/InnerJoinFixture.cs Removed Paths: ------------- trunk/nhibernate/src/NHibernate/Linq/Visitors/WhereJoinDetectorDesignNotes.txt Added: trunk/nhibernate/src/NHibernate/Linq/Visitors/PossibleValueSet.cs =================================================================== --- trunk/nhibernate/src/NHibernate/Linq/Visitors/PossibleValueSet.cs (rev 0) +++ trunk/nhibernate/src/NHibernate/Linq/Visitors/PossibleValueSet.cs 2011-06-27 00:59:46 UTC (rev 5969) @@ -0,0 +1,370 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace NHibernate.Linq.Visitors +{ + /// <summary> + /// Represents a possible set of values for a computation. For example, an expression may + /// be null, it may be a non-null value, or we may even have a constant value that is known + /// precisely. This class contains operators that know how to combine these values with + /// each other. This class is intended to be used to provide static analysis of expressions + /// before we hit the database. As an example for future improvement, we could handle + /// ranges of numeric values. We can also improve this by handling operators such as the + /// comparison operators and arithmetic operators. They are currently handled by naive + /// null checks. + /// </summary> + public class PossibleValueSet + { + private System.Type ExpressionType { get; set; } + private bool ContainsNull { get; set; } + private bool ContainsAllNonNullValues { get; set; } + private List<object> DistinctValues + { + get + { + if (_DistinctValues == null) + _DistinctValues = new List<object>(); + return _DistinctValues; + } + } + private List<object> _DistinctValues; + + private bool ContainsAnyNonNullValues + { + get { return ContainsAllNonNullValues || DistinctValues.Any(); } + } + + private PossibleValueSet(System.Type expressionType) + { + ExpressionType = expressionType; + } + + public bool Contains(object obj) + { + if (obj == null) + return ContainsNull; + + if (obj.GetType() != ExpressionType) + return false; + + if (ContainsAllNonNullValues) + return true; + + return DistinctValues.Contains(obj); + } + + #region Operations + + public PossibleValueSet Add(PossibleValueSet pvs, System.Type resultType) + { + return MathOperation(pvs, resultType); + } + + public PossibleValueSet Divide(PossibleValueSet pvs, System.Type resultType) + { + return MathOperation(pvs, resultType); + } + + public PossibleValueSet Modulo(PossibleValueSet pvs, System.Type resultType) + { + return MathOperation(pvs, resultType); + } + + public PossibleValueSet Multiply(PossibleValueSet pvs, System.Type resultType) + { + return MathOperation(pvs, resultType); + } + + public PossibleValueSet Power(PossibleValueSet pvs, System.Type resultType) + { + return MathOperation(pvs, resultType); + } + + public PossibleValueSet Subtract(PossibleValueSet pvs, System.Type resultType) + { + return MathOperation(pvs, resultType); + } + + public PossibleValueSet And(PossibleValueSet pvs, System.Type resultType) + { + return MathOperation(pvs, resultType); + } + + public PossibleValueSet Or(PossibleValueSet pvs, System.Type resultType) + { + return MathOperation(pvs, resultType); + } + + public PossibleValueSet ExclusiveOr(PossibleValueSet pvs, System.Type resultType) + { + return MathOperation(pvs, resultType); + } + + public PossibleValueSet LeftShift(PossibleValueSet pvs, System.Type resultType) + { + return MathOperation(pvs, resultType); + } + + public PossibleValueSet RightShift(PossibleValueSet pvs, System.Type resultType) + { + return MathOperation(pvs, resultType); + } + + private PossibleValueSet MathOperation(PossibleValueSet pvs, System.Type resultType) + { + // If either side is only null, the result will be null. + if (!ContainsAnyNonNullValues || !pvs.ContainsAnyNonNullValues) + return CreateNull(resultType); + + // If neither side is null, the result cannot be null. + if (!ContainsNull && !pvs.ContainsNull) + return CreateAllNonNullValues(resultType); + + // Otherwise, the result can be anything. + return CreateAllValues(resultType); + } + + public PossibleValueSet AndAlso(PossibleValueSet pvs) + { + /* + * T && T == T + * T && F == F + * T && N == N + * F && T == F + * F && F == F + * F && N == F + * N && T == N + * N && F == F + * N && N == N + */ + + CheckType<bool>(); + pvs.CheckType<bool>(); + PossibleValueSet result = new PossibleValueSet(typeof(bool)); + + if (Contains(true) && pvs.Contains(true) && !result.Contains(true)) result.DistinctValues.Add(true); + if (Contains(true) && pvs.Contains(false) && !result.Contains(false)) result.DistinctValues.Add(false); + if (Contains(true) && pvs.Contains(null)) result.ContainsNull = true; + if (Contains(false) && pvs.Contains(true) && !result.Contains(false)) result.DistinctValues.Add(false); + if (Contains(false) && pvs.Contains(false) && !result.Contains(false)) result.DistinctValues.Add(false); + if (Contains(false) && pvs.Contains(null) && !result.Contains(false)) result.DistinctValues.Add(false); + if (Contains(null) && pvs.Contains(true)) result.ContainsNull = true; + if (Contains(null) && pvs.Contains(false) && !result.Contains(false)) result.DistinctValues.Add(false); + if (Contains(null) && pvs.Contains(null)) result.ContainsNull = true; + + return result; + } + + public PossibleValueSet OrElse(PossibleValueSet pvs) + { + /* + * T || T == T + * T || F == T + * T || N == T + * F || T == T + * F || F == F + * F || N == N + * N || T == T + * N || F == N + * N || N == N + */ + + CheckType<bool>(); + pvs.CheckType<bool>(); + + PossibleValueSet result = new PossibleValueSet(typeof(bool)); + + if (Contains(true) && pvs.Contains(true) && !result.Contains(true)) result.DistinctValues.Add(true); + if (Contains(true) && pvs.Contains(false) && !result.Contains(true)) result.DistinctValues.Add(true); + if (Contains(true) && pvs.Contains(null) && !result.Contains(true)) result.DistinctValues.Add(true); + if (Contains(false) && pvs.Contains(true) && !result.Contains(true)) result.DistinctValues.Add(true); + if (Contains(false) && pvs.Contains(false) && !result.Contains(false)) result.DistinctValues.Add(false); + if (Contains(false) && pvs.Contains(null)) result.ContainsNull = true; + if (Contains(null) && pvs.Contains(true) && !result.Contains(true)) result.DistinctValues.Add(true); + if (Contains(null) && pvs.Contains(false)) result.ContainsNull = true; + if (Contains(null) && pvs.Contains(null)) result.ContainsNull = true; + + return result; + } + + public PossibleValueSet Equal(PossibleValueSet pvs) + { + return ComparisonOperation(pvs); + } + + public PossibleValueSet NotEqual(PossibleValueSet pvs) + { + return ComparisonOperation(pvs); + } + + public PossibleValueSet GreaterThanOrEqual(PossibleValueSet pvs) + { + return ComparisonOperation(pvs); + } + + public PossibleValueSet GreaterThan(PossibleValueSet pvs) + { + return ComparisonOperation(pvs); + } + + public PossibleValueSet LessThan(PossibleValueSet pvs) + { + return ComparisonOperation(pvs); + } + + public PossibleValueSet LessThanOrEqual(PossibleValueSet pvs) + { + return ComparisonOperation(pvs); + } + + private PossibleValueSet ComparisonOperation(PossibleValueSet pvs) + { + return MathOperation(pvs, typeof (bool)); + } + + public PossibleValueSet Coalesce(PossibleValueSet pvs) + { + if (!ContainsNull) + return this; + + PossibleValueSet result = new PossibleValueSet(ExpressionType); + result.DistinctValues.AddRange(DistinctValues); + result.ContainsAllNonNullValues = ContainsAllNonNullValues; + + result.ContainsNull = pvs.ContainsNull; + result.ContainsAllNonNullValues |= pvs.ContainsAllNonNullValues; + result.DistinctValues.AddRange(pvs.DistinctValues.Except(result.DistinctValues)); + + return result; + } + + // Unary Operators + + public PossibleValueSet Not() + { + CheckType<bool>(); + + PossibleValueSet result = new PossibleValueSet(typeof(bool)); + result.ContainsNull = ContainsNull; + result.DistinctValues.AddRange(DistinctValues.Cast<bool>().Select(v => !v).Cast<object>()); + return result; + } + + public PossibleValueSet BitwiseNot(System.Type resultType) + { + return UnaryMathOperation(resultType); + } + + public PossibleValueSet ArrayLength(System.Type resultType) + { + return CreateAllNonNullValues(typeof (int)); + } + + public PossibleValueSet Convert(System.Type resultType) + { + return UnaryMathOperation(resultType); + } + + public PossibleValueSet Negate(System.Type resultType) + { + return UnaryMathOperation(resultType); + } + + public PossibleValueSet UnaryPlus(System.Type resultType) + { + return this; + } + + private PossibleValueSet UnaryMathOperation(System.Type resultType) + { + return + !ContainsAnyNonNullValues ? CreateNull(resultType) : + !ContainsNull ? CreateAllNonNullValues(resultType) : + CreateAllValues(resultType); + } + + // Special Operators + + public PossibleValueSet IsNull() + { + PossibleValueSet result = new PossibleValueSet(typeof(bool)); + if (ContainsNull) + result.DistinctValues.Add(true); + if (ContainsAllNonNullValues || DistinctValues.Any()) + result.DistinctValues.Add(false); + return result; + } + + public PossibleValueSet IsNotNull() + { + PossibleValueSet result = new PossibleValueSet(typeof(bool)); + if (ContainsNull) + result.DistinctValues.Add(false); + if (ContainsAllNonNullValues || DistinctValues.Any()) + result.DistinctValues.Add(true); + return result; + } + + public PossibleValueSet MemberAccess(System.Type resultType) + { + // If instance is null, member will be null too. + if (!ContainsAnyNonNullValues) + return CreateNull(resultType); + + // Otherwise, value could be anything due to value of member. + return CreateAllValues(resultType); + } + + #endregion + + private void CheckType<T>() + { + if (ExpressionType != typeof(T)) + throw new AssertionFailure("Cannot perform desired possible value set operation on expression of type: " + ExpressionType); + } + + public static PossibleValueSet CreateNull(System.Type expressionType) + { + return new PossibleValueSet(expressionType) {ContainsNull = true}; + } + + public static PossibleValueSet CreateAllNonNullValues(System.Type expressionType) + { + PossibleValueSet result = new PossibleValueSet(expressionType); + if (expressionType == typeof(bool)) + { + result.DistinctValues.Add(true); + result.DistinctValues.Add(false); + } + else + { + result.ContainsAllNonNullValues = true; + } + return result; + } + + public static PossibleValueSet CreateAllValues(System.Type expressionType) + { + PossibleValueSet result = CreateAllNonNullValues(expressionType); + result.ContainsNull = true; + return result; + } + + public static PossibleValueSet Create(System.Type expressionType, params object[] values) + { + PossibleValueSet result = new PossibleValueSet(expressionType); + foreach (var v in values) + { + if (v == null) + result.ContainsNull = true; + else if (v.GetType() == expressionType && !result.DistinctValues.Contains(v)) + result.DistinctValues.Add(v); + else + throw new AssertionFailure("Don't know how to add value to possible value set of type " + expressionType + ": " + v); + } + return result; + } + } +} Modified: trunk/nhibernate/src/NHibernate/Linq/Visitors/WhereJoinDetector.cs =================================================================== --- trunk/nhibernate/src/NHibernate/Linq/Visitors/WhereJoinDetector.cs 2011-06-21 16:31:37 UTC (rev 5968) +++ trunk/nhibernate/src/NHibernate/Linq/Visitors/WhereJoinDetector.cs 2011-06-27 00:59:46 UTC (rev 5969) @@ -1,126 +1,69 @@ -// FIXME - Are there other things that can convert N into T? What about the ?: operator? using System; using System.Collections.Generic; +using System.Linq; using System.Linq.Expressions; using NHibernate.Linq.ReWriters; using Remotion.Linq.Clauses.Expressions; -using Remotion.Linq.Utilities; namespace NHibernate.Linq.Visitors { /// <summary> /// The WhereJoinDetector creates the joins for the where clause, including - /// optimizations for inner joins. The algorithms are explained in - /// the accompanying text file. + /// optimizations for inner joins. + /// + /// The detector asks the following question: + /// Can an empty outer join ever return a record (ie. produce true in the where clause)? + /// If not, it's equivalent to an inner join since empty joins that can't produce true + /// never appear in the result set. + /// + /// A record (object) will be in the result if the evaluation of the condition in 3-value SQL + /// logic will return true; it will not be in the result if the result is either logical-null + /// or false. The difference between outer joining and inner joining is that with the latter, + /// objects are missing from the set on which the condition is checked. Thus, inner joins + /// "emulates" a result that is logical-null or false. And therefore, we can replace an outer + /// join with an inner join only if the resulting condition was not true on the outer join in + /// the first place when there was an "empty outer join" - i.e., the outer join had to add + /// nulls because there was no joinable record. These nulls can appear even for a column + /// that is not nullable. + /// + /// For example: + /// a.B.C == 1 could never produce true if B didn't match any rows, so it's safe to inner join. + /// a.B.C == null could produce true even if B didn't match any rows, so we can't inner join. + /// a.B.C == 1 && a.D.E == 1 can be inner joined. + /// a.B.C == 1 || a.D.E == 1 must be outer joined. + /// + /// By default we outer join via the code in VisitExpression. The use of inner joins is only + /// an optimization hint to the database. + /// + /// More examples: + /// a.B.C == 1 || a.B.C == null + /// We don't need multiple joins for this. When we reach the ||, we ask the value sets + /// on either side if they have a value for when a.B.C is emptily outer joined. Both of + /// them do, so those values are combined. + /// a.B.C == 1 || a.D.E == 1 + /// In this case, there is no value for a.B.C on the right side, so we use the possible + /// values for the entire expression, ignoring specific members. We only test for the + /// empty outer joining of one member expression at a time, since we can't guarantee that + /// they will all be emptily outer joined at the same time. + /// a.B.C ?? a.D.E + /// Even though each side is null when emptily outer joined, we can't promise that a.D.E + /// will be emptily outer joined when a.B.C is. Therefore, despite both sides being + /// null, the result may not be. + /// + /// There was significant discussion on the developers mailing list regarding this topic. See also NH-2583. + /// + /// The code here is based on the excellent work started by Harald Mueller. /// </summary> internal class WhereJoinDetector : AbstractJoinDetector { - // Possible results of a condition when emptily outer joined. - private const int T = 1; - private const int N = 2; - private const int TN = T | N; - private const int F = 4; - private const int TF = T | F; - private const int NF = N | F; - private const int TNF = T | N | F; + // TODO: There are a number of types of expressions that we didn't handle here due to time constraints. For example, the ?: operator could be checked easily. - // Composition rules for possible results when emptily outer joined - // for &&, ||, !. - private static readonly int[,] AND = new int[8, 8]; - private static readonly int[,] OR = new int[8, 8]; - private static readonly int[] NOT = new int[8]; - private static readonly int[] ISNULL = new int[8]; - private static readonly int[] ISNOTNULL = new int[8]; + private Stack<bool> _handled = new Stack<bool>(); + + // Stack of result values of each expression. After an expression has processed itself, it adds itself to the stack. + private Stack<ExpressionValues> _values = new Stack<ExpressionValues>(); - /// <summary> - /// Setup of <see cref="AND"/>, <see cref="OR"/>, <see cref="NOT"/>. - /// </summary> - static WhereJoinDetector() - { - // Setup of simple values according to SQL 3-valued logic. - NOT[T] = F; - NOT[N] = N; - NOT[F] = T; - ISNULL[T] = F; - ISNULL[N] = T; - ISNULL[F] = F; - ISNOTNULL[T] = T; - ISNOTNULL[N] = F; - ISNOTNULL[F] = T; - - foreach (var p in new[] { T, N, F }) - { - OR[p, p] = AND[p, p] = p; - AND[p, F] = AND[F, p] = F; - OR[p, T] = OR[T, p] = T; - } - AND[T, N] = AND[N, T] = N; - OR[F, N] = OR[N, F] = N; - - // Setup of compound values. Idea: Split each - // compound value to simple values, compute results - // for simple values and or them together. - var allValues = new[] { T, N, TN, F, TF, NF, TNF }; - - // How compound values are split up into simple values. - var split = new int[8][]; - split[T] = new[] { T }; - split[N] = new[] { N }; - split[TN] = new[] { T, N }; - split[F] = new[] { F }; - split[TF] = new[] { T, F }; - split[NF] = new[] { N, F }; - split[TNF] = new[] { T, N, F }; - - foreach (var p in allValues) - { - int[] splitP = split[p]; - // We only need to compute unary operations for compound values. - if (splitP.Length > 1) - { - int notResult = 0; - int isNullResult = 0; - int isNotNullResult = 0; - foreach (var p0 in splitP) - { - notResult |= NOT[p0]; - isNullResult |= ISNULL[p0]; - isNotNullResult |= ISNOTNULL[p0]; - } - NOT[p] = notResult; - ISNULL[p] = isNullResult; - ISNOTNULL[p] = isNotNullResult; - } - foreach (var q in allValues) - { - int[] splitQ = split[q]; - // We must compute AND and OR if both values are compound, - // *but also* if one is compound and the other is simple - // (e.g. T and TNF). - if (splitP.Length > 1 || splitQ.Length > 1) - { - int andResult = 0; - int orResult = 0; - foreach (var p0 in splitP) - { - foreach (var q0 in splitQ) - { - andResult |= AND[p0, q0]; - orResult |= OR[p0, q0]; - } - } - AND[p, q] = andResult; - OR[p, q] = orResult; - } - } - } - } - - // The following is used for all *condition* traversal (but not *expressions* that are not conditions). - // This is the "mapping" described in the text at NH-2583. - private Stack<Dictionary<string, int>> _memberExpressionMappings = new Stack<Dictionary<string, int>>(); - - // The following two are used for member expressions traversal. + // The following is used for member expressions traversal. private int _memberExpressionDepth = 0; internal @@ -133,221 +76,327 @@ { WhereJoinDetector f = new WhereJoinDetector(nameGenerator, isEntityDecider, joins, expressionMap); - f._memberExpressionMappings.Push(new Dictionary<string, int>()); - f.VisitExpression(expression); - foreach (var mapping in f._memberExpressionMappings.Pop()) + ExpressionValues values = f._values.Pop(); + + foreach (var memberExpression in values.MemberExpressions) { // If outer join can never produce true, we can safely inner join. - if ((mapping.Value & T) == 0) + if (!values.GetValues(memberExpression).Contains(true)) { - f.MakeInnerIfJoined(mapping.Key); + f.MakeInnerIfJoined(memberExpression); } } } - protected override Expression VisitBinaryExpression(BinaryExpression expression) + public override Expression VisitExpression(Expression expression) { - ArgumentUtility.CheckNotNull("expression", expression); + if (expression == null) + return null; - Expression baseResult = expression; - if (expression.NodeType == ExpressionType.AndAlso && expression.Type == typeof(bool)) + // To ensure safety in situations we don't understand, we default to "all possible values" + // if we can't process any expression in a known way. The SetResultValues() method is used + // to indicate that the expression has been processed, and what the results are. + + _handled.Push(false); + int originalCount = _values.Count; + + Expression result = base.VisitExpression(expression); + + if (!_handled.Pop()) { - // Case (a) from the text at NH-2583. - _memberExpressionMappings.Push(new Dictionary<string, int>()); - var newLeft = VisitExpression(expression.Left); - var leftMapping = _memberExpressionMappings.Pop(); + // If this expression was not handled in a known way, we throw away any values that might + // have been returned and we return "all values" for this expression, since we don't know + // what the expresson might result in. + while (_values.Count > originalCount) + _values.Pop(); + _values.Push(new ExpressionValues(PossibleValueSet.CreateAllValues(expression.Type))); + } - _memberExpressionMappings.Push(new Dictionary<string, int>()); - var newRight = VisitExpression(expression.Right); - var rightMapping = _memberExpressionMappings.Pop(); + return result; + } - BinaryMapping(leftMapping, rightMapping, AND); + protected override Expression VisitBinaryExpression(BinaryExpression expression) + { + var result = base.VisitBinaryExpression(expression); - // The following is copy-pasted from Relinq's visitor, as I had to split the code above. - var newConversion = (LambdaExpression)VisitExpression(expression.Conversion); - if (newLeft != expression.Left || newRight != expression.Right || newConversion != expression.Conversion) - baseResult = Expression.MakeBinary(expression.NodeType, newLeft, newRight, expression.IsLiftedToNull, expression.Method, newConversion); + if (expression.NodeType == ExpressionType.AndAlso) + { + HandleBinaryOperation((a, b) => a.AndAlso(b)); } - else if (expression.NodeType == ExpressionType.OrElse && expression.Type == typeof(bool)) + else if (expression.NodeType == ExpressionType.OrElse) { - // Case (b) - _memberExpressionMappings.Push(new Dictionary<string, int>()); - var newLeft = VisitExpression(expression.Left); - var leftMapping = _memberExpressionMappings.Pop(); + HandleBinaryOperation((a, b) => a.OrElse(b)); + } + else if (expression.NodeType == ExpressionType.NotEqual && IsNullConstantExpression(expression.Right)) + { + // Discard result from right null. Left is visited first, so it's below right on the stack. + _values.Pop(); - _memberExpressionMappings.Push(new Dictionary<string, int>()); - var newRight = VisitExpression(expression.Right); - var rightMapping = _memberExpressionMappings.Pop(); + HandleUnaryOperation(pvs => pvs.IsNotNull()); + } + else if (expression.NodeType == ExpressionType.NotEqual && IsNullConstantExpression(expression.Left)) + { + // Discard result from left null. + var right = _values.Pop(); + _values.Pop(); // Discard left. + _values.Push(right); - BinaryMapping(leftMapping, rightMapping, OR); + HandleUnaryOperation(pvs => pvs.IsNotNull()); + } + else if (expression.NodeType == ExpressionType.Equal && IsNullConstantExpression(expression.Right)) + { + // Discard result from right null. Left is visited first, so it's below right on the stack. + _values.Pop(); - // Again, the following is copy-pasted from Relinq's visitor, as I had to split the code above. - var newConversion = (LambdaExpression)VisitExpression(expression.Conversion); - if (newLeft != expression.Left || newRight != expression.Right || newConversion != expression.Conversion) - baseResult = Expression.MakeBinary(expression.NodeType, newLeft, newRight, expression.IsLiftedToNull, expression.Method, newConversion); + HandleUnaryOperation(pvs => pvs.IsNull()); } - else if (expression.Type == typeof(bool) - && expression.NodeType == ExpressionType.NotEqual - && (IsNullConstantExpression(expression.Right) || IsNullConstantExpression(expression.Left))) + else if (expression.NodeType == ExpressionType.Equal && IsNullConstantExpression(expression.Left)) { - // Case (h) - _memberExpressionMappings.Push(new Dictionary<string, int>()); - baseResult = base.VisitBinaryExpression(expression); - UnaryMapping(_memberExpressionMappings.Pop(), ISNOTNULL); + // Discard result from left null. + var right = _values.Pop(); + _values.Pop(); // Discard left. + _values.Push(right); + + HandleUnaryOperation(pvs => pvs.IsNull()); } - else if (expression.Type == typeof(bool) - && expression.NodeType == ExpressionType.Equal - && (IsNullConstantExpression(expression.Right) || IsNullConstantExpression(expression.Left))) + else if (expression.NodeType == ExpressionType.Coalesce) { - // Case (i) - _memberExpressionMappings.Push(new Dictionary<string, int>()); - baseResult = base.VisitBinaryExpression(expression); - UnaryMapping(_memberExpressionMappings.Pop(), ISNULL); + HandleBinaryOperation((a, b) => a.Coalesce(b)); } - else // +, * etc. + else if (expression.NodeType == ExpressionType.Add || expression.NodeType == ExpressionType.AddChecked) { - // Case (j) - baseResult = base.VisitBinaryExpression(expression); + HandleBinaryOperation((a, b) => a.Add(b, expression.Type)); } - return baseResult; + else if (expression.NodeType == ExpressionType.Divide) + { + HandleBinaryOperation((a, b) => a.Divide(b, expression.Type)); + } + else if (expression.NodeType == ExpressionType.Modulo) + { + HandleBinaryOperation((a, b) => a.Modulo(b, expression.Type)); + } + else if (expression.NodeType == ExpressionType.Multiply || expression.NodeType == ExpressionType.MultiplyChecked) + { + HandleBinaryOperation((a, b) => a.Multiply(b, expression.Type)); + } + else if (expression.NodeType == ExpressionType.Power) + { + HandleBinaryOperation((a, b) => a.Power(b, expression.Type)); + } + else if (expression.NodeType == ExpressionType.Subtract || expression.NodeType == ExpressionType.SubtractChecked) + { + HandleBinaryOperation((a, b) => a.Subtract(b, expression.Type)); + } + else if (expression.NodeType == ExpressionType.And) + { + HandleBinaryOperation((a, b) => a.And(b, expression.Type)); + } + else if (expression.NodeType == ExpressionType.Or) + { + HandleBinaryOperation((a, b) => a.Or(b, expression.Type)); + } + else if (expression.NodeType == ExpressionType.ExclusiveOr) + { + HandleBinaryOperation((a, b) => a.ExclusiveOr(b, expression.Type)); + } + else if (expression.NodeType == ExpressionType.LeftShift) + { + HandleBinaryOperation((a, b) => a.LeftShift(b, expression.Type)); + } + else if (expression.NodeType == ExpressionType.RightShift) + { + HandleBinaryOperation((a, b) => a.RightShift(b, expression.Type)); + } + else if (expression.NodeType == ExpressionType.Equal) + { + HandleBinaryOperation((a, b) => a.Equal(b)); + } + else if (expression.NodeType == ExpressionType.NotEqual) + { + HandleBinaryOperation((a, b) => a.NotEqual(b)); + } + else if (expression.NodeType == ExpressionType.GreaterThanOrEqual) + { + HandleBinaryOperation((a, b) => a.GreaterThanOrEqual(b)); + } + else if (expression.NodeType == ExpressionType.GreaterThan) + { + HandleBinaryOperation((a, b) => a.GreaterThan(b)); + } + else if (expression.NodeType == ExpressionType.LessThan) + { + HandleBinaryOperation((a, b) => a.LessThan(b)); + } + else if (expression.NodeType == ExpressionType.LessThanOrEqual) + { + HandleBinaryOperation((a, b) => a.LessThanOrEqual(b)); + } + + return result; } protected override Expression VisitUnaryExpression(UnaryExpression expression) { - ArgumentUtility.CheckNotNull("expression", expression); + Expression result = base.VisitUnaryExpression(expression); - Expression baseResult; if (expression.NodeType == ExpressionType.Not && expression.Type == typeof(bool)) { - // Case (c) from text at NH-2583. - _memberExpressionMappings.Push(new Dictionary<string, int>()); - baseResult = VisitExpression(expression.Operand); - UnaryMapping(_memberExpressionMappings.Pop(), NOT); + HandleUnaryOperation(pvs => pvs.Not()); } - else + else if (expression.NodeType == ExpressionType.Not) { - baseResult = base.VisitUnaryExpression(expression); + HandleUnaryOperation(pvs => pvs.BitwiseNot(expression.Type)); } - return baseResult; + else if (expression.NodeType == ExpressionType.ArrayLength) + { + HandleUnaryOperation(pvs => pvs.ArrayLength(expression.Type)); + } + else if (expression.NodeType == ExpressionType.Convert || expression.NodeType == ExpressionType.ConvertChecked) + { + HandleUnaryOperation(pvs => pvs.Convert(expression.Type)); + } + else if (expression.NodeType == ExpressionType.Negate || expression.NodeType == ExpressionType.NegateChecked) + { + HandleUnaryOperation(pvs => pvs.Negate(expression.Type)); + } + else if (expression.NodeType == ExpressionType.UnaryPlus) + { + HandleUnaryOperation(pvs => pvs.UnaryPlus(expression.Type)); + } + + return result; } - //protected override Expression VisitConditionalExpression(ConditionalExpression expression) + // We would usually get NULL if one of our inner member expresions was null. + // However, it's possible a method call will convert the null value from the failed join into a non-null value. + // This could be optimized by actually checking what the method does. For example StartsWith("s") would leave null as null and would still allow us to inner join. + //protected override Expression VisitMethodCallExpression(MethodCallExpression expression) //{ - // ArgumentUtility.CheckNotNull("expression", expression); - - // VisitExpression(expression.Test); - // // If the ?: returns bool, it is (most probably ...) a condition which may require outer joins. - // // TODO: Check check whether HQL accepts ?: conditions; if not, should be rewritten it as (a && b || !a && c). - // if (expression.Type == typeof(bool)) - // { - // ... - // } - // return expression; + // Expression result = base.VisitMethodCallExpression(expression); + // return result; //} - protected override Expression VisitMethodCallExpression(MethodCallExpression expression) - { - _memberExpressionMappings.Push(new Dictionary<string, int>()); - Expression result = base.VisitMethodCallExpression(expression); - // We would usually get NULL if one of our inner member expresions was null. (Mapped to N) - // However, it's possible a method call will convert the null value from the failed join into any one of True, False, or Null. (Mapped to TNF) - // This could be optimized by actually checking what the method does. For example StartsWith("s") would leave null as null and would still allow us to inner join. - FixedMapping(_memberExpressionMappings.Pop(), TNF); - return result; - } - protected override Expression VisitMemberExpression(MemberExpression expression) { - ArgumentUtility.CheckNotNull("expression", expression); + // The member expression we're visiting might be on the end of a variety of things, such as: + // a.B + // a.B.C + // (a.B ?? a.C).D + // I'm not sure what processing re-linq does to strange member expressions. + // TODO: I suspect this code doesn't add the right joins for the last case. - Expression newExpression; - try - { - _memberExpressionDepth++; - newExpression = base.VisitExpression(expression.Expression); - } - finally - { - _memberExpressionDepth--; - } - bool isEntity = _isEntityDecider.IsEntity(expression.Type); + _memberExpressionDepth++; + var result = base.VisitMemberExpression(expression); + _memberExpressionDepth--; - if (isEntity) + ExpressionValues values = _values.Pop().Operation(pvs => pvs.MemberAccess(expression.Type)); + if (_isEntityDecider.IsEntity(expression.Type)) { - // See (h) why we do not check for _memberExpressionDepth here! - AddPossibility(ExpressionKeyVisitor.Visit(expression, null), N); + // Don't add joins for things like a.B == a.C where B and C are entities. + // We only need to join B when there's something like a.B.D. + // TODO: Add an exception for the Id property. + if (_memberExpressionDepth > 0) + AddJoin(expression); + + string key = ExpressionKeyVisitor.Visit(expression, null); + values.MemberExpressionValuesIfEmptyOuterJoined[key] = PossibleValueSet.CreateNull(expression.Type); } + SetResultValues(values); - if (_memberExpressionDepth > 0 && isEntity) + return result; + } + + private static bool IsNullConstantExpression(Expression expression) + { + if (expression is ConstantExpression) { - return AddJoin(expression); + var constant = (ConstantExpression)expression; + return constant.Value == null; } else { - if (newExpression != expression.Expression) - return Expression.MakeMemberAccess(newExpression, expression.Member); - return expression; + return false; } } - private void FixedMapping(Dictionary<string, int> sourceMapping, int value) + private void SetResultValues(ExpressionValues values) { - foreach (var me in sourceMapping.Keys) - { - AddPossibility(me, value); - } + _handled.Pop(); + _handled.Push(true); + _values.Push(values); } - private void BinaryMapping(Dictionary<string, int> leftMapping, Dictionary<string, int> rightMapping, int[,] op) + private void HandleBinaryOperation(Func<PossibleValueSet, PossibleValueSet, PossibleValueSet> operation) { - // Compute mapping for all member expressions in leftMapping. If the member expression is missing - // in rightMapping, use TNF as a "pessimistic approximation" instead (inside the ?: operator). See - // the text for an explanation of this. - foreach (var lhs in leftMapping) + var right = _values.Pop(); + var left = _values.Pop(); + SetResultValues(left.Operation(right, operation)); + } + + private void HandleUnaryOperation(Func<PossibleValueSet, PossibleValueSet> operation) + { + SetResultValues(_values.Pop().Operation(operation)); + } + + private class ExpressionValues + { + public ExpressionValues(PossibleValueSet valuesIfUnknownMemberExpression) { - AddPossibility(lhs.Key, op[lhs.Value, rightMapping.ContainsKey(lhs.Key) ? rightMapping[lhs.Key] : TNF]); + Values = valuesIfUnknownMemberExpression; + MemberExpressionValuesIfEmptyOuterJoined = new Dictionary<string, PossibleValueSet>(); } - // Compute mapping for all member expressions *only* in rightMapping (we did the common ones above). - // Again, use TNF as pessimistic approximation to result of left subcondition. - foreach (var rhs in rightMapping) + + /// <summary> + /// Possible values of expression if there's set of values for the requested member expression. + /// For example, if we have an expression "3" and we request the state for "a.B.C", we'll + /// use "3" from Values since it won't exist in MemberExpressionValuesIfEmptyOuterJoined. + /// </summary> + private PossibleValueSet Values { get; set; } + + /// <summary> + /// Stores the possible values of an expression that would result if the given member expression + /// string was emptily outer joined. For example a.B.C would result in "null" if we try to + /// outer join to B and there are no rows. Even if an expression tree does contain a particular + /// member experssion, it may not appear in this list. In that case, the emptily outer joined + /// value set for that member expression will be whatever's in Values instead. + /// </summary> + public Dictionary<string, PossibleValueSet> MemberExpressionValuesIfEmptyOuterJoined { get; private set; } + + public PossibleValueSet GetValues(string memberExpression) { - if (!leftMapping.ContainsKey(rhs.Key)) - { - AddPossibility(rhs.Key, op[rhs.Value, TNF]); - } + if (MemberExpressionValuesIfEmptyOuterJoined.ContainsKey(memberExpression)) + return MemberExpressionValuesIfEmptyOuterJoined[memberExpression]; + return Values; } - } - private void UnaryMapping(Dictionary<string, int> sourceMapping, int[] op) - { - foreach (var item in sourceMapping) + public IEnumerable<string> MemberExpressions { - AddPossibility(item.Key, op[item.Value]); + get { return MemberExpressionValuesIfEmptyOuterJoined.Keys; } } - } - private static bool IsNullConstantExpression(Expression expression) - { - if (expression is ConstantExpression) + public ExpressionValues Operation(ExpressionValues mergeWith, Func<PossibleValueSet, PossibleValueSet, PossibleValueSet> operation) { - var constant = (ConstantExpression)expression; - return constant.Value == null; + ExpressionValues result = new ExpressionValues(operation(Values, mergeWith.Values)); + foreach (string memberExpression in MemberExpressions.Union(mergeWith.MemberExpressions)) + { + var left = GetValues(memberExpression); + var right = mergeWith.GetValues(memberExpression); + result.MemberExpressionValuesIfEmptyOuterJoined.Add(memberExpression, operation(left, right)); + } + return result; } - else + + public ExpressionValues Operation(Func<PossibleValueSet, PossibleValueSet> operation) { - return false; + ExpressionValues result = new ExpressionValues(operation(Values)); + foreach (string memberExpression in MemberExpressions) + { + result.MemberExpressionValuesIfEmptyOuterJoined.Add(memberExpression, operation(GetValues(memberExpression))); + } + return result; } } - - private void AddPossibility(string memberPath, int value) - { - Dictionary<string, int> mapping = _memberExpressionMappings.Peek(); - if (mapping.ContainsKey(memberPath)) - mapping[memberPath] |= value; - else - mapping[memberPath] = value; - } } } Deleted: trunk/nhibernate/src/NHibernate/Linq/Visitors/WhereJoinDetectorDesignNotes.txt =================================================================== --- trunk/nhibernate/src/NHibernate/Linq/Visitors/WhereJoinDetectorDesignNotes.txt 2011-06-21 16:31:37 UTC (rev 5968) +++ trunk/nhibernate/src/NHibernate/Linq/Visitors/WhereJoinDetectorDesignNotes.txt 2011-06-27 00:59:46 UTC (rev 5969) @@ -1,309 +0,0 @@ -[Commiter's Notes: As with the code, this design document was contributed by Harald Mueller. - By default in Linq, a member expression such as a.B.C is converted into left outer joins so if we have something like a => (a.B.C == 1 || a.D.E == 2) we don't lose records due to the join where B or D is null. - This document describes how we optimize to inner joins when outer joins are not required, such as in the simple case of a => a.B.C == 1. - There was significant discussion on the developers mailing list regarding this topic. See also NH-2583.] - -Optimization of outer joins to inner joins for (||-4) semantics -=============================================================== - -It is interesting - and for some databases an advantage - to replace outer joins with inner joins if the outer joins are not necessary. "Not necessary" here means that the result must not be different whether using outer joins or inner joins. The question, obviously, is: For which joinable member expressions can we replace an outer join with an inner join without creating wrong results? - -It took me a few hours to find this out. Here is the result. - -A record (object) will be in the result if the evaluation of the condition in 3-value SQL logic will return true; it will not be in the result if the result is either logical-null or false. The difference between outer joining and inner joining is that with the latter, objects are missing from the set on which the condition is checked. Thus, inner joins "emulates" a result that is logical-null or false. And therefore, we can replace an outer join with an inner join only if the resulting condition was not true on the outer join in the first place when there was an "empty outer join" - i.e., the outer join had to add nulls because there was no joinable record. - -By the way, I will in the following call the nulls added by "dangling outer joins" "oj-nulls". They have to be distinguished from the two other sorts of nulls in SQL: -* "value-null" - a NULL in a column of a table -* "logical-null" - a NULL resulting from the evaluation of certain conditions on certain values (in SQL, at least one of these values must be a value-null or oj-null). -In contrast to value-nulls, oj-nulls have the peculiar property that they can exist even for non-nullable columns. - -If we look at the evaluation tree of a condition, we can assign to each node and for each joinable member expression one of the following small sets: - - t definitely makes the node true when the member expression is emptily outer-joined (i.e., joining it creates oj-nulls for all the simple properties) - n definitely makes the node logical-null when emptily outer-joined - f definitely makes the node false when emptily outer-joined - tn maybe makes it true, maybe logical-null when emptily outer-joined - tf maybe makes it true, maybe false when emptily outer-joined - nf maybe makes it logical-null, maybe false when emptily outer-joined - tnf maybe makes it true, maybe logical-null, maybe false when emptily outer-joined - -(An eighth set, the empty one, could only be assigned to a contradiction. We ignore such conditions in most of the following discussion). - -When we know these values at the top condition, we can then safely use inner joins for all member expressions for which the value does not contain t. -The reasoning is as follows: If for some result record, the empty outer join of a joinable member expression has made the top where condition logical-null or false, this record will be dropped from the result set - this is just the definition of how a query handles logical-null and false results. But an inner join cannot "do more harm" - it will also just drop the record from the result. Therefore, we can safely replace outer joins to such joinable member expressions with inner joins. - -In the following, we assemble the rules for computing the member expressions mappings introduced above. After a few examples, we look at the rules, and then more examples. - -(Remark: I use C# notation in the following, with the following semantics: -* Generally, the semantics for condition evaluation is 3-valued SQL semantics with condition results of true, logical-null, or false -* == null has the semantics of "is null" -* != null the semantics of "is not null" -* the final result contains objects where the condition evaluation yields true, but not logical-null or false - this is the standard mapping of 3-valued-semantics to 2-valued-semantics). - -As a first example, the condition - - x.A.B == 4 - -is logical-null when emptily outer-joining x.A, but never true or false. Thus, we have { x.A -> n }. Inner joining x.A (which drops records that are emptily joined) therefore yields the same 2-valued-logic result. - -On the other hand, the condition - - x.A.B == null && x.E == null - -can be made true or false by emptily outer-joining x.A, but never logical-null (resaon: The right side can only be true or false; and the left side also; so the complete condition is either true or false - depending on x.E's value). So, { x.A -> tf }, and therefore we cannot inner join x.A (there is a t in the mapping!). By direct reasoning, we see that inner joining x.A will drop all the non-joinable records, hence inner and outer joining are different for records where x.E is null, and will yield a different 2-valued-logic result. - -Finally, the condition - - x.A.B == null || x.E == null - -is always true when emptily outer-joining x.A, so { x.A -> t }. Hence, the result is always different from inner joining, where the empty join drops such records from the result. - -How can we compute the mapping from member expressions to possible values? First, we note that we can compute this mapping with different "preciseness". For example, one correct way would be to just collect all joinable member expressions into a set M, and then set { mE -> tnf } for all member expressions mE in M. Obviously, this prevents any inner joins. On the other hand, we can in principle quite precisely check condition like the following - the question is whether this is worth the effort: - - x.A.B == null x.A -> ... - !(x.A.B != null) x.A -> ... - (x.A.B ?? 4) == 4 x.A -> ... - !((x.A.B ?? 4) != 4) x.A -> ... - (x.A.B ?? x.E) == 4 x.A -> ... - (x.A.B ?? x.C.D) == 4 x.A -> ..., x.C -> ... - -In the following, I'll first give the rules for the 4 logical operators &&, ||, !, and ?!; then I'll give practical rules (I think) for common simple conditions; finally, I'll look at some more complex simple conditions. - -(a) && operator: - - && t n f tn tf nf tnf - - t t n f tn tf nf tnf tnf - n n f n nf nf nf nf - f f f f f f f - tn tn tnf nf tnf tnf - tf tf nf tnf tnf - nf nf nf nf - tnf tnf tnf - - - - -The single letters are easy: They follow the 3-valued SQL logic. E.g., if emptily joining a member expression will definitely return true for condition A and logical-null for condition B, it will definitely return (true && logical-null) = logical-null for A && B. -For the multiple values, we have to compute the union of the values in the small sets. For example, tn && tf is { t && t, t && f, n && t, n && f } = { t,f,n,f } = tnf. -If the member expression is missing on one side, we must assume any value for that side. After all, the condition on the other side could e.g. be 0 == 0, 0 == null, or 0 == 1. Therefore, the result is the same as if we would get tnf from the other side. See examples (ex.4) and (ex.5) below for some more thoughts on this. -The values below the diagonal are symmetric to the upper ones. - -(b) || operator: - - || t n f tn tf nf tnf - - t t t t t t t t t - n n n tn tn n tn tn - f f tn tf nf tnf tnf - tn tn tn tn tn tn - tf tf tnf tnf tnf - nf nf tnf tnf - tnf tnf tnf - - - - -The resoning is similar to &&. -Again, the values below the diagonal are symmetric to the upper ones. - -(c) ! operator: - - ! t n f tn tf nf tnf - f n t nf tf tn tnf - -The resoning is similar to &&. - -(d) Logical ?: operator: - -A ? B : C is equivalent to A && B || !A && C. From this, one can compute the mappings (I would need a three-dimensional "cube matrix" for their presentation, which is hard in text ... ok, a tree also would work ...). - -Now let us look at simple conditions. The following are *possible* member expression mappings. As said above, one can also assign the values more crudely - e.g., always assign tnf to all member expressions. However, the assignments below are reasonable precise - i.e., they assign the minimal sets for practical values (if I did not make a mistake). - -(e) mE*.P <op> <constant(s)> (where <constant(s)> are not value-nulls) - -mE* here is a symbol for a "nested sequence of member expressions" mE1, mE2, ..., .P is the final property. E.g. in - - a.B.C.D.E == 4 - -the member expressions are mE1=a.B, mE2=a.B.C, and mE3=a.B.C.D; the final property is E. In this case, we get { mE -> n } for all member expressions mE. Reason: Emptily outer joining any member expression will yield oj-null for P; but oj-null <op> <constant> is always logical-null. - -(f) mE*.P [<op>s <constant>s] == me'*.Q [<op>s <constant>s] - -The result depends on the definition of ==: We can either use the Linq definition where null == null is true (which requires an SQL translation of ... = ... OR ... IS NULL AND ... IS NULL); or the SQL definition where null == null is logical-null. - -In the first case, we reason as follows: - - - When one side (e.g. the left one) does empty outer-joining and yields oj-null for P, but the right side has non-oj-null values, the SQL - oj-null = value OR oj-null IS NULL AND value IS NULL - evaluates as logical-null OR true AND false, which is logical-null OR true, which is true. - - When both sides do empty outer-joining, we get - oj-null = oj-null OR oj-null IS NULL AND oj-null IS NULL - which is logical-null OR true AND true, which is true. - -So, empty outer joining will always yield true, and hence we get { mE -> t } for all member expressions mE. - -For the SQL definition of equality, we get the following: - - - When one side (e.g. the left one) does empty outer-joining and yields oj-null for P, but the right side has non-oj-null values, the SQL - oj-null = value - evaluates as logical-null. - - When both sides do empty outer-joining, we get - oj-null = oj-null - which is logical-null. - -So, empty outer joining will always yield logical-null, and hence we get { mE -> n } for all member expressions mE. - -(g) mE*.P [<op>s <constant>s] != me'*.Q [<op>s <constant>s] - -Again, the result depends on the definition of ==: We can either use the Linq definition where null != null is false (which requires an SQL translation of ... <> ... OR ... IS NULL AND ... IS NOT NULL OR ... IS NOT NULL AND ... IS NULL); or the SQL definition where null != null is logical-null. - -In the first case, we reason as follows: - - - When one side (e.g. the left one) does empty outer-joining and yields oj-null for P, but the right side has non-oj-null values, the SQL - oj-null <> value OR oj-null IS NULL AND value IS NOT NULL OR oj-null IS NOT NULL AND value IS NULL - evaluates as logical-null OR true AND true OR false AND false, which is true. - - When both sides do empty outer-joining, we get - oj-null <> oj-null OR oj-null IS NULL AND oj-null IS NOT NULL OR oj-null IS NOT NULL AND oj-null IS NULL - which is logical-null OR false OR false, which is logical-null. - -So, empty outer joining will yield true or logical-null, and hence we get { mE -> tn } for all member expressions mE. - -For the SQL definition of equality, we get the following: - - - When one side (e.g. the left one) does empty outer-joining and yields oj-null for P, but the right side has non-oj-null values, the SQL - oj-null <> value - evaluates as logical-null. - - When both sides do empty outer-joining, we get - oj-null <> oj-null - which is also logical-null. - -So, empty outer joining will always yield logical-null, and hence we get { mE -> n } for all member expressions mE. - -(h) mE*.P != null - -Here is a first attempt: Empty outer joining can only yield false, hence we get { mE -> f } for all member expressions mE. - -There is a small problem with this definition: If P itself is a pointer to a mapped entity, we would *not* record that P is guaranteed to point to a valid object. This hurts in the following query: - - x.A != null && x.A.B != 4 - -According to (e), the right side will create the mapping { x.A -> n }. The left side does not have a joinable member expression (x.A is not joined! it is just evaluated on x's table), so the mapping is { }. According to (a), we get for the whole condition { x.A -> nf }. Actually, we know that the result should be { x.A -> f }: An empty outer join of x.A will yield "false" fo... [truncated message content] |