diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index dbaff027e4..f754f17002 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1427,6 +1427,7 @@ _outEquivalenceMember(StringInfo str, EquivalenceMember *node) WRITE_NODE_FIELD(em_expr); WRITE_BITMAPSET_FIELD(em_relids); + WRITE_BITMAPSET_FIELD(em_nullable_relids); WRITE_BOOL_FIELD(em_is_const); WRITE_BOOL_FIELD(em_is_child); WRITE_OID_FIELD(em_datatype); diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 56e93c2c41..7f9a006995 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -27,7 +27,7 @@ static EquivalenceMember *add_eq_member(EquivalenceClass *ec, - Expr *expr, Relids relids, + Expr *expr, Relids relids, Relids nullable_relids, bool is_child, Oid datatype); static void generate_base_implied_equalities_const(PlannerInfo *root, EquivalenceClass *ec); @@ -97,7 +97,9 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, Expr *item1; Expr *item2; Relids item1_relids, - item2_relids; + item2_relids, + item1_nullable_relids, + item2_nullable_relids; List *opfamilies; EquivalenceClass *ec1, *ec2; @@ -139,6 +141,12 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, return false; /* RHS is non-strict but not constant */ } + /* Calculate nullable-relid sets for each side of the clause */ + item1_nullable_relids = bms_intersect(item1_relids, + restrictinfo->nullable_relids); + item2_nullable_relids = bms_intersect(item2_relids, + restrictinfo->nullable_relids); + /* * We use the declared input types of the operator, not exprType() of the * inputs, as the nominal datatypes for opfamily lookup. 
This presumes @@ -273,7 +281,8 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, else if (ec1) { /* Case 3: add item2 to ec1 */ - em2 = add_eq_member(ec1, item2, item2_relids, false, item2_type); + em2 = add_eq_member(ec1, item2, item2_relids, item2_nullable_relids, + false, item2_type); ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); ec1->ec_below_outer_join |= below_outer_join; /* mark the RI as usable with this pair of EMs */ @@ -283,7 +292,8 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, else if (ec2) { /* Case 3: add item1 to ec2 */ - em1 = add_eq_member(ec2, item1, item1_relids, false, item1_type); + em1 = add_eq_member(ec2, item1, item1_relids, item1_nullable_relids, + false, item1_type); ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo); ec2->ec_below_outer_join |= below_outer_join; /* mark the RI as usable with this pair of EMs */ @@ -306,8 +316,10 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, ec->ec_broken = false; ec->ec_sortref = 0; ec->ec_merged = NULL; - em1 = add_eq_member(ec, item1, item1_relids, false, item1_type); - em2 = add_eq_member(ec, item2, item2_relids, false, item2_type); + em1 = add_eq_member(ec, item1, item1_relids, item1_nullable_relids, + false, item1_type); + em2 = add_eq_member(ec, item2, item2_relids, item2_nullable_relids, + false, item2_type); root->eq_classes = lappend(root->eq_classes, ec); @@ -324,12 +336,13 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, */ static EquivalenceMember * add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, - bool is_child, Oid datatype) + Relids nullable_relids, bool is_child, Oid datatype) { EquivalenceMember *em = makeNode(EquivalenceMember); em->em_expr = expr; em->em_relids = relids; + em->em_nullable_relids = nullable_relids; em->em_is_const = false; em->em_is_child = is_child; em->em_datatype = datatype; @@ -443,7 +456,7 @@ get_eclass_for_sort_expr(PlannerInfo *root, 
newec->ec_sortref = sortref; newec->ec_merged = NULL; newem = add_eq_member(newec, expr, pull_varnos((Node *) expr), - false, expr_datatype); + NULL, false, expr_datatype); /* * add_eq_member doesn't check for volatile functions, set-returning @@ -621,7 +634,9 @@ generate_base_implied_equalities_const(PlannerInfo *root, } process_implied_equality(root, eq_op, cur_em->em_expr, const_em->em_expr, - ec->ec_relids, + bms_copy(ec->ec_relids), + bms_union(cur_em->em_nullable_relids, + const_em->em_nullable_relids), ec->ec_below_outer_join, cur_em->em_is_const); } @@ -676,7 +691,9 @@ generate_base_implied_equalities_no_const(PlannerInfo *root, } process_implied_equality(root, eq_op, prev_em->em_expr, cur_em->em_expr, - ec->ec_relids, + bms_copy(ec->ec_relids), + bms_union(prev_em->em_nullable_relids, + cur_em->em_nullable_relids), ec->ec_below_outer_join, false); } @@ -1077,7 +1094,9 @@ create_join_clause(PlannerInfo *root, leftem->em_expr, rightem->em_expr, bms_union(leftem->em_relids, - rightem->em_relids)); + rightem->em_relids), + bms_union(leftem->em_nullable_relids, + rightem->em_nullable_relids)); /* Mark the clause as redundant, or not */ rinfo->parent_ec = parent_ec; @@ -1295,7 +1314,8 @@ reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, left_type, right_type, inner_datatype; - Relids inner_relids; + Relids inner_relids, + inner_nullable_relids; ListCell *lc1; Assert(is_opclause(rinfo->clause)); @@ -1321,6 +1341,8 @@ reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, inner_datatype = left_type; inner_relids = rinfo->left_relids; } + inner_nullable_relids = bms_intersect(inner_relids, + rinfo->nullable_relids); /* Scan EquivalenceClasses for a match to outervar */ foreach(lc1, root->eq_classes) @@ -1375,7 +1397,8 @@ reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, newrinfo = build_implied_join_equality(eq_op, innervar, cur_em->em_expr, - inner_relids); + bms_copy(inner_relids), + 
bms_copy(inner_nullable_relids)); if (process_equivalence(root, newrinfo, true)) match = true; } @@ -1408,7 +1431,9 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) left_type, right_type; Relids left_relids, - right_relids; + right_relids, + left_nullable_relids, + right_nullable_relids; ListCell *lc1; /* Can't use an outerjoin_delayed clause here */ @@ -1423,6 +1448,10 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) rightvar = (Expr *) get_rightop(rinfo->clause); left_relids = rinfo->left_relids; right_relids = rinfo->right_relids; + left_nullable_relids = bms_intersect(left_relids, + rinfo->nullable_relids); + right_nullable_relids = bms_intersect(right_relids, + rinfo->nullable_relids); foreach(lc1, root->eq_classes) { @@ -1504,7 +1533,8 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) newrinfo = build_implied_join_equality(eq_op, leftvar, cur_em->em_expr, - left_relids); + bms_copy(left_relids), + bms_copy(left_nullable_relids)); if (process_equivalence(root, newrinfo, true)) matchleft = true; } @@ -1516,7 +1546,8 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) newrinfo = build_implied_join_equality(eq_op, rightvar, cur_em->em_expr, - right_relids); + bms_copy(right_relids), + bms_copy(right_nullable_relids)); if (process_equivalence(root, newrinfo, true)) matchright = true; } @@ -1636,11 +1667,27 @@ add_child_rel_equivalences(PlannerInfo *root, { /* Yes, generate transformed child version */ Expr *child_expr; + Relids new_nullable_relids; child_expr = (Expr *) adjust_appendrel_attrs((Node *) cur_em->em_expr, appinfo); - (void) add_eq_member(cur_ec, child_expr, child_rel->relids, + + /* + * Must translate nullable_relids. Note this code assumes + * parent and child relids are singletons. 
+ */ + new_nullable_relids = cur_em->em_nullable_relids; + if (bms_overlap(new_nullable_relids, parent_rel->relids)) + { + new_nullable_relids = bms_difference(new_nullable_relids, + parent_rel->relids); + new_nullable_relids = bms_add_members(new_nullable_relids, + child_rel->relids); + } + + (void) add_eq_member(cur_ec, child_expr, + child_rel->relids, new_nullable_relids, true, cur_em->em_datatype); } } diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 17dc3d0da1..64b1be0ae9 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -49,9 +49,12 @@ static void distribute_qual_to_rels(PlannerInfo *root, Node *clause, bool below_outer_join, Relids qualscope, Relids ojscope, - Relids outerjoin_nonnullable); + Relids outerjoin_nonnullable, + Relids deduced_nullable_relids); static bool check_outerjoin_delay(PlannerInfo *root, Relids *relids_p, Relids *nullable_relids_p, bool is_pushed_down); +static bool check_equivalence_delay(PlannerInfo *root, + RestrictInfo *restrictinfo); static void check_mergejoinable(RestrictInfo *restrictinfo); static void check_hashjoinable(RestrictInfo *restrictinfo); @@ -343,7 +346,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, foreach(l, (List *) f->quals) distribute_qual_to_rels(root, (Node *) lfirst(l), false, below_outer_join, - *qualscope, NULL, NULL); + *qualscope, NULL, NULL, NULL); } else if (IsA(jtnode, JoinExpr)) { @@ -453,7 +456,8 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, foreach(qual, (List *) j->quals) distribute_qual_to_rels(root, (Node *) lfirst(qual), false, below_outer_join, - *qualscope, ojscope, nonnullable_rels); + *qualscope, + ojscope, nonnullable_rels, NULL); /* Now we can add the OuterJoinInfo to oj_info_list */ if (ojinfo) @@ -713,6 +717,8 @@ make_outerjoininfo(PlannerInfo *root, * baserels appearing on the outer (nonnullable) side of the join * (for FULL 
JOIN this includes both sides of the join, and must in fact * equal qualscope) + * 'deduced_nullable_relids': if is_deduced is TRUE, the nullable relids to + * impute to the clause; otherwise NULL * * 'qualscope' identifies what level of JOIN the qual came from syntactically. * 'ojscope' is needed if we decide to force the qual up to the outer-join @@ -724,7 +730,8 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, bool below_outer_join, Relids qualscope, Relids ojscope, - Relids outerjoin_nonnullable) + Relids outerjoin_nonnullable, + Relids deduced_nullable_relids) { Relids relids; bool is_pushed_down; @@ -832,12 +839,13 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, * If the qual came from implied-equality deduction, it should not be * outerjoin-delayed, else deducer blew it. But we can't check this * because the ojinfo list may now contain OJs above where the qual - * belongs. + * belongs. For the same reason, we must rely on caller to supply the + * correct nullable_relids set. */ Assert(!ojscope); is_pushed_down = true; outerjoin_delayed = false; - nullable_relids = NULL; + nullable_relids = deduced_nullable_relids; /* Don't feed it back for more deductions */ maybe_equivalence = false; maybe_outer_join = false; @@ -990,7 +998,8 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, { if (maybe_equivalence) { - if (process_equivalence(root, restrictinfo, below_outer_join)) + if (check_equivalence_delay(root, restrictinfo) && + process_equivalence(root, restrictinfo, below_outer_join)) return; /* EC rejected it, so pass to distribute_restrictinfo_to_rels */ } @@ -1151,6 +1160,44 @@ check_outerjoin_delay(PlannerInfo *root, return outerjoin_delayed; } +/* + * check_equivalence_delay + * Detect whether a potential equivalence clause is rendered unsafe + * by outer-join-delay considerations. Return TRUE if it's safe. 
+ * + * The initial tests in distribute_qual_to_rels will consider a mergejoinable + * clause to be a potential equivalence clause if it is not outerjoin_delayed. + * But since the point of equivalence processing is that we will recombine the + * two sides of the clause with others, we have to check that each side + * satisfies the not-outerjoin_delayed condition on its own; otherwise it might + * not be safe to evaluate everywhere we could place a derived equivalence + * condition. + */ +static bool +check_equivalence_delay(PlannerInfo *root, + RestrictInfo *restrictinfo) +{ + Relids relids; /* scratch copy of one side's relids */ + Relids nullable_relids; /* set by check_outerjoin_delay; not read here */ + + /* fast path if no special joins (oj_info_list is empty) */ + if (root->oj_info_list == NIL) + return true; + + /* must copy restrictinfo's relids (passed by reference) to avoid changing it */ + relids = bms_copy(restrictinfo->left_relids); + /* left side alone must not be outerjoin-delayed, else clause is unsafe */ + if (check_outerjoin_delay(root, &relids, &nullable_relids, true)) + return false; + + /* and similarly for the right side, again working on a copy */ + relids = bms_copy(restrictinfo->right_relids); + if (check_outerjoin_delay(root, &relids, &nullable_relids, true)) + return false; + + return true; /* neither side needs delay on its own */ +} + /* * distribute_restrictinfo_to_rels * Push a completed RestrictInfo into the proper restriction or join @@ -1223,11 +1270,20 @@ distribute_restrictinfo_to_rels(PlannerInfo *root, * variable-free. Otherwise the qual is applied at the lowest join level * that provides all its variables. * + * "nullable_relids" is the set of relids used in the expressions that are + * potentially nullable below the expressions. (This has to be supplied by + * caller because this function is used after deconstruct_jointree, so we + * don't have knowledge of where the clause items came from.) + * * "both_const" indicates whether both items are known pseudo-constant; * in this case it is worth applying eval_const_expressions() in case we * can produce constant TRUE or constant FALSE. 
(Otherwise it's not, * because the expressions went through eval_const_expressions already.) * + * Note: this function will copy item1 and item2, but it is caller's + * responsibility to make sure that the Relids parameters are fresh copies + * not shared with other uses. + * * This is currently used only when an EquivalenceClass is found to * contain pseudoconstants. See path/pathkeys.c for more details. */ @@ -1237,6 +1293,7 @@ process_implied_equality(PlannerInfo *root, Expr *item1, Expr *item2, Relids qualscope, + Relids nullable_relids, bool below_outer_join, bool both_const) { @@ -1268,15 +1325,12 @@ process_implied_equality(PlannerInfo *root, } } - /* Make a copy of qualscope to avoid problems if source EC changes */ - qualscope = bms_copy(qualscope); - /* * Push the new clause into all the appropriate restrictinfo lists. */ distribute_qual_to_rels(root, (Node *) clause, true, below_outer_join, - qualscope, NULL, NULL); + qualscope, NULL, NULL, nullable_relids); } /* @@ -1284,12 +1338,17 @@ process_implied_equality(PlannerInfo *root, * * This overlaps the functionality of process_implied_equality(), but we * must return the RestrictInfo, not push it into the joininfo tree. + * + * Note: this function will copy item1 and item2, but it is caller's + * responsibility to make sure that the Relids parameters are fresh copies + * not shared with other uses. */ RestrictInfo * build_implied_join_equality(Oid opno, Expr *item1, Expr *item2, - Relids qualscope) + Relids qualscope, + Relids nullable_relids) { RestrictInfo *restrictinfo; Expr *clause; @@ -1304,9 +1363,6 @@ build_implied_join_equality(Oid opno, (Expr *) copyObject(item1), (Expr *) copyObject(item2)); - /* Make a copy of qualscope to avoid problems if source EC changes */ - qualscope = bms_copy(qualscope); - /* * Build the RestrictInfo node itself. 
*/ @@ -1315,7 +1371,7 @@ build_implied_join_equality(Oid opno, false, /* outerjoin_delayed */ false, /* pseudoconstant */ qualscope, /* required_relids */ - NULL); /* nullable_relids */ + nullable_relids); /* nullable_relids */ /* Set mergejoinability info always, and hashjoinability if enabled */ check_mergejoinable(restrictinfo); diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index b5cde02b8c..fcdcdd0924 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -509,6 +509,7 @@ typedef struct EquivalenceMember Expr *em_expr; /* the expression represented */ Relids em_relids; /* all relids appearing in em_expr */ + Relids em_nullable_relids; /* nullable by lower outer joins */ bool em_is_const; /* expression is pseudoconstant? */ bool em_is_child; /* derived version for a child relation? */ Oid em_datatype; /* the "nominal type" used by the opfamily */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index d6b5fd5865..1865fce490 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -82,12 +82,14 @@ extern void process_implied_equality(PlannerInfo *root, Expr *item1, Expr *item2, Relids qualscope, + Relids nullable_relids, bool below_outer_join, bool both_const); extern RestrictInfo *build_implied_join_equality(Oid opno, Expr *item1, Expr *item2, - Relids qualscope); + Relids qualscope, + Relids nullable_relids); /* * prototypes for plan/setrefs.c diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 1f0c4fd5ef..242d31a529 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2129,6 +2129,7 @@ on (x1 = xx1) where (xx2 is not null); -- regression test: check for bug with propagation of implied equality -- to outside an IN -- +analyze tenk1; -- ensure we get consistent plans here select count(*) from tenk1 a where unique1 in (select unique1 from tenk1 b join tenk1 c using 
(unique1) where b.unique2 = 42); @@ -2348,3 +2349,21 @@ select * from a left join b on i = x and i = y and x = i; (0 rows) rollback; +-- +-- test handling of potential equivalence clauses above outer joins +-- +select q1, unique2, thousand, hundred + from int8_tbl a left join tenk1 b on q1 = unique2 + where coalesce(thousand,123) = q1 and q1 = coalesce(hundred,123); + q1 | unique2 | thousand | hundred +----+---------+----------+--------- +(0 rows) + +select f1, unique2, case when unique2 is null then f1 else 0 end + from int4_tbl a left join tenk1 b on f1 = unique2 + where (case when unique2 is null then f1 else 0 end) = 0; + f1 | unique2 | case +----+---------+------ + 0 | 0 | 0 +(1 row) + diff --git a/src/test/regress/expected/join_1.out b/src/test/regress/expected/join_1.out index 26998eca78..8ce77940d4 100644 --- a/src/test/regress/expected/join_1.out +++ b/src/test/regress/expected/join_1.out @@ -2129,6 +2129,7 @@ on (x1 = xx1) where (xx2 is not null); -- regression test: check for bug with propagation of implied equality -- to outside an IN -- +analyze tenk1; -- ensure we get consistent plans here select count(*) from tenk1 a where unique1 in (select unique1 from tenk1 b join tenk1 c using (unique1) where b.unique2 = 42); @@ -2348,3 +2349,21 @@ select * from a left join b on i = x and i = y and x = i; (0 rows) rollback; +-- +-- test handling of potential equivalence clauses above outer joins +-- +select q1, unique2, thousand, hundred + from int8_tbl a left join tenk1 b on q1 = unique2 + where coalesce(thousand,123) = q1 and q1 = coalesce(hundred,123); + q1 | unique2 | thousand | hundred +----+---------+----------+--------- +(0 rows) + +select f1, unique2, case when unique2 is null then f1 else 0 end + from int4_tbl a left join tenk1 b on f1 = unique2 + where (case when unique2 is null then f1 else 0 end) = 0; + f1 | unique2 | case +----+---------+------ + 0 | 0 | 0 +(1 row) + diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 
de088f6c1c..fad692483a 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -330,6 +330,8 @@ on (x1 = xx1) where (xx2 is not null); -- regression test: check for bug with propagation of implied equality -- to outside an IN -- +analyze tenk1; -- ensure we get consistent plans here + select count(*) from tenk1 a where unique1 in (select unique1 from tenk1 b join tenk1 c using (unique1) where b.unique2 = 42); @@ -512,3 +514,15 @@ create temp table b (x integer, y integer); select * from a left join b on i = x and i = y and x = i; rollback; + +-- +-- test handling of potential equivalence clauses above outer joins +-- + +select q1, unique2, thousand, hundred + from int8_tbl a left join tenk1 b on q1 = unique2 + where coalesce(thousand,123) = q1 and q1 = coalesce(hundred,123); + +select f1, unique2, case when unique2 is null then f1 else 0 end + from int4_tbl a left join tenk1 b on f1 = unique2 + where (case when unique2 is null then f1 else 0 end) = 0;