[关闭]
@Spongcer 2015-04-10T06:49:50.000000Z 字数 5417 阅读 1610

PostgreSQL-process_duplicate_ors

PG


  1. test=# EXPLAIN SELECT * FROM T1 WHERE (A=1 AND B=1) OR (A=1 AND C=1) OR (A=1 AND D=1) ;
  2. QUERY PLAN
  3. ---------------------------------------------------------
  4. Seq Scan on t1 (cost=0.00..45.40 rows=1 width=16)
  5. Filter: ((a = 1) AND ((b = 1) OR (c = 1) OR (d = 1)))
  6. (2 行记录)
  7. test=#
  1. /*
  2. * process_duplicate_ors
  3. * Given a list of exprs which are ORed together, try to apply
  4. * the inverse OR distributive law.
  5. *
  6. * Returns the resulting expression (could be an AND clause, an OR
  7. * clause, or maybe even a single subexpression).
  8. */
  9. /*
  10. * 假设表T1有四列:A、B、C、D。
  11. *
  12. * 这个函数处理这种情况,对于一个选择,SELECT * FROM TEST
  13. * WHERE (A=1 AND B=1) OR (A=1 AND C=1) OR (A=1 AND D=1);
  14. *
  15. * 语句中的WHERE条件:
  16. * (A=1 AND B=1) OR (A=1 AND C=1) OR (A=1 AND D=1)
  17. * 可以改写为:
  18. * (A=1)AND (B=1 OR C=1 OR D=1)
  19. * 这就是这个函数的主要功能。
  20. *
  21. * 这个函数的参数是一个list, 对于上述的WHERE条件,orlist的结构如下:
  22. * orlist中有一个元素,是OR_EXPR类型的BoolExpr,BoolExpr中的结构如下:
  23. * typedef struct BoolExpr
  24. * {
  25. * Expr xpr; = 略
  26. * BoolExprType boolop; = OR_EXPR
  27. * List *args; = OR中的3个条件,即(A=1 AND B=1) OR (A=1 AND C=1) OR (A=1 AND D=1)
  28. * bool plusFlag; = 略
  29. * } BoolExpr;
  30. *
  31. * 下面分析函数的具体实现,大致的步骤为:
  32. * 1)分析每个OR中的公共项, 2)提取公共项, 3)合并剩余项为AND。
  33. */
  34. static Expr *
  35. process_duplicate_ors(List *orlist)
  36. {
  37. List *reference = NIL;
  38. int num_subclauses = 0;
  39. List *winners;
  40. List *neworlist;
  41. ListCell *temp;
  42. if (orlist == NIL)
  43. return NULL; /* probably can't happen */
  44. /* 如果只有一个。。。。,那就算了吧 */
  45. if (list_length(orlist) == 1) /* single-expression OR (can this
  46. * happen?) */
  47. return linitial(orlist);
  48. /*
  49. * Choose the shortest AND clause as the reference list --- obviously, any
  50. * subclause not in this clause isn't in all the clauses. If we find a
  51. * clause that's not an AND, we can treat it as a one-element AND clause,
  52. * which necessarily wins as shortest.
  53. */
  54. /*
  55. * “找最短”。
  56. * 在WHERE语句中:
  57. * (A=1 AND B=1) OR (A=1 AND C=1) OR (A=1 AND D=1)
  58. * OR操作串联了3个子语句,找到其中最短的一个,因为如果有公共项,那么最短的那个也一定
  59. * 包含公共项,那么通过找到最短的那个,在后面的操作里能减少 比较 的次数。
  60. * 在上面的WHERE语句中,3个子语句的长度相同,按照如下执行过程,找到的应该是(A=1 AND B=1),
  61. * 即第一个。
  62. */
  63. foreach(temp, orlist)
  64. {
  65. Expr *clause = (Expr *) lfirst(temp);
  66. if (and_clause((Node *) clause))
  67. {
  68. List *subclauses = ((BoolExpr *) clause)->args;
  69. int nclauses = list_length(subclauses);
  70. /*
  71. * 这里判断子语句里的长度,比如对于(A=1 AND B=1)子语句,
  72. * 他实际上是一个AND连接起来的两个 子子语句, 那么他的长度就是2。
  73. *
  74. * 通过nclauses记录最短的子语句,如果有更短的(nclauses < num_subclauses),
  75. * 那么就替换成最短的。
  76. */
  77. if (reference == NIL || nclauses < num_subclauses)
  78. {
  79. reference = subclauses;
  80. num_subclauses = nclauses;
  81. }
  82. }
  83. else
  84. {
  85. /*
  86. * 还有一种情况, 就是可能子句不是一个AND语句,这样看上去不大符合规则,
  87. * 那么把他看做一个整体,那这个就是最短元素。
  88. *
  89. ******************************
  90. * 如果代码执行到这里,那么只有两种情况:
  91. * 一种是 ... WHERE (A=1 AND B=1) OR (A=1 AND C=1) OR (A=1)。
  92. * 一种是 ... WHERE ((A=1 OR C=1) AND B=1) OR (A=1 OR C=1).
  93. * 如果是这两种情况,都可以做如下简化:
  94. * 第一种情况简化为 A=1
  95. * 第二种情况化简为 (A=1 OR C=1)
  96. *
  97. * 第三种情况待补充...
  98. */
  99. reference = list_make1(clause);
  100. break;
  101. }
  102. }
  103. /*
  104. * Just in case, eliminate any duplicates in the reference list.
  105. */
  106. /* 找到最短的, 存到List */
  107. reference = list_union(NIL, reference);
  108. /*
  109. * Check each element of the reference list to see if it's in all the OR
  110. * clauses. Build a new list of winning clauses.
  111. */
  112. /*
  113. * “找公共项”。
  114. *
  115. * NOTE:这时候就能体现“找最短”带来的优势,外层循环次数会少一些。
  116. *
  117. * 如果WHERE语句是:
  118. * (A=1 AND B=1) OR (A=1 AND C=1) OR (A=1 AND D=1)
  119. * “找最短”中找到的一定是(A=1 AND B=1)。
  120. * 则外层会有两次循环...(foreach(temp, reference)),两次循环的变量分别为
  121. * A=1 和 B=1。
  122. * 内层有三次循环...(foreach(temp2, orlist)),三次循环的变量分别为
  123. * (A=1 AND B=1) 和 (A=1 AND C=1) 和 (A=1 AND D=1)
  124. *
  125. * 示例如下:
  126. * 假如现在外层循环第一次执行,即查找A=1的公共项,进而假如内层循环也是第一次执行,
  127. * 即在(A=1 AND B=1)中查找是否存在A=1这个公共项,发现是存在的(list_member),
  128. * 则依次判断内层循环的第二个子句...
  129. *
  130. * 如上例,具体来说,这些循环分别作的操作是:
  131. * 外层第一次:
  132. * 判断A=1是否在(A=1 AND B=1),在,判断下一个
  133. * 判断A=1是否在(A=1 AND C=1),在,判断下一个
  134. * 判断A=1是否在(A=1 AND D=1),在,A=1是公共项,记录(winners = lappend...)
  135. * 外层第二次:
  136. * 判断B=1是否在(A=1 AND B=1),在,判断下一个
  137. * 判断B=1是否在(A=1 AND C=1),不在,跳出循环,下一个不用判断了。
  138. * 判断B=1是否在(A=1 AND D=1),未执行,因为上一个不含公共项,就不可能提取了。
  139. */
  140. winners = NIL;
  141. foreach(temp, reference)
  142. {
  143. Expr *refclause = (Expr *) lfirst(temp);
  144. bool win = true;
  145. ListCell *temp2;
  146. foreach(temp2, orlist)
  147. {
  148. Expr *clause = (Expr *) lfirst(temp2);
  149. if (and_clause((Node *) clause))
  150. {
  151. if (!list_member(((BoolExpr *) clause)->args, refclause))
  152. {
  153. win = false;
  154. break;
  155. }
  156. }
  157. else
  158. {
  159. if (!equal(refclause, clause))
  160. {
  161. win = false;
  162. break;
  163. }
  164. }
  165. }
  166. if (win)
  167. winners = lappend(winners, refclause);
  168. }
  169. /*
  170. * If no winners, we can't transform the OR
  171. */
  172. if (winners == NIL)
  173. return make_orclause(orlist);
  174. /*
  175. * Generate new OR list consisting of the remaining sub-clauses.
  176. *
  177. * If any clause degenerates to empty, then we have a situation like (A
  178. * AND B) OR (A), which can be reduced to just A --- that is, the
  179. * additional conditions in other arms of the OR are irrelevant.
  180. *
  181. * Note that because we use list_difference, any multiple occurrences of a
  182. * winning clause in an AND sub-clause will be removed automatically.
  183. */
  184. /*
  185. * “提取公共项”。
  186. * 用list_difference删除公共项,实现细节不在赘述。
  187. */
  188. neworlist = NIL;
  189. foreach(temp, orlist)
  190. {
  191. Expr *clause = (Expr *) lfirst(temp);
  192. if (and_clause((Node *) clause))
  193. {
  194. List *subclauses = ((BoolExpr *) clause)->args;
  195. /* 看这里...看这里..., 消除公共项 */
  196. subclauses = list_difference(subclauses, winners);
  197. if (subclauses != NIL)
  198. {
  199. /* 消除后,剩余的拼接起来,拼接成:(B=1 OR C=1 OR D=1)*/
  200. if (list_length(subclauses) == 1)
  201. neworlist = lappend(neworlist, linitial(subclauses));
  202. else
  203. neworlist = lappend(neworlist, make_andclause(subclauses));
  204. }
  205. else
  206. {
  207. /*
  208. * 这说明子语句中,有一个全部是公共项,也就是如下形式:
  209. * ... WHERE (A=1 AND B=1) OR (A=1)
  210. *
  211. * 这时候公共项是A=1,第一个子句是(A=1 AND B=1),第二个子句是(A=1),
  212. * 第二个子句经过list_difference,返回的结果是NULL。
  213. * 对于这种情况,实际上可以化简为:A=1,因为(A=1 AND B=1)一定满足A=1的情况。
  214. */
  215. neworlist = NIL; /* degenerate case, see above */
  216. break;
  217. }
  218. }
  219. else
  220. {
  221. if (!list_member(winners, clause))
  222. neworlist = lappend(neworlist, clause);
  223. else
  224. {
  225. neworlist = NIL; /* degenerate case, see above */
  226. break;
  227. }
  228. }
  229. }
  230. /*
  231. * Append reduced OR to the winners list, if it's not degenerate, handling
  232. * the special case of one element correctly (can that really happen?).
  233. * Also be careful to maintain AND/OR flatness in case we pulled up a
  234. * sub-sub-OR-clause.
  235. */
  236. if (neworlist != NIL)
  237. {
  238. if (list_length(neworlist) == 1)
  239. winners = lappend(winners, linitial(neworlist));
  240. else
  241. /*neworlist里面应该是(B=1 OR C=1 OR D=1),所以用make_orclause */
  242. winners = lappend(winners, make_orclause(pull_ors(neworlist)));
  243. }
  244. /*
  245. * And return the constructed AND clause, again being wary of a single
  246. * element and AND/OR flatness.
  247. */
  248. if (list_length(winners) == 1)
  249. return (Expr *) linitial(winners);
  250. else
  251. /* 返回的形式是:(A=1)AND (B=1 OR C=1 OR D=1),所以会用make_andclause */
  252. return make_andclause(pull_ands(winners));
  253. }
添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注