浅谈SQLite——查询处理及优化

　2010-06-25 00:00:00　来源：WEB开发网　　　

核心提示： 函数的参数注释得已经很详细了，不再多说，浅谈SQLite——查询处理及优化(10)，该函数的主要工作就是输出pCost，它包含查询策略信息及相应的代价，通过它，我们能够观察到数据库系统内部的一些东西，核心算法如下：1//遍历其所有索引,找到一个代价最小的索引2for(;pProbe;pIdx=p

函数的参数注释得已经很详细了，不再多说。该函数的主要工作就是输出pCost，它包含查询策略信息及相应的代价。

核心算法如下：

　　1　//遍历其所有索引,找到一个代价最小的索引　　　　2　for(;　pProbe;　pIdx=pProbe=pProbe->pNext){ 　　3　　　　　const　unsigned　int　*　const　aiRowEst　=　pProbe->aiRowEst; 　　4　　　　　double　cost;　　　　　　　　　　　　　　　　/*　Cost　of　using　pProbe　*/ 　　5　　　　　double　nRow;　　　　　　　　　　　　　　　　/*　Estimated　number　of　rows　in　result　set　*/ 　　6　　　　　int　rev;　　　　　　　　　　　　　　　　　　　　/*　True　to　scan　in　reverse　order　*/ 　　7　　　　　int　wsFlags　=　0; 　　8　　　　　Bitmask　used　=　0;　//该表达式使用的表的位码　　9　　10　　　　　int　nEq;　　　　　　　　//可以使用索引的等值表达式的个数　11　　　　　int　bInEst　=　0;　　　　　//如果存在　x　IN　(SELECT...),则设为true 　12　　　　　int　nInMul　=　1;　　　　　　　//处理IN子句　13　　　　　int　nBound　=　100;　　　　//估计需要扫描的表中的元素.　100表示需要扫描整个表.范围条件意味着只需要扫描表的某一部分. 　14　　　　　int　bSort　=　0;　　　　//是否需要排序　15　　　　　int　bLookup　=　0;　　　　//如果对索引中的每个列,需要对应的表进行查询,则为true 　16　　17　　　　　/*　Determine　the　values　of　nEq　and　nInMul　*/ 　18　　　　　//计算nEq和nInMul值　19　　　　　for(nEq=0;　nEq<pProbe->nColumn;　nEq++){ 　20　　　　　　　WhereTerm　*pTerm;　　　　　　　　　　　/*　A　single　term　of　the　WHERE　clause　*/ 　21　　　　　　　int　j　=　pProbe->aiColumn[nEq]; 　22　　　　　　　pTerm　=　findTerm(pWC,　iCur,　j,　notReady,　eqTermMask,　pIdx); 　23　　　　　　　if(　pTerm==0　)　//如果该条件在索引中找不到,则break. 　24　　　　　　　　　　　break; 　25　　　　　　　wsFlags　|=　(WHERE_COLUMN_EQ|WHERE_ROWID_EQ); 　26　　　　　　　if(　pTerm->eOperator　&　WO_IN　){ 　27　　　　　　　　　Expr　*pExpr　=　pTerm->pExpr; 　28　　　　　　　　　wsFlags　|=　WHERE_COLUMN_IN; 　29　　　　　　　　　if(　ExprHasProperty(pExpr,　EP_xIsSelect)　){　//IN　(SELECT...) 　30　　　　　　　　　　　nInMul　*=　25; 　31　　　　　　　　　　　bInEst　=　1; 　32　　　　　　　　　}else　if(　pExpr->x.pList　){ 　33　　　　　　　　　　　nInMul　*=　pExpr->x.pList->nExpr　+　1; 　34　　　　　　　　　} 　35　　　　　　　}else　if(　pTerm->eOperator　&　WO_ISNULL　){ 　36　　　　　　　　　wsFlags　|=　WHERE_COLUMN_NULL; 　37　　　　　　　} 　38　　　　　　　used　|=　pTerm->prereqRight;　//设置该表达式使用的表的位码　39　　　　　} 　40　　41　　　　　//计算nBound值　42　　　　　if(　nEq<pProbe->nColumn　){//考虑不能使用索引的列　43　　　　　　　int　j　=　pProbe->aiColumn[nEq]; 　44　　　　　　　if(　findTerm(pWC,　iCur,　j,　notReady,　WO_LT|WO_LE|WO_GT|WO_GE,　pIdx)　){ 　45　　　　　　　　　WhereTerm　*pTop　=　findTerm(pWC,　iCur,　j,　notReady,　WO_LT|WO_LE,　pIdx); 　46　　　　　　　　　WhereTerm　*pBtm　=　findTerm(pWC,　iCur,　j,　notReady,　WO_GT|WO_GE,　pIdx);//>= 　47　　　　　　　　　　48　　　　　　　　　//估计范围条件的代价　49　　　　　　　　　whereRangeScanEst(pParse,　pProbe,　nEq,　pBtm,　pTop,　&nBound); 　50　　　　　　　　　if(　pTop　){ 　51　　　　　　　　　　　wsFlags　|=　WHERE_TOP_LIMIT; 　52　　　　　　　　　　　used　|=　pTop->prereqRight; 　53　　　　　　　　　} 　54　　　　　　　　　if(　pBtm　){ 　55　　　　　　　　　　　wsFlags　|=　WHERE_BTM_LIMIT; 　56　　　　　　　　　　　used　|=　pBtm->prereqRight; 　57　　　　　　　　　} 　58　　　　　　　　　wsFlags　|=　(WHERE_COLUMN_RANGE|WHERE_ROWID_RANGE); 　59　　　　　　　} 　60　　　　　}else　if(　pProbe->onError!=OE_None　){//所有列都能使用索引　61　　　　　　　if(　(wsFlags　&　(WHERE_COLUMN_IN|WHERE_COLUMN_NULL))==0　){ 　62　　　　　　　　　wsFlags　|=　WHERE_UNIQUE; 　63　　　　　　　} 　64　　　　　} 　65　　66　　　　　if(　pOrderBy　){//处理order　by 　67　　　　　　　if(　(wsFlags　&　(WHERE_COLUMN_IN|WHERE_COLUMN_NULL))==0 　68　　　　　　　　　&&　isSortingIndex(pParse,pWC->pMaskSet,pProbe,iCur,pOrderBy,nEq,&rev) 　69　　　　　　　){ 　70　　　　　　　　　wsFlags　|=　WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY; 　71　　　　　　　　　wsFlags　|=　(rev　?　WHERE_REVERSE　:　0); 　72　　　　　　　}else{ 　73　　　　　　　　　bSort　=　1; 　74　　　　　　　} 　75　　　　　} 　76　　77　　　　　if(　pIdx　&&　wsFlags　){ 　78　　　　　　　Bitmask　m　=　pSrc->colUsed;　//m为src使用的列的位图　79　　　　　　　int　j; 　80　　　　　　　for(j=0;　j<pIdx->nColumn;　j++){ 　81　　　　　　　　　int　x　=　pIdx->aiColumn[j]; 　82　　　　　　　　　if(　x<BMS-1　){ 　83　　　　　　　　　　　m　&=　~(((Bitmask)1)<<x);　//将索引中列对应的位清0 　84　　　　　　　　　} 　85　　　　　　　} 　86　　　　　　　if(　m==0　){//如果索引包含src中的所有列,则只需要查询索引即可. 　87　　　　　　　　　wsFlags　|=　WHERE_IDX_ONLY; 　88　　　　　　　}else{ 　89　　　　　　　　　bLookup　=　1;//需要查询原表　90　　　　　　　} 　91　　　　　} 　92　　93　　　　　//估计输出行数,同时考虑IN运算　94　　　　　nRow　=　(double)(aiRowEst[nEq]　*　nInMul); 　95　　　　　if(　bInEst　&&　nRow*2>aiRowEst[0]　){ 　96　　　　　　　nRow　=　aiRowEst[0]/2; 　97　　　　　　　nInMul　=　(int)(nRow　/　aiRowEst[nEq]); 　98　　　　　} 　99　 100　　　　　//代价为输出的行数+二分查找的代价 101　　　　　cost　=　nRow　+　nInMul*estLog(aiRowEst[0]); 102　 103　　　　　//考虑范围条件影响 104　　　　　nRow　=　(nRow　*　(double)nBound)　/　(double)100; 105　　　　　cost　=　(cost　*　(double)nBound)　/　(double)100; 106　 107　　　　　//加上排序的代价:cost　*log　(cost) 108　　　　　if(　bSort　){ 109　　　　　　　cost　+=　cost*estLog(cost); 110　　　　　} 111　 112　　　　　//如果只查询索引,则代价减半 113　　　　　if(　pIdx　&&　bLookup==0　){ 114　　　　　　　cost　/=　(double)2; 115　　　　　} 116　 117　　　　　//如果当前的代价更小 118　　　　　if(　(!pIdx　||　wsFlags)　&&　cost<pCost->rCost　){ 119　　　　　　　pCost->rCost　=　cost;　//代价 120　　　　　　　pCost->nRow　=　nRow;　　//估计扫描的元组数 121　　　　　　　pCost->used　=　used;　//表达式使用的表的位图 122　　　　　　　pCost->plan.wsFlags　=　(wsFlags&wsFlagMask);　//查询策略标志(全表扫描,使用索引进行扫描) 123　　　　　　　pCost->plan.nEq　=　nEq;　//查询策略使用等值表达式个数 124　　　　　　　pCost->plan.u.pIdx　=　pIdx;　//查询策略使用的索引(全表扫描则为NULL) 125　　　　　} 126　 127　　 128　　　　　//如果SQL语句存在INDEXED　BY,则只考虑该索引 129　　　　　if(　pSrc->pIndex　)　break; 130　 131　　　　　/*　Reset　masks　for　the　next　index　in　the　loop　*/ 132　　　　　wsFlagMask　=　~(WHERE_ROWID_EQ|WHERE_ROWID_RANGE); 133　　　　　eqTermMask　=　idxEqTermMask; 134　　　} 135　

可见，SQLite的代价模型非常简单。而通用数据库一般是将基于规则的优化和基于代价的优化结合起来，十分复杂。

后记：

查询优化是关系数据库中最复杂的技术之一，这点我深有感触，对于SQLite这样简单的优化处理，我断断续续也差不多看了一个来月。如果你不是做DB内核开发，你会认为这些东西用处也许不会太大。但是，作为一个DBA，或者经常做数据库应用开发的程序员，如果你不理解数据库系统的执行计划，是不合格的，因为你很难写出高效的SQL语句。SQLite虽然简单，但是，它却五脏俱全。通过它，我们能够观察到数据库系统内部的一些东西，而这些东西是有益处的。　

上一页 5 6 7 8 9 10