SQLite入门与分析(四)---Page Cache之事务处理(2)
写在前面:个人认为pager层是SQLite实现最为核心的模块,它具有四大功能:I/O,页面缓存,并发控制和日志恢复。而这些功能不仅是上层Btree的基础,而且对系统的性能和健壮性有着至关重要的影响。其中并发控制和日志恢复是事务处理实现的基础。SQLite并发控制的机制非常简单——封锁机制;另外,它的查询优化机制也非常简单——基于索引。这一切使得整个SQLite的实现变得简单,SQLite变得很小,运行速度也非常快,所以,特别适合嵌入式设备。好了,接下来讨论事务的剩余部分。
7、日志文件刷入磁盘(Flushing The Rollback Journal File To Mass Storage)
代码如下: Code /* **Sync日志文件,保证所有的脏页面写入磁盘日志文件 */ static int syncJournal(Pager *pPager){ PgHdr *pPg; int rc = SQLITE_OK; /* Sync the journal before modifying the main database ** (assuming there is a journal and it needs to be synced.) */ if( pPager->needSync ){ if( !pPager->tempFile ){ assert( pPager->journalOpen ); /* assert( !pPager->noSync ); // noSync might be set if synchronous ** was turned off after the transaction was started. Ticket #615 */ #ifndef NDEBUG { /* Make sure the pPager->nRec counter we are keeping agrees ** with the nRec computed from the size of the journal file. */ i64 jSz; rc = sqlite3OsFileSize(pPager->jfd,&jSz); if( rc!=0 ) return rc; assert( pPager->journalOff==jSz ); } #endif { /* Write the nRec value into the journal file header. If in ** full-synchronous mode,sync the journal first. This ensures that ** all data has really hit the disk before nRec is updated to mark ** it as a candidate for rollback. */ if( pPager->fullSync ){ TRACE2("SYNC journal of %d\n",PAGERID(pPager)); //首先保证脏页面中所有的数据都已经写入日志文件 rc = sqlite3OsSync(pPager->jfd,0); if( rc!=0 ) return rc; } rc = sqlite3OsSeek(pPager->jfd,pPager->journalHdr + sizeof(aJournalMagic)); if( rc ) return rc; //页面的数目写入日志文件 rc = write32bits(pPager->jfd,pPager->nRec); if( rc ) return rc; rc = sqlite3OsSeek(pPager->jfd,pPager->journalOff); if( rc ) return rc; } TRACE2("SYNC journal of %d\n",PAGERID(pPager)); rc = sqlite3OsSync(pPager->jfd,pPager->full_fsync); if( rc!=0 ) return rc; pPager->journalStarted = 1; } pPager->needSync = 0; /* Erase the needSync flag from every page. */ //清除needSync标志位 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ pPg->needSync = 0; } pPager->pFirstSynced = pPager->pFirst; } #ifndef NDEBUG /* If the Pager.needSync flag is clear then the PgHdr.needSync ** flag must also be clear for all pages. Verify that this ** invariant is true. 
*/ else{ for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ assert( pPg->needSync==0 ); } assert( pPager->pFirstSynced==pPager->pFirst ); } #endif return rc; } 8、获取排斥锁(Obtaining An Exclusive Lock)
9、修改的页面写入文件(Writing Changes To The Database File)
以上两步的实现代码: Code //把所有的脏页面写入数据库 //到这里开始获取EXCLUSIVE锁,并将页面写回操作系统文件 static int pager_write_pagelist(PgHdr *pList){ Pager *pPager; int rc; if( pList==0 ) return SQLITE_OK; pPager = pList->pPager; /* At this point there may be either a RESERVED or EXCLUSIVE lock on the ** database file. If there is already an EXCLUSIVE lock,the following ** calls to sqlite3OsLock() are no-ops. ** ** Moving the lock from RESERVED to EXCLUSIVE actually involves going ** through an intermediate state PENDING. A PENDING lock prevents new ** readers from attaching to the database but is unsufficient for us to ** write. The idea of a PENDING lock is to prevent new readers from ** coming in while we wait for existing readers to clear. ** ** While the pager is in the RESERVED state,the original database file ** is unchanged and we can rollback without having to playback the ** journal into the original database file. Once we transition to ** EXCLUSIVE,it means the database file has been changed and any rollback ** will require a journal playback. */ //加EXCLUSIVE_LOCK锁 rc = pager_wait_on_lock(pPager,EXCLUSIVE_LOCK); if( rc!=SQLITE_OK ){ return rc; } while( pList ){ assert( pList->dirty ); rc = sqlite3OsSeek(pPager->fd,(pList->pgno-1)*(i64)pPager->pageSize); if( rc ) return rc; /* If there are dirty pages in the page cache with page numbers greater ** than Pager.dbSize,this means sqlite3pager_truncate() was called to ** make the file smaller (presumably by auto-vacuum code). Do not write ** any such pages to the file. 
*/ if( pList->pgno<=pPager->dbSize ){ char *pData = CODEC2(pPager,PGHDR_TO_DATA(pList),pList->pgno,6); TRACE3("STORE %d page %d\n",PAGERID(pPager),pList->pgno); //写入文件 rc = sqlite3OsWrite(pPager->fd,pData,pPager->pageSize); TEST_INCR(pPager->nWrite); } #ifndef NDEBUG else{ TRACE3("NOSTORE %d page %d\n",PAGERID(pPager),pList->pgno); } #endif if( rc ) return rc; //清除dirty标志 pList->dirty = 0; #ifdef SQLITE_CHECK_PAGES pList->pageHash = pager_pagehash(pList); #endif //指向下一个脏页面 pList = pList->pDirty; } return SQLITE_OK; } 10、修改结果刷入存储设备(Flushing Changes To Mass Storage) 最后来看看这几步是如何实现的: 其实以上几步是在函数sqlite3BtreeSync()---btree.c中调用的(而关于该函数的调用后面再讲)。 代码如下: Code//同步btree对应的数据库文件 //该函数返回之后,只需要提交写事务,删除日志文件 int sqlite3BtreeSync(Btree *p,const char *zMaster){ int rc = SQLITE_OK; if( p->inTrans==TRANS_WRITE ){ BtShared *pBt = p->pBt; Pgno nTrunc = 0; #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum ){ rc = autoVacuumCommit(pBt,&nTrunc); if( rc!=SQLITE_OK ){ return rc; } } #endif //调用pager进行sync rc = sqlite3pager_sync(pBt->pPager,zMaster,nTrunc); } return rc; } //把pager所有脏页面写回文件 int sqlite3pager_sync(Pager *pPager,const char *zMaster,Pgno nTrunc){ int rc = SQLITE_OK; TRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n",pPager->zFilename,zMaster,nTrunc); /* If this is an in-memory db,or no pages have been written to,or this ** function has already been called,it is a no-op. */ //pager不处于PAGER_SYNCED状态,dirtyCache为1,//则进行sync操作 if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){ PgHdr *pPg; assert( pPager->journalOpen ); /* If a master journal file name has already been written to the ** journal file,then no sync is required. This happens when it is ** written,then the process fails to upgrade from a RESERVED to an ** EXCLUSIVE lock. The next time the process tries to commit the ** transaction the m-j name will have already been written. 
*/ if( !pPager->setMaster ){ //pager修改计数 rc = pager_incr_changecounter(pPager); if( rc!=SQLITE_OK ) goto sync_exit; #ifndef SQLITE_OMIT_AUTOVACUUM if( nTrunc!=0 ){ /* If this transaction has made the database smaller,then all pages ** being discarded by the truncation must be written to the journal ** file. */ Pgno i; void *pPage; int iSkip = PAGER_MJ_PGNO(pPager); for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){ if( !(pPager->aInJournal[i/8] & (1<<(i&7))) && i!=iSkip ){ rc = sqlite3pager_get(pPager,i,&pPage); if( rc!=SQLITE_OK ) goto sync_exit; rc = sqlite3pager_write(pPage); sqlite3pager_unref(pPage); if( rc!=SQLITE_OK ) goto sync_exit; } } } #endif rc = writeMasterJournal(pPager,zMaster); if( rc!=SQLITE_OK ) goto sync_exit; //sync日志文件 rc = syncJournal(pPager); if( rc!=SQLITE_OK ) goto sync_exit; } #ifndef SQLITE_OMIT_AUTOVACUUM if( nTrunc!=0 ){ rc = sqlite3pager_truncate(pPager,nTrunc); if( rc!=SQLITE_OK ) goto sync_exit; } #endif /* Write all dirty pages to the database file */ pPg = pager_get_all_dirty_pages(pPager); //把所有脏页面写回操作系统文件 rc = pager_write_pagelist(pPg); if( rc!=SQLITE_OK ) goto sync_exit; /* Sync the database file. */ //sync数据库文件 if( !pPager->noSync ){ rc = sqlite3OsSync(pPager->fd,0); } pPager->state = PAGER_SYNCED; }else if( MEMDB && nTrunc!=0 ){ rc = sqlite3pager_truncate(pPager,nTrunc); } sync_exit: return rc; } 下图可以进一步解释该过程: (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |