SQLite入门与分析(四)---Page Cache之事务处理(1)
写在前面:从本章开始,将对SQLite的每个模块进行讨论。讨论的顺序按照我阅读SQLite的顺序来进行;由于项目的需要以及时间关系,不能给出一个完整的计划,但是我会先讨论我认为比较重要的内容。本节讨论SQLite的事务处理技术。事务处理是DBMS中最关键的技术,对SQLite也一样,它涉及到并发控制以及故障恢复。由于内容较多,分为两节。好了,下面进入正题。
本节通过一个具体的例子来分析SQLite原子提交的实现(基于Version 3.3.6的代码)。
(注:下面两行应为该例子对应的VDBE指令列表的最后两条,示例语句及其余指令在转载时已丢失。)
16|TableLock|0|2|1|episodes|00|
17|Goto|0|2|0||00|
1、初始状态(Initial State)
2、获取读锁(Acquiring A Read Lock)
3、读取数据 4、获取Reserved Lock 5、创建恢复日志(Creating A Rollback Journal File) 上面 5步的代码的实现: Code/事务指令的实现 //p1为数据库文件的索引号---0为main database;1为temporary tables使用的文件 //p2 不为0,一个写事务开始 case OP_Transaction: { //数据库的索引号 int i = pOp->p1; //指向数据库对应的btree Btree *pBt; assert( i>=0 && i<db->nDb ); assert( (p->btreeMask & (1<<i))!=0 ); //设置btree指针 pBt = db->aDb[i].pBt; if( pBt ){ //从这里btree开始事务,主要给文件加锁,并设置btree事务状态 rc = sqlite3BtreeBeginTrans(pBt,pOp->p2); if( rc==SQLITE_BUSY ){ p->pc = pc; p->rc = rc = SQLITE_BUSY; goto vdbe_return; } if( rc!=SQLITE_OK && rc!=SQLITE_READONLY /* && rc!=SQLITE_BUSY */ ){ goto abort_due_to_error; } } break; } //开始一个事务,如果第二个参数不为0,则一个写事务开始,否则是一个读事务 //如果wrflag>=2,一个exclusive事务开始,此时别的连接不能访问数据库 int sqlite3BtreeBeginTrans(Btree *p,int wrflag){ BtShared *pBt = p->pBt; int rc = SQLITE_OK; btreeIntegrity(p); /* If the btree is already in a write-transaction,or it ** is already in a read-transaction and a read-transaction ** is requested,this is a no-op. */ //如果b-tree处于一个写事务;或者处于一个读事务,一个读事务又请求,则返回SQLITE_OK if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){ return SQLITE_OK; } /* Write transactions are not possible on a read-only database */ //写事务不能访问只读数据库 if( pBt->readOnly && wrflag ){ return SQLITE_READONLY; } /* If another database handle has already opened a write transaction ** on this shared-btree structure and a second write transaction is ** requested,return SQLITE_BUSY. 
*/ //如果数据库已存在一个写事务,则该写事务请求时返回SQLITE_BUSY if( pBt->inTransaction==TRANS_WRITE && wrflag ){ return SQLITE_BUSY; } do { //如果数据库对应btree的第一个页面还没读进内存 //则把该页面读进内存,数据库也相应的加read lock if( pBt->pPage1==0 ){ //加read lock,并读页面到内存 rc = lockBtree(pBt); } if( rc==SQLITE_OK && wrflag ){ //对数据库文件加RESERVED_LOCK锁 rc = sqlite3pager_begin(pBt->pPage1->aData,wrflag>1); if( rc==SQLITE_OK ){ rc = newDatabase(pBt); } } if( rc==SQLITE_OK ){ if( wrflag ) pBt->inStmt = 0; }else{ unlockBtreeIfUnused(pBt); } }while( rc==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE && sqlite3InvokeBusyHandler(pBt->pBusyHandler) ); if( rc==SQLITE_OK ){ if( p->inTrans==TRANS_NONE ){ //btree的事务数加1 pBt->nTransaction++; } //设置btree事务状态 p->inTrans = (wrflag?TRANS_WRITE:TRANS_READ); if( p->inTrans>pBt->inTransaction ){ pBt->inTransaction = p->inTrans; } } btreeIntegrity(p); return rc; } /* **获取数据库的写锁,发生以下情况时去除写锁: ** * sqlite3pager_commit() is called. ** * sqlite3pager_rollback() is called. ** * sqlite3pager_close() is called. ** * sqlite3pager_unref() is called to on every outstanding page. ** pData指向数据库的打开的页面,此时并不修改,仅仅只是获取 ** 相应的pager,检查它是否处于read-lock状态。 **如果打开的不是临时文件,则打开日志文件. 
**如果数据库已经处于写状态,则do nothing */ int sqlite3pager_begin(void *pData,int exFlag){ PgHdr *pPg = DATA_TO_PGHDR(pData); Pager *pPager = pPg->pPager; int rc = SQLITE_OK; assert( pPg->nRef>0 ); assert( pPager->state!=PAGER_UNLOCK ); //pager已经处于share状态 if( pPager->state==PAGER_SHARED ){ assert( pPager->aInJournal==0 ); if( MEMDB ){ pPager->state = PAGER_EXCLUSIVE; pPager->origDbSize = pPager->dbSize; }else{ //对文件加 RESERVED_LOCK rc = sqlite3OsLock(pPager->fd,RESERVED_LOCK); if( rc==SQLITE_OK ){ //设置pager的状态 pPager->state = PAGER_RESERVED; if( exFlag ){ rc = pager_wait_on_lock(pPager,EXCLUSIVE_LOCK); } } if( rc!=SQLITE_OK ){ return rc; } pPager->dirtyCache = 0; TRACE2("TRANSACTION %dn",PAGERID(pPager)); //使用日志,不是临时文件,则打开日志文件 if( pPager->useJournal && !pPager->tempFile ){ //为pager打开日志文件,pager应该处于RESERVED或EXCLUSIVE状态 //会向日志文件写入header rc = pager_open_journal(pPager); } } } return rc; } //创建日志文件,pager应该处于RESERVED或EXCLUSIVE状态 static int pager_open_journal(Pager *pPager){ int rc; assert( !MEMDB ); assert( pPager->state>=PAGER_RESERVED ); assert( pPager->journalOpen==0 ); assert( pPager->useJournal ); assert( pPager->aInJournal==0 ); sqlite3pager_pagecount(pPager); //日志文件页面位图 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 ); if( pPager->aInJournal==0 ){ rc = SQLITE_NOMEM; goto failed_to_open_journal; } //打开日志文件 rc = sqlite3OsOpenExclusive(pPager->zJournal,&pPager->jfd,pPager->tempFile); //日志文件的位置指针 pPager->journalOff = 0; pPager->setMaster = 0; pPager->journalHdr = 0; if( rc!=SQLITE_OK ){ goto failed_to_open_journal; } /*一般来说,os此时创建的文件位于磁盘缓存,并没有实际 **存在于磁盘,下面三个操作就是为了把结果写入磁盘,而对于 **windows系统来说,并没有提供相应API,所以实际上没有意义. */ //fullSync操作对windows没有意义 sqlite3OsSetFullSync(pPager->jfd,pPager->full_fsync); sqlite3OsSetFullSync(pPager->fd,pPager->full_fsync); /* Attempt to open a file descriptor for the directory that contains a file. **This file descriptor can be used to fsync() the directory **in order to make sure the creation of a new file is actually written to disk. 
*/ sqlite3OsOpenDirectory(pPager->jfd,pPager->zDirectory); pPager->journalOpen = 1; pPager->journalStarted = 0; pPager->needSync = 0; pPager->alwaysRollback = 0; pPager->nRec = 0; if( pPager->errCode ){ rc = pPager->errCode; goto failed_to_open_journal; } pPager->origDbSize = pPager->dbSize; //写入日志文件的header---24个字节 rc = writeJournalHdr(pPager); if( pPager->stmtAutoopen && rc==SQLITE_OK ){ rc = sqlite3pager_stmt_begin(pPager); } if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ rc = pager_unwritelock(pPager); if( rc==SQLITE_OK ){ rc = SQLITE_FULL; } } return rc; failed_to_open_journal: sqliteFree(pPager->aInJournal); pPager->aInJournal = 0; if( rc==SQLITE_NOMEM ){ /* If this was a malloc() failure,then we will not be closing the pager ** file. So delete any journal file we may have just created. Otherwise,** the system will get confused,we have a read-lock on the file and a ** mysterious journal has appeared in the filesystem. */ sqlite3OsDelete(pPager->zJournal); }else{ sqlite3OsUnlock(pPager->fd,NO_LOCK); pPager->state = PAGER_UNLOCK; } return rc; } /*写入日志文件头 **journal header的格式如下: ** - 8 bytes: 标志日志文件的魔数 ** - 4 bytes: 日志文件中记录数 ** - 4 bytes: Random number used for page hash. ** - 4 bytes: 原来数据库的大小(kb) ** - 4 bytes: 扇区大小512byte */ static int writeJournalHdr(Pager *pPager){ //日志文件头 char zHeader[sizeof(aJournalMagic)+16]; int rc = seekJournalHdr(pPager); if( rc ) return rc; pPager->journalHdr = pPager->journalOff; if( pPager->stmtHdrOff==0 ){ pPager->stmtHdrOff = pPager->journalHdr; } //设置文件指针指向header之后 pPager->journalOff += JOURNAL_HDR_SZ(pPager); /* FIX ME: ** ** Possibly for a pager not in no-sync mode,the journal magic should not ** be written until nRec is filled in as part of next syncJournal(). ** ** Actually maybe the whole journal header should be delayed until that ** point. Think about this. */ memcpy(zHeader,aJournalMagic,sizeof(aJournalMagic)); /* The nRec Field. 0xFFFFFFFF for no-sync journals. */ put32bits(&zHeader[sizeof(aJournalMagic)],pPager->noSync ? 
0xffffffff : 0); /* The random check-hash initialiser */ sqlite3Randomness(sizeof(pPager->cksumInit),&pPager->cksumInit); put32bits(&zHeader[sizeof(aJournalMagic)+4],pPager->cksumInit); /* The initial database size */ put32bits(&zHeader[sizeof(aJournalMagic)+8],pPager->dbSize); /* The assumed sector size for this process */ put32bits(&zHeader[sizeof(aJournalMagic)+12],pPager->sectorSize); //写入文件头 rc = sqlite3OsWrite(pPager->jfd,zHeader,sizeof(zHeader)); /* The journal header has been written successfully. Seek the journal ** file descriptor to the end of the journal header sector. */ if( rc==SQLITE_OK ){ rc = sqlite3OsSeek(pPager->jfd,pPager->journalOff-1); if( rc==SQLITE_OK ){ rc = sqlite3OsWrite(pPager->jfd," 00",1); } } return rc; } 其实现过程如下图所示: (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |