Patches for Berkeley DB version 4.1.25

  1. Applications using Berkeley DB's Concurrent Data Store product with the DB_CDB_ALLDB flag set could hang if they opened databases while also holding open cursors.
  2. Apply the following patch to the db-4.1.25 release.  
    *** fileops/fop_util.c	8 Jan 2003 05:01:56 -0000	1.57
    --- fileops/fop_util.c	12 Jan 2003 19:44:29 -0000	1.58
    ***************
    *** 40,46 ****
      	u_int32_t __lockval;						\
      									\
      	if (LOCKING_ON((ENV))) {					\
    ! 		__lockval = 0;						\
      		__dbt.data = &__lockval;				\
      		__dbt.size = sizeof(__lockval);				\
      		if ((ret = (ENV)->lock_get((ENV), (ID),			\
    --- 40,46 ----
      	u_int32_t __lockval;						\
      									\
      	if (LOCKING_ON((ENV))) {					\
    ! 		__lockval = 1;						\
      		__dbt.data = &__lockval;				\
      		__dbt.size = sizeof(__lockval);				\
      		if ((ret = (ENV)->lock_get((ENV), (ID),			\
    

  3. The next patch addresses these issues:
    • Applications with largely dirty caches could see performance problems in the cache allocation code.
    • Environment recovery could fail after the failure of a database open.
    • Catastrophic environment recovery could fail if it followed a normal recovery that was performed when sections of the database environment log contained only database open/close pairs.
  4. Apply the following patch to the db-4.1.25 release.  
    *** dbinc/mp.h.orig	2004-02-02 10:24:53.000000000 -0800
    --- dbinc/mp.h	2004-02-02 10:26:27.000000000 -0800
    ***************
    *** 149,154 ****
    --- 149,161 ----
      	 * region lock).
      	 */
      	DB_MPOOL_STAT stat;		/* Per-cache mpool statistics. */
    +  
    + 	 /*
    + 	  * We track page puts so that we can decide when allocation is never
    + 	  * going to succeed.  We don't lock the field, all we care about is
    + 	  * if it changes.
    + 	  */
    + 	 u_int32_t  put_counter;                /* Count of page put calls. */
      };
      
      struct __db_mpool_hash {
    *** mp/mp_fput.c.orig	2002-08-13 06:26:41.000000000 -0700
    --- mp/mp_fput.c	2004-02-02 10:22:35.000000000 -0800
    ***************
    *** 19,24 ****
    --- 19,26 ----
      #include "dbinc/db_shash.h"
      #include "dbinc/mp.h"
      
    + static void __memp_reset_lru __P((DB_ENV *, REGINFO *));
    + 
      /*
       * __memp_fput --
       *	Mpool file put function.
    ***************
    *** 198,202 ****
    --- 200,255 ----
      
      	MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
      
    + 	/*
    + 	 * On every buffer put we update the buffer generation number and check
    + 	 * for wraparound.
    + 	 */
    + 	if (++c_mp->lru_count == UINT32_T_MAX)
    + 		__memp_reset_lru(dbenv, dbmp->reginfo);
    + 
      	return (0);
      }
    + 
    + /*
    +  * __memp_reset_lru --
    +  *	Reset the cache LRU counter.
    +  */
    + static void
    + __memp_reset_lru(dbenv, memreg)
    + 	DB_ENV *dbenv;
    + 	REGINFO *memreg;
    + {
    + 	BH *bhp;
    + 	DB_MPOOL_HASH *hp;
    + 	MPOOL *c_mp;
    + 	int bucket;
    + 
    + 	c_mp = memreg->primary;
    + 
    + 	/*
    + 	 * Update the counter so all future allocations will start at the
    + 	 * bottom.
    + 	 */
    + 	c_mp->lru_count -= MPOOL_BASE_DECREMENT;
    + 
    + 	/* Adjust the priority of every buffer in the system. */
    + 	for (hp = R_ADDR(memreg, c_mp->htab),
    + 	    bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
    + 		/*
    + 		 * Skip empty buckets.
    + 		 *
    + 		 * We can check for empty buckets before locking as we
    + 		 * only care if the pointer is zero or non-zero.
    + 		 */
    + 		if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
    + 			continue;
    + 
    + 		MUTEX_LOCK(dbenv, &hp->hash_mutex);
    + 		for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
    + 		    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
    + 			if (bhp->priority != UINT32_T_MAX &&
    + 			    bhp->priority > MPOOL_BASE_DECREMENT)
    + 				bhp->priority -= MPOOL_BASE_DECREMENT;
    + 		MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
    + 	}
    + }
    *** mp/mp_alloc.c.orig	2002-08-17 07:23:25.000000000 -0700
    --- mp/mp_alloc.c	2004-02-02 10:28:15.000000000 -0800
    ***************
    *** 25,31 ****
      } HS;
      
      static void __memp_bad_buffer __P((DB_MPOOL_HASH *));
    - static void __memp_reset_lru __P((DB_ENV *, REGINFO *, MPOOL *));
      
      /*
       * __memp_alloc --
    --- 25,30 ----
    ***************
    *** 50,57 ****
      	MPOOL *c_mp;
      	MPOOLFILE *bh_mfp;
      	size_t freed_space;
    ! 	u_int32_t buckets, buffers, high_priority, max_na, priority;
    ! 	int aggressive, ret;
      	void *p;
      
      	dbenv = dbmp->dbenv;
    --- 49,57 ----
      	MPOOL *c_mp;
      	MPOOLFILE *bh_mfp;
      	size_t freed_space;
    ! 	u_int32_t buckets, buffers, high_priority, priority, put_counter;
    ! 	u_int32_t total_buckets;
    ! 	int aggressive, giveup, ret;
      	void *p;
      
      	dbenv = dbmp->dbenv;
    ***************
    *** 59,76 ****
      	dbht = R_ADDR(memreg, c_mp->htab);
      	hp_end = &dbht[c_mp->htab_buckets];
      
    ! 	buckets = buffers = 0;
    ! 	aggressive = 0;
      
      	c_mp->stat.st_alloc++;
      
      	/*
    - 	 * Get aggressive if we've tried to flush the number of pages as are
    - 	 * in the system without finding space.
    - 	 */
    - 	max_na = 5 * c_mp->htab_buckets;
    - 
    - 	/*
      	 * If we're allocating a buffer, and the one we're discarding is the
      	 * same size, we don't want to waste the time to re-integrate it into
      	 * the shared memory free list.  If the DB_MPOOLFILE argument isn't
    --- 59,71 ----
      	dbht = R_ADDR(memreg, c_mp->htab);
      	hp_end = &dbht[c_mp->htab_buckets];
      
    ! 	buckets = buffers = put_counter = total_buckets = 0;
    ! 	aggressive = giveup = 0;
    ! 	hp_tmp = NULL;
      
      	c_mp->stat.st_alloc++;
      
      	/*
      	 * If we're allocating a buffer, and the one we're discarding is the
      	 * same size, we don't want to waste the time to re-integrate it into
      	 * the shared memory free list.  If the DB_MPOOLFILE argument isn't
    ***************
    *** 81,99 ****
      		len = (sizeof(BH) - sizeof(u_int8_t)) + mfp->stat.st_pagesize;
      
      	R_LOCK(dbenv, memreg);
    - 
    - 	/*
    - 	 * On every buffer allocation we update the buffer generation number
    - 	 * and check for wraparound.
    - 	 */
    - 	if (++c_mp->lru_count == UINT32_T_MAX)
    - 		__memp_reset_lru(dbenv, memreg, c_mp);
    - 
      	/*
      	 * Anything newer than 1/10th of the buffer pool is ignored during
      	 * allocation (unless allocation starts failing).
      	 */
    - 	DB_ASSERT(c_mp->lru_count > c_mp->stat.st_pages / 10);
      	high_priority = c_mp->lru_count - c_mp->stat.st_pages / 10;
      
      	/*
    --- 76,85 ----
    ***************
    *** 120,129 ****
      		 * We're not holding the region locked here, these statistics
      		 * can't be trusted.
      		 */
    ! 		if (buckets != 0) {
    ! 			if (buckets > c_mp->stat.st_alloc_max_buckets)
    ! 				c_mp->stat.st_alloc_max_buckets = buckets;
    ! 			c_mp->stat.st_alloc_buckets += buckets;
      		}
      		if (buffers != 0) {
      			if (buffers > c_mp->stat.st_alloc_max_pages)
    --- 106,116 ----
      		 * We're not holding the region locked here, these statistics
      		 * can't be trusted.
      		 */
    ! 		total_buckets += buckets;
    ! 		if (total_buckets != 0) {
    ! 			if (total_buckets > c_mp->stat.st_alloc_max_buckets)
    ! 				c_mp->stat.st_alloc_max_buckets = total_buckets;
    ! 			c_mp->stat.st_alloc_buckets += total_buckets;
      		}
      		if (buffers != 0) {
      			if (buffers > c_mp->stat.st_alloc_max_pages)
    ***************
    *** 131,136 ****
    --- 118,129 ----
      			c_mp->stat.st_alloc_pages += buffers;
      		}
      		return (0);
    + 	} else if (giveup || c_mp->stat.st_pages == 0) {
    + 		R_UNLOCK(dbenv, memreg);
    + 
    + 		__db_err(dbenv,
    + 		    "unable to allocate space from the buffer cache");
    + 		return (ret);
      	}
      
      	/*
    ***************
    *** 138,163 ****
      	 * we need.  Reset our free-space counter.
      	 */
      	freed_space = 0;
      
      	/*
      	 * Walk the hash buckets and find the next two with potentially useful
      	 * buffers.  Free the buffer with the lowest priority from the buckets'
      	 * chains.
      	 */
    ! 	for (hp_tmp = NULL;;) {
      		/* Check for wrap around. */
      		hp = &dbht[c_mp->last_checked++];
      		if (hp >= hp_end) {
      			c_mp->last_checked = 0;
    ! 
    ! 			/*
    ! 			 * If we've gone through all of the hash buckets, try
    ! 			 * an allocation.  If the cache is small, the old page
    ! 			 * size is small, and the new page size is large, we
    ! 			 * might have freed enough memory (but not 3 times the
    ! 			 * memory).
    ! 			 */
    ! 			goto alloc;
      		}
      
      		/*
    --- 131,154 ----
      	 * we need.  Reset our free-space counter.
      	 */
      	freed_space = 0;
    + 	total_buckets += buckets;
    + 	buckets = 0;
      
      	/*
      	 * Walk the hash buckets and find the next two with potentially useful
      	 * buffers.  Free the buffer with the lowest priority from the buckets'
      	 * chains.
      	 */
    ! 	for (;;) {
    ! 		/* All pages have been freed, make one last try */
    ! 		if (c_mp->stat.st_pages == 0)
    ! 			goto alloc;
    ! 
      		/* Check for wrap around. */
      		hp = &dbht[c_mp->last_checked++];
      		if (hp >= hp_end) {
      			c_mp->last_checked = 0;
    ! 			hp = &dbht[c_mp->last_checked++];
      		}
      
      		/*
    ***************
    *** 172,210 ****
      		/*
      		 * The failure mode is when there are too many buffers we can't
      		 * write or there's not enough memory in the system.  We don't
    ! 		 * have a metric for deciding if allocation has no possible way
    ! 		 * to succeed, so we don't ever fail, we assume memory will be
    ! 		 * available if we wait long enough.
      		 *
    ! 		 * Get aggressive if we've tried to flush 5 times the number of
    ! 		 * hash buckets as are in the system -- it's possible we have
    ! 		 * been repeatedly trying to flush the same buffers, although
    ! 		 * it's unlikely.  Aggressive means:
      		 *
      		 * a: set a flag to attempt to flush high priority buffers as
      		 *    well as other buffers.
      		 * b: sync the mpool to force out queue extent pages.  While we
      		 *    might not have enough space for what we want and flushing
      		 *    is expensive, why not?
    ! 		 * c: sleep for a second -- hopefully someone else will run and
    ! 		 *    free up some memory.  Try to allocate memory too, in case
    ! 		 *    the other thread returns its memory to the region.
    ! 		 * d: look at a buffer in every hash bucket rather than choose
      		 *    the more preferable of two.
      		 *
      		 * !!!
      		 * This test ignores pathological cases like no buffers in the
      		 * system -- that shouldn't be possible.
      		 */
    ! 		if ((++buckets % max_na) == 0) {
    ! 			aggressive = 1;
    ! 
      			R_UNLOCK(dbenv, memreg);
      
    ! 			(void)__memp_sync_int(
    ! 			    dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
    ! 
    ! 			(void)__os_sleep(dbenv, 1, 0);
      
      			R_LOCK(dbenv, memreg);
      			goto alloc;
    --- 163,221 ----
      		/*
      		 * The failure mode is when there are too many buffers we can't
      		 * write or there's not enough memory in the system.  We don't
    ! 		 * have a way to know that allocation has no way to succeed.
    ! 		 * We fail if there were no pages returned to the cache after
    ! 		 * we've been trying for a relatively long time.
      		 *
    ! 		 * Get aggressive if we've tried to flush the number of hash
    ! 		 * buckets as are in the system and have not found any more
    ! 		 * space.  Aggressive means:
      		 *
      		 * a: set a flag to attempt to flush high priority buffers as
      		 *    well as other buffers.
      		 * b: sync the mpool to force out queue extent pages.  While we
      		 *    might not have enough space for what we want and flushing
      		 *    is expensive, why not?
    ! 		 * c: look at a buffer in every hash bucket rather than choose
      		 *    the more preferable of two.
    + 		 * d: start to think about giving up.
    + 		 *
    + 		 * If we get here twice, sleep for a second, hopefully someone
    + 		 * else will run and free up some memory.
    + 		 *
    + 		 * Always try to allocate memory too, in case some other thread
    + 		 * returns its memory to the region.
      		 *
      		 * !!!
      		 * This test ignores pathological cases like no buffers in the
      		 * system -- that shouldn't be possible.
      		 */
    ! 		if ((++buckets % c_mp->htab_buckets) == 0) {
    ! 			if (freed_space > 0)
    ! 				goto alloc;
      			R_UNLOCK(dbenv, memreg);
      
    ! 			switch (++aggressive) {
    ! 			case 1:
    ! 				break;
    ! 			case 2:
    ! 				put_counter = c_mp->put_counter;
    ! 				/* FALLTHROUGH */
    ! 			case 3:
    ! 			case 4:
    ! 			case 5:
    ! 			case 6:
    ! 				(void)__memp_sync_int(
    ! 				    dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
    ! 
    ! 				(void)__os_sleep(dbenv, 1, 0);
    ! 				break;
    ! 			default:
    ! 				aggressive = 1;
    ! 				if (put_counter == c_mp->put_counter)
    ! 					giveup = 1;
    ! 				break;
    ! 			}
      
      			R_LOCK(dbenv, memreg);
      			goto alloc;
    ***************
    *** 277,283 ****
      		 * thread may have acquired this buffer and incremented the ref
      		 * count after we wrote it, in which case we can't have it.
      		 *
    ! 		 * If there's a write error, avoid selecting this buffer again
      		 * by making it the bucket's least-desirable buffer.
      		 */
      		if (ret != 0 || bhp->ref != 0) {
    --- 288,295 ----
      		 * thread may have acquired this buffer and incremented the ref
      		 * count after we wrote it, in which case we can't have it.
      		 *
    ! 		 * If there's a write error and we're having problems finding
    ! 		 * something to allocate, avoid selecting this buffer again
      		 * by making it the bucket's least-desirable buffer.
      		 */
      		if (ret != 0 || bhp->ref != 0) {
    ***************
    *** 301,306 ****
    --- 313,320 ----
      
      		freed_space += __db_shsizeof(bhp);
      		__memp_bhfree(dbmp, hp, bhp, 1);
    + 		if (aggressive > 1)
    + 			aggressive = 1;
      
      		/*
      		 * Unlock this hash bucket and re-acquire the region lock. If
    ***************
    *** 362,415 ****
      	hp->hash_priority = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
      }
      
    - /*
    -  * __memp_reset_lru --
    -  *	Reset the cache LRU counter.
    -  */
    - static void
    - __memp_reset_lru(dbenv, memreg, c_mp)
    - 	DB_ENV *dbenv;
    - 	REGINFO *memreg;
    - 	MPOOL *c_mp;
    - {
    - 	BH *bhp;
    - 	DB_MPOOL_HASH *hp;
    - 	int bucket;
    - 
    - 	/*
    - 	 * Update the counter so all future allocations will start at the
    - 	 * bottom.
    - 	 */
    - 	c_mp->lru_count -= MPOOL_BASE_DECREMENT;
    - 
    - 	/* Release the region lock. */
    - 	R_UNLOCK(dbenv, memreg);
    - 
    - 	/* Adjust the priority of every buffer in the system. */
    - 	for (hp = R_ADDR(memreg, c_mp->htab),
    - 	    bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
    - 		/*
    - 		 * Skip empty buckets.
    - 		 *
    - 		 * We can check for empty buckets before locking as we
    - 		 * only care if the pointer is zero or non-zero.
    - 		 */
    - 		if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
    - 			continue;
    - 
    - 		MUTEX_LOCK(dbenv, &hp->hash_mutex);
    - 		for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
    - 		    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
    - 			if (bhp->priority != UINT32_T_MAX &&
    - 			    bhp->priority > MPOOL_BASE_DECREMENT)
    - 				bhp->priority -= MPOOL_BASE_DECREMENT;
    - 		MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
    - 	}
    - 
    - 	/* Reacquire the region lock. */
    - 	R_LOCK(dbenv, memreg);
    - }
    - 
      #ifdef DIAGNOSTIC
      /*
       * __memp_check_order --
    --- 376,381 ----
    *** dbreg/dbreg_rec.c.orig	2002-08-17 07:22:52.000000000 -0700
    --- dbreg/dbreg_rec.c	2003-11-08 10:59:19.000000000 -0800
    ***************
    *** 174,192 ****
      			 * Typically, closes should match an open which means
      			 * that if this is a close, there should be a valid
      			 * entry in the dbentry table when we get here,
    ! 			 * however there is an exception.  If this is an
      			 * OPENFILES pass, then we may have started from
      			 * a log file other than the first, and the
      			 * corresponding open appears in an earlier file.
    ! 			 * We can ignore that case, but all others are errors.
      			 */
      			dbe = &dblp->dbentry[argp->fileid];
      			if (dbe->dbp == NULL && !dbe->deleted) {
      				/* No valid entry here. */
    ! 				if ((argp->opcode != LOG_CLOSE &&
    ! 				    argp->opcode != LOG_RCLOSE) ||
    ! 				    (op != DB_TXN_OPENFILES &&
    ! 				    op !=DB_TXN_POPENFILES)) {
      					__db_err(dbenv,
      					    "Improper file close at %lu/%lu",
      					    (u_long)lsnp->file,
    --- 174,193 ----
      			 * Typically, closes should match an open which means
      			 * that if this is a close, there should be a valid
      			 * entry in the dbentry table when we get here,
    ! 			 * however there are exceptions.  1. If this is an
      			 * OPENFILES pass, then we may have started from
      			 * a log file other than the first, and the
      			 * corresponding open appears in an earlier file.
    ! 			 * 2. If we are undoing an open on an abort or
    ! 			 * recovery, it's possible that we failed after
    ! 			 * the log record, but before we actually entered
    ! 			 * a handle here.
      			 */
      			dbe = &dblp->dbentry[argp->fileid];
      			if (dbe->dbp == NULL && !dbe->deleted) {
      				/* No valid entry here. */
    ! 				if (DB_REDO(op) ||
    ! 				    argp->opcode == LOG_CHECKPOINT) {
      					__db_err(dbenv,
      					    "Improper file close at %lu/%lu",
      					    (u_long)lsnp->file,
    *** env/env_recover.c.orig.1	2002-08-22 14:52:51.000000000 -0700
    --- env/env_recover.c	2003-11-15 08:20:59.000000000 -0800
    ***************
    *** 232,243 ****
      	 * we'll still need to do a vtruncate based on information we haven't
      	 * yet collected.
      	 */
    ! 	if (ret == DB_NOTFOUND) {
      		ret = 0;
    ! 		if (max_lsn == NULL)
    ! 			goto done;
    ! 	}
    ! 	if (ret != 0)
      		goto err;
      
      	hi_txn = txnid;
    --- 232,240 ----
      	 * we'll still need to do a vtruncate based on information we haven't
      	 * yet collected.
      	 */
    ! 	if (ret == DB_NOTFOUND) 
      		ret = 0;
    ! 	else if (ret != 0)
      		goto err;
      
      	hi_txn = txnid;
    ***************
    *** 331,337 ****
      
      	/* Find a low txnid. */
      	ret = 0;
    ! 	do {
      		/* txnid is after rectype, which is a u_int32. */
      		memcpy(&txnid,
      		    (u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid));
    --- 328,334 ----
      
      	/* Find a low txnid. */
      	ret = 0;
    ! 	if (hi_txn != 0) do {
      		/* txnid is after rectype, which is a u_int32. */
      		memcpy(&txnid,
      		    (u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid));
    ***************
    *** 344,354 ****
      	 * There are no transactions and we're not recovering to an LSN (see
      	 * above), so there is nothing to do.
      	 */
    ! 	if (ret == DB_NOTFOUND) {
      		ret = 0;
    - 		if (max_lsn == NULL)
    - 			goto done;
    - 	}
      
      	/* Reset to the first lsn. */
      	if (ret != 0 || (ret = logc->get(logc, &first_lsn, &data, DB_SET)) != 0)
    --- 341,348 ----
      	 * There are no transactions and we're not recovering to an LSN (see
      	 * above), so there is nothing to do.
      	 */
    ! 	if (ret == DB_NOTFOUND) 
      		ret = 0;
      
      	/* Reset to the first lsn. */
      	if (ret != 0 || (ret = logc->get(logc, &first_lsn, &data, DB_SET)) != 0)
    ***************
    *** 367,372 ****
    --- 361,370 ----
      	    txninfo, &data, &first_lsn, &last_lsn, nfiles, 1)) != 0)
      		goto err;
      
    + 	/* If there were no transactions, then we can bail out early. */
    + 	if (hi_txn == 0 && max_lsn == NULL)
    + 		goto done;
    + 		
      	/*
      	 * Pass #2.
      	 *
    ***************
    *** 483,488 ****
    --- 481,487 ----
      	if ((ret = __dbreg_close_files(dbenv)) != 0)
      		goto err;
      
    + done:
      	if (max_lsn != NULL) {
      		region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn;
      
    ***************
    *** 538,544 ****
      		__db_err(dbenv, "Recovery complete at %.24s", ctime(&now));
      		__db_err(dbenv, "%s %lx %s [%lu][%lu]",
      		    "Maximum transaction ID",
    ! 		    ((DB_TXNHEAD *)txninfo)->maxid,
      		    "Recovery checkpoint",
      		    (u_long)region->last_ckp.file,
      		    (u_long)region->last_ckp.offset);
    --- 537,544 ----
      		__db_err(dbenv, "Recovery complete at %.24s", ctime(&now));
      		__db_err(dbenv, "%s %lx %s [%lu][%lu]",
      		    "Maximum transaction ID",
    ! 		    txninfo == NULL ? TXN_MINIMUM :
    ! 			((DB_TXNHEAD *)txninfo)->maxid,
      		    "Recovery checkpoint",
      		    (u_long)region->last_ckp.file,
      		    (u_long)region->last_ckp.offset);
    ***************
    *** 550,556 ****
      		    (u_long)lsn.file, (u_long)lsn.offset, pass);
      	}
      
    - done:
      err:	if (lockid != DB_LOCK_INVALIDID) {
      		if ((t_ret = __rep_unlockpages(dbenv, lockid)) != 0 && ret == 0)
      			ret = t_ret;
    --- 550,555 ----
    

  5. Fix a bug where cache buffer retrieval could race with a checkpoint call, potentially causing database environment recovery to fail. [#14657]
  6. Apply the following patch to the db-4.1.25 release.  
    *** mp/mp_fget.c.orig	2002-08-07 08:23:01.000000000 -0700
    --- mp/mp_fget.c	2006-05-30 20:32:20.000000000 -0700
    ***************
    *** 506,513 ****
      	 */
      	if (state != SECOND_MISS && bhp->ref == 1) {
      		bhp->priority = UINT32_T_MAX;
    ! 		SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh);
    ! 		SH_TAILQ_INSERT_TAIL(&hp->hash_bucket, bhp, hq);
      		hp->hash_priority =
      		    SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
      	}
    --- 506,517 ----
      	 */
      	if (state != SECOND_MISS && bhp->ref == 1) {
      		bhp->priority = UINT32_T_MAX;
    ! 		/* Move the buffer if there are others in the bucket. */
    ! 		if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) != bhp
    ! 		    || SH_TAILQ_NEXT(bhp, hq, __bh) != NULL) {
    ! 			SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh);
    ! 			SH_TAILQ_INSERT_TAIL(&hp->hash_bucket, bhp, hq);
    ! 		}
      		hp->hash_priority =
      		    SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
      	}
    *** mp/mp_fput.c.orig	2002-08-13 06:26:41.000000000 -0700
    --- mp/mp_fput.c	2006-05-30 20:55:11.000000000 -0700
    ***************
    *** 166,171 ****
    --- 166,176 ----
      	 * to the correct position in the list.
      	 */
      	argbhp = bhp;
    + 	/* Move the buffer if there are others in the bucket. */
    + 	if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == bhp
    + 	    && SH_TAILQ_NEXT(bhp, hq, __bh) != NULL)
    + 	    	goto done;
    + 
      	SH_TAILQ_REMOVE(&hp->hash_bucket, argbhp, hq, __bh);
      
      	prev = NULL;
    ***************
    *** 178,183 ****
    --- 183,189 ----
      	else
      		SH_TAILQ_INSERT_AFTER(&hp->hash_bucket, prev, argbhp, hq, __bh);
      
    + done:
      	/* Reset the hash bucket's priority. */
      	hp->hash_priority = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;