-
When looking for an already open log file, do not examine a file name
structure if its reference count is 0. This problem cannot cause data
corruption, but may cause program failure.
*** log/log_register.c.orig Fri Jan 15 10:35:52 1999
--- log/log_register.c Fri Feb 12 08:35:00 1999
***************
*** 70,78 ****
*/
for (maxid = 0, fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname);
fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
! if (fnp->ref == 0 && reuse_fnp == NULL) {
! /* Entry is not in use. */
! reuse_fnp = fnp;
continue;
}
if (!memcmp(dbp->fileid, fnp->ufid, DB_FILE_ID_LEN)) {
--- 70,78 ----
*/
for (maxid = 0, fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname);
fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
! if (fnp->ref == 0) { /* Entry is not in use. */
! if (reuse_fnp == NULL)
! reuse_fnp = fnp;
continue;
}
if (!memcmp(dbp->fileid, fnp->ufid, DB_FILE_ID_LEN)) {
-
Berkeley DB recovery assumes that there are at least two checkpoints. It
was possible for log archival to leave the recovery area with only a single
checkpoint.
*** log/log_findckp.c.orig Sat Sep 26 21:57:22 1998
--- log/log_findckp.c Wed Feb 17 23:11:40 1999
***************
*** 28,34 ****
* __log_findckp --
*
* Looks for the most recent checkpoint that occurs before the most recent
! * checkpoint LSN. This is the point from which recovery can start and the
* point up to which archival/truncation can take place. Checkpoints in
* the log look like:
*
--- 28,37 ----
* __log_findckp --
*
* Looks for the most recent checkpoint that occurs before the most recent
! * checkpoint LSN, subject to the constraint that there must be at least two
! * checkpoints. The reason you need two checkpoints is that you might have
! * crashed during the most recent one and may not have a copy of all the
! * open files. This is the point from which recovery can start and the
* point up to which archival/truncation can take place. Checkpoints in
* the log look like:
*
***************
*** 56,62 ****
DB_LSN *lsnp;
{
DBT data;
! DB_LSN ckp_lsn, last_ckp, next_lsn;
__txn_ckp_args *ckp_args;
int ret, verbose;
--- 59,65 ----
DB_LSN *lsnp;
{
DBT data;
! DB_LSN ckp_lsn, final_ckp, last_ckp, next_lsn;
__txn_ckp_args *ckp_args;
int ret, verbose;
***************
*** 76,81 ****
--- 79,85 ----
else
return (ret);
+ final_ckp = last_ckp;
next_lsn = last_ckp;
do {
if (F_ISSET(lp, DB_AM_THREAD))
***************
*** 103,110 ****
last_ckp = next_lsn;
next_lsn = ckp_args->last_ckp;
__os_free(ckp_args, sizeof(*ckp_args));
} while (!IS_ZERO_LSN(next_lsn) &&
! log_compare(&last_ckp, &ckp_lsn) > 0);
if (F_ISSET(lp, DB_AM_THREAD))
__os_free(data.data, data.size);
--- 107,121 ----
last_ckp = next_lsn;
next_lsn = ckp_args->last_ckp;
__os_free(ckp_args, sizeof(*ckp_args));
+
+ /*
+ * Keep looping until either you 1) run out of checkpoints,
+ * 2) you've found a checkpoint before the most recent
+ * checkpoint's LSN and you have at least 2 checkpoints.
+ */
} while (!IS_ZERO_LSN(next_lsn) &&
! (log_compare(&last_ckp, &ckp_lsn) > 0 ||
! log_compare(&final_ckp, &last_ckp) == 0));
if (F_ISSET(lp, DB_AM_THREAD))
__os_free(data.data, data.size);
***************
*** 116,122 ****
* next_lsn must be 0 and we need to roll forward from the
* beginning of the log.
*/
! if (log_compare(&last_ckp, &ckp_lsn) > 0) {
get_first: if ((ret = log_get(lp, &last_ckp, &data, DB_FIRST)) != 0)
return (ret);
if (F_ISSET(lp, DB_AM_THREAD))
--- 127,134 ----
* next_lsn must be 0 and we need to roll forward from the
* beginning of the log.
*/
! if (log_compare(&last_ckp, &ckp_lsn) > 0 ||
! log_compare(&final_ckp, &last_ckp) == 0) {
get_first: if ((ret = log_get(lp, &last_ckp, &data, DB_FIRST)) != 0)
return (ret);
if (F_ISSET(lp, DB_AM_THREAD))
-
Version 2.6.5 cannot recover version 2.4.14 log files.
*** include/txn_auto.h.orig Thu Feb 4 12:05:21 1999
--- include/txn_auto.h Fri Feb 19 16:33:42 1999
***************
*** 12,18 ****
} __txn_regop_args;
! #define DB_txn_xa_regop (DB_txn_BEGIN + 2)
typedef struct _txn_xa_regop_args {
u_int32_t type;
--- 12,18 ----
} __txn_regop_args;
! #define DB_txn_xa_regop (DB_txn_BEGIN + 3)
typedef struct _txn_xa_regop_args {
u_int32_t type;
***************
*** 26,32 ****
} __txn_xa_regop_args;
! #define DB_txn_ckp (DB_txn_BEGIN + 3)
typedef struct _txn_ckp_args {
u_int32_t type;
--- 26,32 ----
} __txn_xa_regop_args;
! #define DB_txn_ckp (DB_txn_BEGIN + 2)
typedef struct _txn_ckp_args {
u_int32_t type;
*** txn/txn.src.orig Wed Dec 31 19:00:00 1969
--- txn/txn.src Fri Feb 19 23:02:45 1999
***************
*** 17,34 ****
END
/*
- * This is the standard log operation for prepare (since right now
- * we only use prepare in an XA environment).
- */
- BEGIN xa_regop
- ARG opcode u_int32_t lu
- DBT xid DBT s
- ARG formatID int32_t ld
- ARG gtrid u_int32_t u
- ARG bqual u_int32_t u
- END
-
- /*
* This is the checkpoint record. It contains the lsn that the checkpoint
* guarantees and a pointer to the last checkpoint so we can walk backwards
* by checkpoint.
--- 17,22 ----
***************
*** 43,46 ****
--- 31,46 ----
BEGIN ckp
POINTER ckp_lsn DB_LSN * lu
POINTER last_ckp DB_LSN * lu
+ END
+
+ /*
+ * This is the standard log operation for prepare (since right now
+ * we only use prepare in an XA environment).
+ */
+ BEGIN xa_regop
+ ARG opcode u_int32_t lu
+ DBT xid DBT s
+ ARG formatID int32_t ld
+ ARG gtrid u_int32_t u
+ ARG bqual u_int32_t u
END
-
Database file opens after recovery could sometimes fail.
*** log/log_rec.c.orig Wed Oct 21 21:07:17 1998
--- log/log_rec.c Mon Mar 1 21:45:01 1999
***************
*** 56,63 ****
#include "db_dispatch.h"
#include "common_ext.h"
! static int __log_open_file __P((DB_LOG *,
! u_int8_t *, char *, DBTYPE, u_int32_t));
/*
* PUBLIC: int __log_register_recover
--- 56,62 ----
#include "db_dispatch.h"
#include "common_ext.h"
! static int __log_open_file __P((DB_LOG *, __log_register_args *));
/*
* PUBLIC: int __log_register_recover
***************
*** 95,102 ****
* If we are redoing an open or undoing a close, then we need
* to open a file.
*/
! ret = __log_open_file(logp,
! argp->uid.data, argp->name.data, argp->ftype, argp->id);
if (ret == ENOENT) {
if (redo == TXN_OPENFILES)
__db_err(logp->dbenv, "warning: %s: %s",
--- 94,100 ----
* If we are redoing an open or undoing a close, then we need
* to open a file.
*/
! ret = __log_open_file(logp, argp);
if (ret == ENOENT) {
if (redo == TXN_OPENFILES)
__db_err(logp->dbenv, "warning: %s: %s",
***************
*** 133,140 ****
* closed and has therefore not been reopened yet. If
* so, we need to try to open it.
*/
! ret = __log_open_file(logp,
! argp->uid.data, argp->name.data, argp->ftype, argp->id);
if (ret == ENOENT) {
__db_err(logp->dbenv, "warning: %s: %s",
argp->name.data, strerror(ENOENT));
--- 131,137 ----
* closed and has therefore not been reopened yet. If
* so, we need to try to open it.
*/
! ret = __log_open_file(logp, argp);
if (ret == ENOENT) {
__db_err(logp->dbenv, "warning: %s: %s",
argp->name.data, strerror(ENOENT));
***************
*** 156,175 ****
* Returns 0 on success, non-zero on error.
*/
static int
! __log_open_file(lp, uid, name, ftype, ndx)
DB_LOG *lp;
! u_int8_t *uid;
! char *name;
! DBTYPE ftype;
! u_int32_t ndx;
{
DB *dbp;
int ret;
LOCK_LOGTHREAD(lp);
! if (ndx < lp->dbentry_cnt &&
! (lp->dbentry[ndx].deleted == 1 || lp->dbentry[ndx].dbp != NULL)) {
! lp->dbentry[ndx].refcount++;
UNLOCK_LOGTHREAD(lp);
return (0);
--- 153,171 ----
* Returns 0 on success, non-zero on error.
*/
static int
! __log_open_file(lp, argp)
DB_LOG *lp;
! __log_register_args *argp;
{
DB *dbp;
int ret;
LOCK_LOGTHREAD(lp);
! if (argp->id < lp->dbentry_cnt &&
! (lp->dbentry[argp->id].deleted == 1 ||
! lp->dbentry[argp->id].dbp != NULL)) {
! if (argp->opcode != LOG_CHECKPOINT)
! lp->dbentry[argp->id].refcount++;
UNLOCK_LOGTHREAD(lp);
return (0);
***************
*** 178,189 ****
/* Need to open file. */
dbp = NULL;
! if ((ret = db_open(name, ftype, 0, 0, lp->dbenv, NULL, &dbp)) == 0) {
/*
* Verify that we are opening the same file that we were
* referring to when we wrote this log record.
*/
! if (memcmp(uid, dbp->fileid, DB_FILE_ID_LEN) != 0) {
(void)dbp->close(dbp, 0);
dbp = NULL;
ret = ENOENT;
--- 174,186 ----
/* Need to open file. */
dbp = NULL;
! if ((ret = db_open(argp->name.data,
! argp->ftype, 0, 0, lp->dbenv, NULL, &dbp)) == 0) {
/*
* Verify that we are opening the same file that we were
* referring to when we wrote this log record.
*/
! if (memcmp(argp->uid.data, dbp->fileid, DB_FILE_ID_LEN) != 0) {
(void)dbp->close(dbp, 0);
dbp = NULL;
ret = ENOENT;
***************
*** 191,197 ****
}
if (ret == 0 || ret == ENOENT)
! (void)__log_add_logid(lp, dbp, ndx);
return (ret);
}
--- 188,194 ----
}
if (ret == 0 || ret == ENOENT)
! (void)__log_add_logid(lp, dbp, argp->id);
return (ret);
}
***************
*** 295,303 ****
u_int32_t i;
LOCK_LOGTHREAD(logp);
for (i = 0; i < logp->dbentry_cnt; i++)
! if (logp->dbentry[i].dbp)
logp->dbentry[i].dbp->close(logp->dbentry[i].dbp, 0);
UNLOCK_LOGTHREAD(logp);
}
--- 292,305 ----
u_int32_t i;
LOCK_LOGTHREAD(logp);
+ F_SET(logp, DBC_RECOVER);
for (i = 0; i < logp->dbentry_cnt; i++)
! if (logp->dbentry[i].dbp) {
logp->dbentry[i].dbp->close(logp->dbentry[i].dbp, 0);
+ logp->dbentry[i].dbp = NULL;
+ logp->dbentry[i].deleted = 0;
+ }
+ F_CLR(logp, DBC_RECOVER);
UNLOCK_LOGTHREAD(logp);
}
-
If only a single checkpoint is found, perform recovery from the beginning
of the log.
*** common/db_apprec.c.orig Mon Oct 5 22:09:24 1998
--- common/db_apprec.c Thu Mar 4 22:03:15 1999
***************
*** 115,121 ****
__db_err(dbenv, "Invalid checkpoint record at [%ld][%ld]\n",
(u_long)ckp_lsn.file, (u_long)ckp_lsn.offset);
goto out;
! } else if ((ret = log_get(lp, &ckp_args->last_ckp, &data, DB_SET)) != 0)
goto first;
else
open_lsn = ckp_args->last_ckp;
--- 115,122 ----
__db_err(dbenv, "Invalid checkpoint record at [%ld][%ld]\n",
(u_long)ckp_lsn.file, (u_long)ckp_lsn.offset);
goto out;
! } else if (IS_ZERO_LSN(ckp_args->last_ckp) ||
! (ret = log_get(lp, &ckp_args->last_ckp, &data, DB_SET)) != 0)
goto first;
else
open_lsn = ckp_args->last_ckp;
-
The Btree access method delete-by-key code path did not always detect that
a key/data pair was also referenced by a cursor, which could cause a cursor
to reference incorrect data.
*** btree/bt_delete.c.orig Mon Dec 7 22:13:49 1998
--- btree/bt_delete.c Thu Mar 11 22:33:00 1999
***************
*** 73,85 ****
DBT *key;
u_int32_t flags;
{
- BOVERFLOW *bo;
- CURSOR *cp;
DBC *dbc;
! DBT dkey, ddata;
! PAGE *h;
! db_indx_t cnt, i, indx;
! int dpage, exact, ret, stack, t_ret;
DB_PANIC_CHECK(dbp);
--- 73,82 ----
DBT *key;
u_int32_t flags;
{
DBC *dbc;
! DBT data;
! u_int32_t f_init, f_next;
! int ret, t_ret;
DB_PANIC_CHECK(dbp);
***************
*** 91,195 ****
/* Allocate a cursor. */
if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
return (ret);
- DEBUG_LWRITE(dbc, txn, "bam_delete", key, NULL, flags);
-
- cp = dbc->internal;
- stack = 0;
! /* Search the tree for the key; delete only deletes exact matches. */
! if ((ret = __bam_search(dbc, key, S_DELETE, 1, NULL, &exact)) != 0)
! goto err;
! stack = 1;
! h = cp->csp->page;
! indx = cp->csp->indx;
/*
! * Delete the key/data pair, including any on-or-off page duplicates.
! * If this isn't an off-page duplicate set, or there are currently no
! * cursors walking through the off-page duplicates, we do the delete
! * here without further work. Alternatively, we have to walk through
! * the off-page duplicates, marking/deleting each item. The latter
! * is infinitely slower, and could be a performance problem at some
! * point.
*/
! bo = GET_BOVERFLOW(h, indx + O_INDX);
! if (B_TYPE(bo->type) == B_DUPLICATE &&
! __bam_ca_delete(dbp, h->pgno, indx, 1) != 0) {
! /* Set the cursor to reference the first off-page duplicate. */
! cp->pgno = h->pgno;
! cp->indx = indx;
! cp->dpgno = bo->pgno;
! cp->dindx = 0;
!
! /*
! * We're going to walk a cursor through the duplicate chain,
! * deleting as we go. Set DB_DBT_USERMEM, as this might be a
! * threaded application and the flags checking will catch us.
! * We don't want the actual keys or data, so request a partial
! * of length 0.
! */
! memset(&dkey, 0, sizeof(dkey));
! F_SET(&dkey, DB_DBT_USERMEM | DB_DBT_PARTIAL);
! memset(&ddata, 0, sizeof(ddata));
! F_SET(&ddata, DB_DBT_USERMEM | DB_DBT_PARTIAL);
!
! /* Delete duplicates... */
! for (;;) {
! if ((ret = dbc->c_del(dbc, 0)) != 0)
! goto err;
! if ((ret =
! dbc->c_get(dbc, &dkey, &ddata, DB_NEXT_DUP)) != 0)
! if (ret == DB_NOTFOUND) {
! ret = 0;
! break;
! } else
! goto err;
! }
!
! dpage = 0;
! } else {
! /* Find out how many on-page duplicates there are. */
! for (cnt = 1, i = indx;; ++cnt) {
! i += P_INDX;
! if (i >= NUM_ENT(h) || h->inp[i] != h->inp[indx])
! break;
! }
!
! /* Delete them. */
! for (; cnt > 0; --cnt) {
! /*
! * XXX
! * Delete the key item first, otherwise the duplicate
! * checks in __bam_ditem() won't work!
! */
! if ((ret = __bam_ditem(dbc, h, indx)) != 0)
! goto err;
! if ((ret = __bam_ditem(dbc, h, indx)) != 0)
! goto err;
! }
! /* If the page is now empty, delete it. */
! dpage = NUM_ENT(h) == 0 && h->pgno != PGNO_ROOT;
}
! /* If we're using record numbers, update internal page record counts. */
! if (F_ISSET(dbp, DB_BT_RECNUM) && (ret = __bam_adjust(dbc, -1)) != 0)
goto err;
! /*
! * Release the stack (we may need to acquire another one during page
! * deletion.
! */
! __bam_stkrel(dbc, 0);
! stack = 0;
!
! ret = dpage ? __bam_dpage(dbc, key) : 0;
!
! err: if (stack)
! __bam_stkrel(dbc, 0);
!
! /* Discard the cursor. */
! if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
ret = t_ret;
return (ret);
--- 88,131 ----
/* Allocate a cursor. */
if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
return (ret);
! DEBUG_LWRITE(dbc, txn, "bam_delete", key, NULL, flags);
/*
! * Walk a cursor through the key/data pairs, deleting as we go. Set
! * the DB_DBT_USERMEM flag, as this might be a threaded application
! * and the flags checking will catch us. We don't actually want the
! * keys or data, so request a partial of length 0.
*/
! memset(&data, 0, sizeof(data));
! F_SET(&data, DB_DBT_USERMEM | DB_DBT_PARTIAL);
! /* If locking, set read-modify-write flag. */
! f_init = DB_SET;
! f_next = DB_NEXT_DUP;
! if (dbp->dbenv->lk_info != NULL) {
! f_init |= DB_RMW;
! f_next |= DB_RMW;
}
! /* Walk through the set of key/data pairs, deleting as we go. */
! if ((ret = dbc->c_get(dbc, key, &data, f_init)) != 0)
goto err;
+ for (;;) {
+ if ((ret = dbc->c_del(dbc, 0)) != 0)
+ goto err;
+ if ((ret = dbc->c_get(dbc, key, &data, f_next)) != 0) {
+ if (ret == DB_NOTFOUND) {
+ ret = 0;
+ break;
+ }
+ goto err;
+ }
+ }
! err: /* Discard the cursor. */
! if ((t_ret = dbc->c_close(dbc)) != 0 &&
! (ret == 0 || ret == DB_NOTFOUND))
ret = t_ret;
return (ret);
-
Concurrent Data Store operations could sometimes fail because write
cursors were not correctly identified.
*** btree/bt_cursor.c.orig Thu Feb 4 21:44:59 1999
--- btree/bt_cursor.c Sat Mar 13 17:34:20 1999
***************
*** 1041,1047 ****
*cp = copy;
}
! if (F_ISSET(dbc, DBC_RMW))
(void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
DB_LOCK_IWRITE, 0);
--- 1041,1047 ----
*cp = copy;
}
! if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
(void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
DB_LOCK_IWRITE, 0);
***************
*** 1685,1691 ****
/*
* If this is concurrent DB, upgrade the lock if necessary.
*/
! if (F_ISSET(dbc, DBC_RMW) && (ret = lock_get(dbp->dbenv->lk_info,
dbc->locker, DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
&dbc->mylock)) != 0)
return (EAGAIN);
--- 1685,1692 ----
/*
* If this is concurrent DB, upgrade the lock if necessary.
*/
! if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW) &&
! (ret = lock_get(dbp->dbenv->lk_info,
dbc->locker, DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
&dbc->mylock)) != 0)
return (EAGAIN);
*** btree/bt_recno.c.orig Fri Dec 11 15:03:16 1998
--- btree/bt_recno.c Sat Mar 13 17:07:35 1999
***************
*** 771,777 ****
/* The cursor was reset, no further delete adjustment is necessary. */
CD_CLR(dbp, cp);
! err: if (F_ISSET(dbc, DBC_RMW))
(void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
DB_LOCK_IWRITE, 0);
--- 771,777 ----
/* The cursor was reset, no further delete adjustment is necessary. */
CD_CLR(dbp, cp);
! err: if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
(void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
DB_LOCK_IWRITE, 0);
-
The DB_SET_RANGE flag did not always correctly deal with on-page deleted
records in the Btree access method.
*** btree/bt_cursor.c.orig Thu Feb 4 21:44:59 1999
--- btree/bt_cursor.c Thu Mar 11 22:52:10 1999
***************
*** 42,48 ****
static int __bam_c_rget __P((DBC *, DBT *, u_int32_t));
static int __bam_c_search __P((DBC *, CURSOR *, const DBT *, u_int32_t, int *));
static int __bam_dsearch __P((DBC *, CURSOR *, DBT *, u_int32_t *));
- static int __bam_dupnodel __P((DBC *, CURSOR *, u_int32_t));
/* Discard the current page/lock held by a cursor. */
#undef DISCARD
--- 42,47 ----
***************
*** 454,469 ****
goto err;
/*
! * We may be referencing a duplicates page. Move to the next
! * non-deleted record. Check to make sure that we're didn't
! * switch records because there were no non-deleted entries.
*/
start = *cp;
! if ((ret = __bam_dupnodel(dbc, cp, cp->indx)) != 0)
! goto err;
! if (!POSSIBLE_DUPLICATE(cp, start)) {
! ret = DB_NOTFOUND;
goto err;
}
break;
case DB_SET_RECNO:
--- 453,477 ----
goto err;
/*
! * We cannot currently be referencing a deleted record, but we
! * may be referencing off-page duplicates.
! *
! * If we're referencing off-page duplicates, move off-page.
! * If we moved off-page, move to the next non-deleted record.
! * If we moved to the next non-deleted record, check to make
! * sure we didn't switch records because our current record
! * had no non-deleted data items.
*/
start = *cp;
! if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0)
goto err;
+ if (cp->dpgno != PGNO_INVALID && IS_CUR_DELETED(cp)) {
+ if ((ret = __bam_c_next(dbc, cp, 0)) != 0)
+ goto err;
+ if (!POSSIBLE_DUPLICATE(cp, start)) {
+ ret = DB_NOTFOUND;
+ goto err;
+ }
}
break;
case DB_SET_RECNO:
***************
*** 514,529 ****
* may have returned an entry past the end of the page. If
* so, move to the next entry.
*/
! if (cp->indx == NUM_ENT(cp->page))
! if ((ret = __bam_c_next(dbc, cp, 0)) != 0)
! goto err;
/*
! * We may be referencing a duplicates page. Move to the first
! * non-deleted duplicate. We don't care if we switch records,
! * we just want the next useful entry.
*/
! if ((ret = __bam_dupnodel(dbc, cp, cp->indx)) != 0)
goto err;
break;
}
--- 522,543 ----
* may have returned an entry past the end of the page. If
* so, move to the next entry.
*/
! if (cp->indx == NUM_ENT(cp->page) &&
! (ret = __bam_c_next(dbc, cp, 0)) != 0)
! goto err;
!
! /*
! * We may be referencing off-page duplicates, if so, move
! * off-page.
! */
! if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0)
! goto err;
/*
! * We may be referencing a deleted record, if so, move to
! * the next non-deleted record.
*/
! if (IS_CUR_DELETED(cp) && (ret = __bam_c_next(dbc, cp, 0)) != 0)
goto err;
break;
}
***************
*** 1549,1582 ****
if (needexact && *exactp == 0)
return (DB_NOTFOUND);
- return (0);
- }
-
- /*
- * __bam_dupnodel --
- * Check for an off-page duplicates entry, and if found, move to the
- * first or last non-deleted entry.
- */
- static int
- __bam_dupnodel(dbc, cp, indx)
- DBC *dbc;
- CURSOR *cp;
- u_int32_t indx;
- {
- int ret;
-
- /* Move to any off-page duplicates entry. */
- if ((ret = __bam_dup(dbc, cp, indx, 0)) != 0)
- return (ret);
-
- /* If we didn't move to a duplicates page, we're done. */
- if (cp->dpgno == PGNO_INVALID)
- return (0);
-
- /* If it's a deleted record, go to the next valid record. */
- if (IS_CUR_DELETED(cp))
- if ((ret = __bam_c_next(dbc, cp, 0)) != 0)
- return (ret);
return (0);
}
--- 1563,1568 ----
-
If the buffer cache was completely dirty, transaction checkpoints could
pin down too many buffers and cause other operations to fail.
*** mp/mp_sync.c.orig Fri Dec 11 14:28:18 1998
--- mp/mp_sync.c Sat Mar 6 09:23:04 1999
***************
*** 39,45 ****
DB_ENV *dbenv;
MPOOL *mp;
MPOOLFILE *mfp;
! int ar_cnt, nalloc, next, ret, wrote;
MP_PANIC_CHECK(dbmp);
--- 39,45 ----
DB_ENV *dbenv;
MPOOL *mp;
MPOOLFILE *mfp;
! int ar_cnt, nalloc, next, maxpin, ret, wrote;
MP_PANIC_CHECK(dbmp);
***************
*** 119,128 ****
--- 119,133 ----
* finish. Since the application may have restarted the sync, clear
* any BH_WRITE flags that appear to be left over from previous calls.
*
+ * We don't want to pin down the entire buffer cache, otherwise we'll
+ * starve threads needing new pages. Don't pin down more than 80% of
+ * the cache.
+ *
* Keep a count of the total number of buffers we need to write in
* MPOOL->lsn_cnt, and for each file, in MPOOLFILE->lsn_count.
*/
ar_cnt = 0;
+ maxpin = ((mp->stat.st_page_dirty + mp->stat.st_page_clean) * 8) / 10;
for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh))
if (F_ISSET(bhp, BH_DIRTY) || bhp->ref != 0) {
***************
*** 144,154 ****
* while holding a region lock, so we set the flag to
* force the checkpoint to be done again, from scratch,
* later.
*/
if (bhp->ref == 0) {
++bhp->ref;
bharray[ar_cnt] = bhp;
! if (++ar_cnt >= nalloc) {
F_SET(mp, MP_LSN_RETRY);
break;
}
--- 149,163 ----
* while holding a region lock, so we set the flag to
* force the checkpoint to be done again, from scratch,
* later.
+ *
+ * If we've pinned down too much of the cache stop, and
+ * set a flag to force the checkpoint to be tried again
+ * later.
*/
if (bhp->ref == 0) {
++bhp->ref;
bharray[ar_cnt] = bhp;
! if (++ar_cnt >= nalloc || ar_cnt >= maxpin) {
F_SET(mp, MP_LSN_RETRY);
break;
}
-
In non-threaded applications, change cursors to share a locker ID in
order to avoid self-deadlocks.
*** db/db_am.c.orig Tue Jan 5 09:39:30 1999
--- db/db_am.c Wed Feb 10 22:08:06 1999
***************
*** 69,75 ****
DBC **dbcp;
u_int32_t flags;
{
! DBC *dbc;
int ret;
db_lockmode_t mode;
u_int32_t op;
--- 69,75 ----
DBC **dbcp;
u_int32_t flags;
{
! DBC *dbc, *adbc;
int ret;
db_lockmode_t mode;
u_int32_t op;
***************
*** 91,99 ****
/* Set up locking information. */
if (F_ISSET(dbp, DB_AM_LOCKING | DB_AM_CDB)) {
! if ((ret =
! lock_id(dbp->dbenv->lk_info, &dbc->lid)) != 0)
! goto err;
memcpy(dbc->lock.fileid, dbp->fileid, DB_FILE_ID_LEN);
if (F_ISSET(dbp, DB_AM_CDB)) {
dbc->lock_dbt.size = DB_FILE_ID_LEN;
--- 91,110 ----
/* Set up locking information. */
if (F_ISSET(dbp, DB_AM_LOCKING | DB_AM_CDB)) {
! /*
! * If we are not threaded, then there is no need to
! * create new locker ids. We know that no one else
! * is running concurrently using this DB, so we can
! * take a peek at any cursors on the active queue.
! */
! if (!F_ISSET(dbp, DB_AM_THREAD) &&
! (adbc = TAILQ_FIRST(&dbp->active_queue)) != NULL)
! dbc->lid = adbc->lid;
! else
! if ((ret = lock_id(dbp->dbenv->lk_info,
! &dbc->lid)) != 0)
! goto err;
!
memcpy(dbc->lock.fileid, dbp->fileid, DB_FILE_ID_LEN);
if (F_ISSET(dbp, DB_AM_CDB)) {
dbc->lock_dbt.size = DB_FILE_ID_LEN;
-
In the Btree access method, when creating a new record and specifying a
dbt.doff offset value, the DB_DBT_PARTIAL flag was not handled
correctly.
*** btree/bt_put.c.orig Sun Dec 6 11:09:14 1998
--- btree/bt_put.c Tue Mar 9 17:02:52 1999
***************
*** 64,70 ****
static int __bam_fixed __P((DBC *, DBT *));
static int __bam_ndup __P((DBC *, PAGE *, u_int32_t));
static int __bam_ovput __P((DBC *, PAGE *, u_int32_t, DBT *));
! static int __bam_partial __P((DBC *, DBT *, PAGE *, u_int32_t, u_int32_t));
static u_int32_t __bam_partsize __P((DBT *, PAGE *, u_int32_t));
/*
--- 64,71 ----
static int __bam_fixed __P((DBC *, DBT *));
static int __bam_ndup __P((DBC *, PAGE *, u_int32_t));
static int __bam_ovput __P((DBC *, PAGE *, u_int32_t, DBT *));
! static int __bam_partial __P((DBC *,
! DBT *, PAGE *, u_int32_t, u_int32_t, u_int32_t));
static u_int32_t __bam_partsize __P((DBT *, PAGE *, u_int32_t));
/*
***************
*** 206,212 ****
/* Handle partial puts: build the real record. */
if (F_ISSET(data, DB_DBT_PARTIAL)) {
tdbt = *data;
! if ((ret = __bam_partial(dbc, &tdbt, h, indx, data_size)) != 0)
return (ret);
data = &tdbt;
}
--- 207,214 ----
/* Handle partial puts: build the real record. */
if (F_ISSET(data, DB_DBT_PARTIAL)) {
tdbt = *data;
! if ((ret = __bam_partial(dbc,
! &tdbt, h, indx, data_size, flags)) != 0)
return (ret);
data = &tdbt;
}
***************
*** 711,721 ****
* Build the real record for a partial put.
*/
static int
! __bam_partial(dbc, dbt, h, indx, nbytes)
DBC *dbc;
DBT *dbt;
PAGE *h;
! u_int32_t indx, nbytes;
{
BKEYDATA *bk, tbk;
BOVERFLOW *bo;
--- 713,723 ----
* Build the real record for a partial put.
*/
static int
! __bam_partial(dbc, dbt, h, indx, nbytes, flags)
DBC *dbc;
DBT *dbt;
PAGE *h;
! u_int32_t indx, nbytes, flags;
{
BKEYDATA *bk, tbk;
BOVERFLOW *bo;
***************
*** 739,744 ****
--- 741,764 ----
dbc->rdata.ulen = nbytes;
}
+ /*
+ * We use nul bytes for any part of the record that isn't specified;
+ * get it over with.
+ */
+ memset(dbc->rdata.data, 0, nbytes);
+
+ /*
+ * In the next clauses, we need to do three things: a) set p to point
+ * to the place at which to copy the user's data, b) set tlen to the
+ * total length of the record, not including the bytes contributed by
+ * the user, and c) copy any valid data from an existing record.
+ */
+ if (LF_ISSET(BI_NEWKEY)) {
+ tlen = dbt->doff;
+ p = (u_int8_t *)dbc->rdata.data + dbt->doff;
+ goto ucopy;
+ }
+
/* Find the current record. */
if (indx < NUM_ENT(h)) {
bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
***************
*** 748,760 ****
B_TSET(bk->type, B_KEYDATA, 0);
bk->len = 0;
}
-
- /*
- * We use nul bytes for any part of the record that isn't specified,
- * get it over with.
- */
- memset(dbc->rdata.data, 0, nbytes);
-
if (B_TYPE(bk->type) == B_OVERFLOW) {
/*
* In the case of an overflow record, we shift things around
--- 768,773 ----
***************
*** 786,795 ****
memmove(p + dbt->size, p + dbt->dlen, len);
tlen += len;
}
-
- /* Copy in the application provided data. */
- memcpy(p, dbt->data, dbt->size);
- tlen += dbt->size;
} else {
/* Copy in any leading data from the original record. */
memcpy(dbc->rdata.data,
--- 799,804 ----
***************
*** 797,806 ****
tlen = dbt->doff;
p = (u_int8_t *)dbc->rdata.data + dbt->doff;
- /* Copy in the application provided data. */
- memcpy(p, dbt->data, dbt->size);
- tlen += dbt->size;
-
/* Copy in any trailing data from the original record. */
len = dbt->doff + dbt->dlen;
if (bk->len > len) {
--- 806,811 ----
***************
*** 808,813 ****
--- 813,825 ----
tlen += bk->len - len;
}
}
+
+ ucopy: /*
+ * Copy in the application provided data -- p and tlen must have been
+ * initialized above.
+ */
+ memcpy(p, dbt->data, dbt->size);
+ tlen += dbt->size;
/* Set the DBT to reference our new record. */
dbc->rdata.size = tlen;
-
It was possible for the last-known-LSN-on-disk to not be set correctly
during recovery, which could cause the loss of recovery's checkpoint
record.
*** log/log.c.orig Sun Oct 25 21:14:27 1998
--- log/log.c Sat Mar 13 13:25:09 1999
***************
*** 226,237 ****
}
/*
! * We know where the end of the log is. Since that record is on disk,
! * it's also the last-synced LSN.
*/
! lp->lsn = lsn;
lp->lsn.offset += dblp->c_len;
- lp->s_lsn = lp->lsn;
/* Set up the current buffer information, too. */
lp->len = dblp->c_len;
--- 226,237 ----
}
/*
! * We now know where the end of the log is. Set the first LSN that
! * we want to return to an application and the LSN of the last known
! * record on disk.
*/
! lp->lsn = lp->s_lsn = lsn;
lp->lsn.offset += dblp->c_len;
/* Set up the current buffer information, too. */
lp->len = dblp->c_len;
-
Test suite change: generate fail message if environment open doesn't work.
*** test/mpool.tcl.orig Fri Dec 11 11:50:50 1998
--- test/mpool.tcl Sun Mar 14 11:36:43 1999
***************
*** 253,258 ****
--- 253,259 ----
[expr $psize * 10]}
set cmd [concat $cmd $envopts]
set dbenv [eval $cmd]
+ error_check_good dbenv [is_valid_widget $dbenv env] TRUE
# First open and create the file.
-
Defend against the possibility that records from multiple log files are
present in the log buffer cache.
*** log/log_put.c.orig Fri Jan 15 10:35:41 1999
--- log/log_put.c Sun Mar 14 20:39:49 1999
***************
*** 300,307 ****
* buffer's starting LSN.
*/
current = 0;
! if (lp->b_off != 0 &&
! lsn->file >= lp->f_lsn.file && lsn->offset >= lp->f_lsn.offset) {
if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
return (ret);
--- 300,306 ----
* buffer's starting LSN.
*/
current = 0;
! if (lp->b_off != 0 && log_compare(lsn, &lp->f_lsn) >= 0) {
if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
return (ret);
-
Reclaim lockers when using lock_vec to release locks.
*** lock/lock.c.orig Fri Jan 22 15:17:18 1999
--- lock/lock.c Mon Mar 22 22:21:57 1999
***************
*** 99,104 ****
--- 99,105 ----
list[i].mode = lp->mode;
ret = __lock_put_internal(lt, lp, 0);
+ __lock_checklocker(lt, lp, 0);
break;
case DB_LOCK_PUT_ALL:
/* Find the locker. */
-
Re-order subsystem close when closing the environment so that the logging
subsystem can potentially flush buffers through the shared memory buffer
pool.
*** common/db_appinit.c.orig Fri Mar 19 11:36:39 1999
--- common/db_appinit.c Fri Mar 19 11:36:26 1999
***************
*** 268,277 ****
if (dbenv->tx_info && (t_ret = txn_close(dbenv->tx_info)) != 0)
if (ret == 0)
ret = t_ret;
! if (dbenv->mp_info && (t_ret = memp_close(dbenv->mp_info)) != 0)
if (ret == 0)
ret = t_ret;
! if (dbenv->lg_info && (t_ret = log_close(dbenv->lg_info)) != 0)
if (ret == 0)
ret = t_ret;
if (dbenv->lk_info && (t_ret = lock_close(dbenv->lk_info)) != 0)
--- 268,277 ----
if (dbenv->tx_info && (t_ret = txn_close(dbenv->tx_info)) != 0)
if (ret == 0)
ret = t_ret;
! if (dbenv->lg_info && (t_ret = log_close(dbenv->lg_info)) != 0)
if (ret == 0)
ret = t_ret;
! if (dbenv->mp_info && (t_ret = memp_close(dbenv->mp_info)) != 0)
if (ret == 0)
ret = t_ret;
if (dbenv->lk_info && (t_ret = lock_close(dbenv->lk_info)) != 0)
-
Never attempt to grow the shared regions when initially connecting to the
Berkeley DB environment.
*** common/db_region.c.orig Tue Nov 10 11:40:50 1998
--- common/db_region.c Sat Apr 3 11:12:24 1999
***************
*** 393,410 ****
}
/*
- * Problem #2: We want a bigger region than has previously been
- * created. Detected by checking if the region is smaller than
- * our caller requested. If it is, we grow the region, (which
- * does the detach and re-attach for us).
- */
- if (grow_region != 0 &&
- (ret = __db_rgrow(infop, grow_region)) != 0) {
- (void)__db_mutex_unlock(&rlp->lock, infop->fd);
- goto err;
- }
-
- /*
* Problem #3: when we checked the size of the file, it was
* still growing as part of creation. Detected by the fact
* that infop->size isn't the same size as the region.
--- 393,398 ----
-
Update the version numbers from Berkeley DB 2.6.5 to Berkeley DB 2.6.6.
*** docs/index.html.orig Thu Feb 4 12:02:31 1999
--- docs/index.html Sun Mar 14 20:54:21 1999
***************
*** 10,16 ****
<p><br><p>
! <h1><b>Berkeley DB: version 2.6.5, 02/04/99</b></h1>
<hr size=1 noshade>
--- 10,16 ----
<p><br><p>
! <h1><b>Berkeley DB: version 2.6.6, 03/14/99</b></h1>
<hr size=1 noshade>
*** include/db.h.orig Thu Feb 4 12:02:57 1999
--- include/db.h Sun Mar 14 20:54:53 1999
***************
*** 72,79 ****
#define DB_VERSION_MAJOR 2
#define DB_VERSION_MINOR 6
! #define DB_VERSION_PATCH 5
! #define DB_VERSION_STRING "Sleepycat Software: Berkeley DB 2.6.5: (02/04/99)"
typedef u_int32_t db_pgno_t; /* Page number type. */
typedef u_int16_t db_indx_t; /* Page offset type. */
--- 72,79 ----
#define DB_VERSION_MAJOR 2
#define DB_VERSION_MINOR 6
! #define DB_VERSION_PATCH 6
! #define DB_VERSION_STRING "Sleepycat Software: Berkeley DB 2.6.6: (03/14/99)"
typedef u_int32_t db_pgno_t; /* Page number type. */
typedef u_int16_t db_indx_t; /* Page offset type. */
*** README.orig Thu Feb 4 12:05:06 1999
--- README Sun Mar 14 21:00:02 1999
***************
*** 1,8 ****
# @(#)README 10.71 (Sleepycat) 12/16/98
! This is version 2.6.5 (02/04/99) of Sleepycat Software's Berkeley DB
product. To view the documentation for this release, point your web
! browser at the distribution file db-2.6.5/docs/index.html.
Sleepycat Software db@sleepycat.com
394 E. Riding Dr. +1-510-526-3972
--- 1,8 ----
# @(#)README 10.71 (Sleepycat) 12/16/98
! This is version 2.6.6 (03/14/99) of Sleepycat Software's Berkeley DB
product. To view the documentation for this release, point your web
! browser at the distribution file db-2.6.6/docs/index.html.
Sleepycat Software db@sleepycat.com
394 E. Riding Dr. +1-510-526-3972
*** build_win32/libdb.rc.orig Thu Feb 4 12:05:22 1999
--- build_win32/libdb.rc Sun Mar 14 20:55:43 1999
***************
*** 1,6 ****
1 VERSIONINFO
! FILEVERSION 2,0,6,5
! PRODUCTVERSION 2,0,6,5
FILEFLAGSMASK 0x3fL
#ifdef _DEBUG
FILEFLAGS 0x1L
--- 1,6 ----
1 VERSIONINFO
! FILEVERSION 2,0,6,6
! PRODUCTVERSION 2,0,6,6
FILEFLAGSMASK 0x3fL
#ifdef _DEBUG
FILEFLAGS 0x1L
***************
*** 18,29 ****
BEGIN
VALUE "CompanyName", "Sleepycat Software\0"
VALUE "FileDescription", "Berkeley DB 2.0 DLL\0"
! VALUE "FileVersion", "2.6.5\0"
VALUE "InternalName", "libdb.dll\0"
VALUE "LegalCopyright", "Copyright © Sleepycat Software Inc. 1997, 1998\0"
VALUE "OriginalFilename", "libdb.dll\0"
VALUE "ProductName", "Sleepycat Software libdb\0"
! VALUE "ProductVersion", "2.6.5\0"
END
END
BLOCK "VarFileInfo"
--- 18,29 ----
BEGIN
VALUE "CompanyName", "Sleepycat Software\0"
VALUE "FileDescription", "Berkeley DB 2.0 DLL\0"
! VALUE "FileVersion", "2.6.6\0"
VALUE "InternalName", "libdb.dll\0"
VALUE "LegalCopyright", "Copyright © Sleepycat Software Inc. 1997, 1998\0"
VALUE "OriginalFilename", "libdb.dll\0"
VALUE "ProductName", "Sleepycat Software libdb\0"
! VALUE "ProductVersion", "2.6.6\0"
END
END
BLOCK "VarFileInfo"
*** build_vms/db.h.orig Mon Mar 15 18:09:50 1999
--- build_vms/db.h Mon Mar 15 18:10:42 1999
***************
*** 69,76 ****
#define DB_VERSION_MAJOR 2
#define DB_VERSION_MINOR 6
! #define DB_VERSION_PATCH 5
! #define DB_VERSION_STRING "Sleepycat Software: Berkeley DB 2.6.5: (02/04/99)"
typedef u_int32_t db_pgno_t; /* Page number type. */
typedef u_int16_t db_indx_t; /* Page offset type. */
--- 69,76 ----
#define DB_VERSION_MAJOR 2
#define DB_VERSION_MINOR 6
! #define DB_VERSION_PATCH 6
! #define DB_VERSION_STRING "Sleepycat Software: Berkeley DB 2.6.6: (03/14/99)"
typedef u_int32_t db_pgno_t; /* Page number type. */
typedef u_int16_t db_indx_t; /* Page offset type. */
*** build_win16/db.h.orig Mon Mar 15 18:09:50 1999
--- build_win16/db.h Mon Mar 15 18:11:05 1999
***************
*** 75,82 ****
#define DB_VERSION_MAJOR 2
#define DB_VERSION_MINOR 6
! #define DB_VERSION_PATCH 5
! #define DB_VERSION_STRING "Sleepycat Software: Berkeley DB 2.6.5: (02/04/99)"
typedef u_int32_t db_pgno_t; /* Page number type. */
typedef u_int16_t db_indx_t; /* Page offset type. */
--- 75,82 ----
#define DB_VERSION_MAJOR 2
#define DB_VERSION_MINOR 6
! #define DB_VERSION_PATCH 6
! #define DB_VERSION_STRING "Sleepycat Software: Berkeley DB 2.6.6: (03/14/99)"
typedef u_int32_t db_pgno_t; /* Page number type. */
typedef u_int16_t db_indx_t; /* Page offset type. */
*** build_win32/db.h.orig Mon Mar 15 18:09:50 1999
--- build_win32/db.h Mon Mar 15 18:11:25 1999
***************
*** 75,82 ****
#define DB_VERSION_MAJOR 2
#define DB_VERSION_MINOR 6
! #define DB_VERSION_PATCH 5
! #define DB_VERSION_STRING "Sleepycat Software: Berkeley DB 2.6.5: (02/04/99)"
typedef u_int32_t db_pgno_t; /* Page number type. */
typedef u_int16_t db_indx_t; /* Page offset type. */
--- 75,82 ----
#define DB_VERSION_MAJOR 2
#define DB_VERSION_MINOR 6
! #define DB_VERSION_PATCH 6
! #define DB_VERSION_STRING "Sleepycat Software: Berkeley DB 2.6.6: (03/14/99)"
typedef u_int32_t db_pgno_t; /* Page number type. */
typedef u_int16_t db_indx_t; /* Page offset type. */