pgindent run for 9.4
This includes removing tabs after periods in C comments, which was applied to back branches, so this change should not affect backpatching.
This commit is contained in:
parent fb85cd4320
commit 0a78320057
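As an illustration of the kind of change this run makes (a hypothetical C snippet, not a hunk from this commit; the variable names and comment text are invented), pgindent re-wraps C block comments and drops a tab that follows a sentence-ending period, so a comment like the first one below comes out like the second:

/* Hypothetical example only -- not part of the diff below. */

/* Before pgindent: a tab (shown here as <TAB>) follows the first
 * period.<TAB>Text after it may also run past the preferred width. */
static int before_counter = 0;

/*
 * After pgindent: the tab after the period is gone and the comment is
 * re-flowed to the block-comment style used elsewhere in the tree.
 */
static int after_counter = 0;

The diff below is reconstructed in unified format from the commit's side-by-side view.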
@@ -2,4 +2,8 @@
 * For the raison d'etre of this file, check the comment above the definition
 * of the PGAC_C_INLINE macro in config/c-compiler.m4.
 */
-static inline int fun () { return 0; }
+static inline int
+fun()
+{
+return 0;
+}
@@ -795,6 +795,7 @@ cube_inter(PG_FUNCTION_ARGS)
 if (DIM(a) < DIM(b))
 {
 NDBOX *tmp = b;
+
 b = a;
 a = tmp;
 swapped = true;
@@ -1236,14 +1237,14 @@ cube_distance(PG_FUNCTION_ARGS)
 /* compute within the dimensions of (b) */
 for (i = 0; i < DIM(b); i++)
 {
-d = distance_1D(LL_COORD(a,i), UR_COORD(a,i), LL_COORD(b,i), UR_COORD(b,i));
+d = distance_1D(LL_COORD(a, i), UR_COORD(a, i), LL_COORD(b, i), UR_COORD(b, i));
 distance += d * d;
 }

 /* compute distance to zero for those dimensions in (a) absent in (b) */
 for (i = DIM(b); i < DIM(a); i++)
 {
-d = distance_1D(LL_COORD(a,i), UR_COORD(a,i), 0.0, 0.0);
+d = distance_1D(LL_COORD(a, i), UR_COORD(a, i), 0.0, 0.0);
 distance += d * d;
 }
@@ -1297,11 +1298,11 @@ cube_is_point_internal(NDBOX *cube)
 return true;

 /*
-* Even if the point-flag is not set, all the lower-left coordinates
-* might match the upper-right coordinates, so that the value is in
-* fact a point. Such values don't arise with current code - the point
-* flag is always set if appropriate - but they might be present on-disk
-* in clusters upgraded from pre-9.4 versions.
+* Even if the point-flag is not set, all the lower-left coordinates might
+* match the upper-right coordinates, so that the value is in fact a
+* point. Such values don't arise with current code - the point flag is
+* always set if appropriate - but they might be present on-disk in
+* clusters upgraded from pre-9.4 versions.
 */
 for (i = 0; i < DIM(cube); i++)
 {
@@ -1317,6 +1318,7 @@ cube_dim(PG_FUNCTION_ARGS)
 {
 NDBOX *c = PG_GETARG_NDBOX(0);
 int dim = DIM(c);
+
 PG_FREE_IF_COPY(c, 0);
 PG_RETURN_INT32(dim);
 }
@@ -1330,7 +1332,7 @@ cube_ll_coord(PG_FUNCTION_ARGS)
 double result;

 if (DIM(c) >= n && n > 0)
-result = Min(LL_COORD(c, n-1), UR_COORD(c, n-1));
+result = Min(LL_COORD(c, n - 1), UR_COORD(c, n - 1));
 else
 result = 0;
@@ -1347,7 +1349,7 @@ cube_ur_coord(PG_FUNCTION_ARGS)
 double result;

 if (DIM(c) >= n && n > 0)
-result = Max(LL_COORD(c, n-1), UR_COORD(c, n-1));
+result = Max(LL_COORD(c, n - 1), UR_COORD(c, n - 1));
 else
 result = 0;
@@ -1382,15 +1384,15 @@ cube_enlarge(PG_FUNCTION_ARGS)

 for (i = 0, j = dim; i < DIM(a); i++, j++)
 {
-if (LL_COORD(a,i) >= UR_COORD(a,i))
+if (LL_COORD(a, i) >= UR_COORD(a, i))
 {
-result->x[i] = UR_COORD(a,i) - r;
-result->x[j] = LL_COORD(a,i) + r;
+result->x[i] = UR_COORD(a, i) - r;
+result->x[j] = LL_COORD(a, i) + r;
 }
 else
 {
-result->x[i] = LL_COORD(a,i) - r;
-result->x[j] = UR_COORD(a,i) + r;
+result->x[i] = LL_COORD(a, i) - r;
+result->x[j] = UR_COORD(a, i) + r;
 }
 if (result->x[i] > result->x[j])
 {
@@ -1503,7 +1505,7 @@ cube_c_f8(PG_FUNCTION_ARGS)
 result->x[DIM(result) + i] = cube->x[DIM(cube) + i];
 }
 result->x[DIM(result) - 1] = x;
-result->x[2*DIM(result) - 1] = x;
+result->x[2 * DIM(result) - 1] = x;
 }

 PG_FREE_IF_COPY(cube, 0);
@@ -1521,7 +1523,8 @@ cube_c_f8_f8(PG_FUNCTION_ARGS)
 int size;
 int i;

-if (IS_POINT(cube) && (x1 == x2)){
+if (IS_POINT(cube) && (x1 == x2))
+{
 size = POINT_SIZE((DIM(cube) + 1));
 result = (NDBOX *) palloc0(size);
 SET_VARSIZE(result, size);
@@ -70,6 +70,7 @@ static const struct FileFdwOption valid_options[] = {
 {"encoding", ForeignTableRelationId},
 {"force_not_null", AttributeRelationId},
 {"force_null", AttributeRelationId},
+
 /*
 * force_quote is not supported by file_fdw because it's for COPY TO.
 */
@@ -253,6 +254,7 @@ file_fdw_validator(PG_FUNCTION_ARGS)
 errmsg("conflicting or redundant options")));
 filename = defGetString(def);
 }
+
 /*
 * force_not_null is a boolean option; after validation we can discard
 * it - it will be retrieved later in get_file_fdw_attribute_options()
@@ -443,12 +445,15 @@ get_file_fdw_attribute_options(Oid relid)

 heap_close(rel, AccessShareLock);

-/* Return DefElem only when some column(s) have force_not_null / force_null options set */
+/*
+* Return DefElem only when some column(s) have force_not_null /
+* force_null options set
+*/
 if (fnncolumns != NIL)
 options = lappend(options, makeDefElem("force_not_null", (Node *) fnncolumns));

 if (fncolumns != NIL)
-options = lappend(options,makeDefElem("force_null", (Node *) fncolumns));
+options = lappend(options, makeDefElem("force_null", (Node *) fncolumns));

 return options;
 }
@@ -1245,7 +1245,7 @@ hstore_to_json_loose(PG_FUNCTION_ARGS)
 dst;

 if (count == 0)
-PG_RETURN_TEXT_P(cstring_to_text_with_len("{}",2));
+PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2));

 initStringInfo(&tmp);
 initStringInfo(&dst);
@@ -1335,7 +1335,7 @@ hstore_to_json(PG_FUNCTION_ARGS)
 dst;

 if (count == 0)
-PG_RETURN_TEXT_P(cstring_to_text_with_len("{}",2));
+PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2));

 initStringInfo(&tmp);
 initStringInfo(&dst);
@@ -1381,7 +1381,8 @@ hstore_to_jsonb(PG_FUNCTION_ARGS)

 for (i = 0; i < count; i++)
 {
-JsonbValue key, val;
+JsonbValue key,
+val;

 key.estSize = sizeof(JEntry);
 key.type = jbvString;
@@ -1432,7 +1433,8 @@ hstore_to_jsonb_loose(PG_FUNCTION_ARGS)

 for (i = 0; i < count; i++)
 {
-JsonbValue key, val;
+JsonbValue key,
+val;

 key.estSize = sizeof(JEntry);
 key.type = jbvString;
@@ -1508,6 +1510,7 @@ hstore_to_jsonb_loose(PG_FUNCTION_ARGS)
 val.type = jbvNumeric;
 val.val.numeric = DatumGetNumeric(
 DirectFunctionCall3(numeric_in, CStringGetDatum(tmp.data), 0, -1));
+
 val.estSize += VARSIZE_ANY(val.val.numeric) +sizeof(JEntry);
 }
 else
@@ -209,6 +209,7 @@ page_header(PG_FUNCTION_ARGS)
 if (tupdesc->attrs[0]->atttypid == TEXTOID)
 {
 char lsnchar[64];
+
 snprintf(lsnchar, sizeof(lsnchar), "%X/%X",
 (uint32) (lsn >> 32), (uint32) lsn);
 values[0] = CStringGetTextDatum(lsnchar);
@@ -369,11 +369,12 @@ test_sync(int writes_per_op)
 {
 for (writes = 0; writes < writes_per_op; writes++)
 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+
 /*
-* This can generate write failures if the filesystem
-* has a large block size, e.g. 4k, and there is no
-* support for O_DIRECT writes smaller than the
-* file system block size, e.g. XFS.
+* This can generate write failures if the filesystem has
+* a large block size, e.g. 4k, and there is no support
+* for O_DIRECT writes smaller than the file system block
+* size, e.g. XFS.
 */
 die("write failed");
 if (lseek(tmpfile, 0, SEEK_SET) == -1)
@@ -34,8 +34,8 @@ generate_old_dump(void)

 /*
 * Set umask for this function, all functions it calls, and all
-* subprocesses/threads it creates. We can't use fopen_priv()
-* as Windows uses threads and umask is process-global.
+* subprocesses/threads it creates. We can't use fopen_priv() as Windows
+* uses threads and umask is process-global.
 */
 old_umask = umask(S_IRWXG | S_IRWXO);
@@ -52,7 +52,7 @@ exec_prog(const char *log_file, const char *opt_log_file,
 va_list ap;

 #ifdef WIN32
 static DWORD mainThreadId = 0;

 /* We assume we are called from the primary thread first */
 if (mainThreadId == 0)
@@ -73,14 +73,15 @@ static DWORD mainThreadId = 0;
 pg_log(PG_VERBOSE, "%s\n", cmd);

 #ifdef WIN32
+
 /*
-* For some reason, Windows issues a file-in-use error if we write data
-* to the log file from a non-primary thread just before we create a
-* subprocess that also writes to the same log file. One fix is to
-* sleep for 100ms. A cleaner fix is to write to the log file _after_
-* the subprocess has completed, so we do this only when writing from
-* a non-primary thread. fflush(), running system() twice, and
-* pre-creating the file do not see to help.
+* For some reason, Windows issues a file-in-use error if we write data to
+* the log file from a non-primary thread just before we create a
+* subprocess that also writes to the same log file. One fix is to sleep
+* for 100ms. A cleaner fix is to write to the log file _after_ the
+* subprocess has completed, so we do this only when writing from a
+* non-primary thread. fflush(), running system() twice, and pre-creating
+* the file do not see to help.
 */
 if (mainThreadId != GetCurrentThreadId())
 result = system(cmd);
@@ -154,6 +155,7 @@ static DWORD mainThreadId = 0;
 }

 #ifndef WIN32
+
 /*
 * We can't do this on Windows because it will keep the "pg_ctl start"
 * output filename open until the server stops, so we do the \n\n above on
@@ -270,7 +270,8 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 i_relfilenode,
 i_reltablespace;
 char query[QUERY_ALLOC];
-char *last_namespace = NULL, *last_tablespace = NULL;
+char *last_namespace = NULL,
+*last_tablespace = NULL;

 /*
 * pg_largeobject contains user data that does not appear in pg_dumpall
@@ -373,9 +374,9 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 curr->nsp_alloc = false;

 /*
-* Many of the namespace and tablespace strings are identical,
-* so we try to reuse the allocated string pointers where possible
-* to reduce memory consumption.
+* Many of the namespace and tablespace strings are identical, so we
+* try to reuse the allocated string pointers where possible to reduce
+* memory consumption.
 */
 /* Can we reuse the previous string allocation? */
 if (last_namespace && strcmp(nspname, last_namespace) == 0)
@@ -213,6 +213,7 @@ parseCommandLine(int argc, char *argv[])
 {
 char *pgoptions = psprintf("%s %s", FIX_DEFAULT_READ_ONLY,
 getenv("PGOPTIONS"));
+
 pg_putenv("PGOPTIONS", pgoptions);
 pfree(pgoptions);
 }
@@ -339,10 +339,10 @@ reap_child(bool wait_for_child)
 thread_handles[thread_num] = thread_handles[parallel_jobs - 1];

 /*
-* Move last active thead arg struct into the now-dead slot,
-* and the now-dead slot to the end for reuse by the next thread.
-* Though the thread struct is in use by another thread, we can
-* safely swap the struct pointers within the array.
+* Move last active thead arg struct into the now-dead slot, and the
+* now-dead slot to the end for reuse by the next thread. Though the
+* thread struct is in use by another thread, we can safely swap the
+* struct pointers within the array.
 */
 tmp_args = cur_thread_args[thread_num];
 cur_thread_args[thread_num] = cur_thread_args[parallel_jobs - 1];
@@ -167,7 +167,8 @@ typedef struct
 {
 Oid db_oid; /* oid of the database */
 char *db_name; /* database name */
-char db_tablespace[MAXPGPATH]; /* database default tablespace path */
+char db_tablespace[MAXPGPATH]; /* database default tablespace
+* path */
 RelInfoArr rel_arr; /* array of all user relinfos */
 } DbInfo;
@@ -454,7 +455,7 @@ pg_log(eLogType type, const char *fmt,...)
 __attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
 void
 pg_fatal(const char *fmt,...)
-__attribute__((format(PG_PRINTF_ATTRIBUTE, 1, 2),noreturn));
+__attribute__((format(PG_PRINTF_ATTRIBUTE, 1, 2), noreturn));
 void end_progress_output(void);
 void
 prep_status(const char *fmt,...)
@@ -240,28 +240,26 @@ start_postmaster(ClusterInfo *cluster, bool throw_error)
 return false;

 /*
-* We set this here to make sure atexit() shuts down the server,
-* but only if we started the server successfully. We do it
-* before checking for connectivity in case the server started but
-* there is a connectivity failure. If pg_ctl did not return success,
-* we will exit below.
+* We set this here to make sure atexit() shuts down the server, but only
+* if we started the server successfully. We do it before checking for
+* connectivity in case the server started but there is a connectivity
+* failure. If pg_ctl did not return success, we will exit below.
 *
 * Pre-9.1 servers do not have PQping(), so we could be leaving the server
-* running if authentication was misconfigured, so someday we might went to
-* be more aggressive about doing server shutdowns even if pg_ctl fails,
-* but now (2013-08-14) it seems prudent to be cautious. We don't want to
-* shutdown a server that might have been accidentally started during the
-* upgrade.
+* running if authentication was misconfigured, so someday we might went
+* to be more aggressive about doing server shutdowns even if pg_ctl
+* fails, but now (2013-08-14) it seems prudent to be cautious. We don't
+* want to shutdown a server that might have been accidentally started
+* during the upgrade.
 */
 if (pg_ctl_return)
 os_info.running_cluster = cluster;

 /*
-* pg_ctl -w might have failed because the server couldn't be started,
-* or there might have been a connection problem in _checking_ if the
-* server has started. Therefore, even if pg_ctl failed, we continue
-* and test for connectivity in case we get a connection reason for the
-* failure.
+* pg_ctl -w might have failed because the server couldn't be started, or
+* there might have been a connection problem in _checking_ if the server
+* has started. Therefore, even if pg_ctl failed, we continue and test
+* for connectivity in case we get a connection reason for the failure.
 */
 if ((conn = get_db_conn(cluster, "template1")) == NULL ||
 PQstatus(conn) != CONNECTION_OK)
@@ -278,7 +276,8 @@ start_postmaster(ClusterInfo *cluster, bool throw_error)

 /*
 * If pg_ctl failed, and the connection didn't fail, and throw_error is
-* enabled, fail now. This could happen if the server was already running.
+* enabled, fail now. This could happen if the server was already
+* running.
 */
 if (!pg_ctl_return)
 pg_fatal("pg_ctl failed to start the %s server, or connection failed\n",
@@ -78,10 +78,9 @@ get_tablespace_paths(void)
 * Effectively, this is checking only for tables/indexes in
 * non-existent tablespace directories. Databases located in
 * non-existent tablespaces already throw a backend error.
-* Non-existent tablespace directories can occur when a data
-* directory that contains user tablespaces is moved as part
-* of pg_upgrade preparation and the symbolic links are not
-* updated.
+* Non-existent tablespace directories can occur when a data directory
+* that contains user tablespaces is moved as part of pg_upgrade
+* preparation and the symbolic links are not updated.
 */
 if (stat(os_info.old_tablespaces[tblnum], &statBuf) != 0)
 {
@@ -82,7 +82,7 @@ prep_status(const char *fmt,...)


 static
 __attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 0)))
 void
 pg_log_v(eLogType type, const char *fmt, va_list ap)
 {
@@ -163,8 +163,10 @@ bool use_quiet; /* quiet logging onto stderr */
 int agg_interval; /* log aggregates instead of individual
 * transactions */
 int progress = 0; /* thread progress report every this seconds */
-int progress_nclients = 0; /* number of clients for progress report */
-int progress_nthreads = 0; /* number of threads for progress report */
+int progress_nclients = 0; /* number of clients for progress
+* report */
+int progress_nthreads = 0; /* number of threads for progress
+* report */
 bool is_connect; /* establish connection for each transaction */
 bool is_latencies; /* report per-command latencies */
 int main_pid; /* main process id used in log filename */
@@ -913,28 +915,28 @@ top:
 commands = sql_files[st->use_file];

 /*
-* Handle throttling once per transaction by sleeping. It is simpler
-* to do this here rather than at the end, because so much complicated
-* logic happens below when statements finish.
+* Handle throttling once per transaction by sleeping. It is simpler to
+* do this here rather than at the end, because so much complicated logic
+* happens below when statements finish.
 */
-if (throttle_delay && ! st->is_throttled)
+if (throttle_delay && !st->is_throttled)
 {
 /*
 * Use inverse transform sampling to randomly generate a delay, such
 * that the series of delays will approximate a Poisson distribution
 * centered on the throttle_delay time.
 *
-* 10000 implies a 9.2 (-log(1/10000)) to 0.0 (log 1) delay multiplier,
-* and results in a 0.055 % target underestimation bias:
+* 10000 implies a 9.2 (-log(1/10000)) to 0.0 (log 1) delay
+* multiplier, and results in a 0.055 % target underestimation bias:
 *
 * SELECT 1.0/AVG(-LN(i/10000.0)) FROM generate_series(1,10000) AS i;
 * = 1.000552717032611116335474
 *
-* If transactions are too slow or a given wait is shorter than
-* a transaction, the next transaction will start right away.
+* If transactions are too slow or a given wait is shorter than a
+* transaction, the next transaction will start right away.
 */
 int64 wait = (int64) (throttle_delay *
-1.00055271703 * -log(getrand(thread, 1, 10000)/10000.0));
+1.00055271703 * -log(getrand(thread, 1, 10000) / 10000.0));

 thread->throttle_trigger += wait;
@@ -943,7 +945,7 @@ top:
 st->throttling = true;
 st->is_throttled = true;
 if (debug)
-fprintf(stderr, "client %d throttling "INT64_FORMAT" us\n",
+fprintf(stderr, "client %d throttling " INT64_FORMAT " us\n",
 st->id, wait);
 }
@@ -961,6 +963,7 @@ top:
 {
 /* Measure lag of throttled transaction relative to target */
 int64 lag = now_us - st->until;
+
 thread->throttle_lag += lag;
 if (lag > thread->throttle_lag_max)
 thread->throttle_lag_max = lag;
@@ -1011,6 +1014,7 @@ top:
 INSTR_TIME_SUBTRACT(diff, st->txn_begin);
 latency = INSTR_TIME_GET_MICROSEC(diff);
 st->txn_latencies += latency;
+
 /*
 * XXX In a long benchmark run of high-latency transactions, this
 * int64 addition eventually overflows. For example, 100 threads
@@ -1174,14 +1178,16 @@ top:
 st->use_file = (int) getrand(thread, 0, num_files - 1);
 commands = sql_files[st->use_file];
 st->is_throttled = false;

 /*
-* No transaction is underway anymore, which means there is nothing
-* to listen to right now. When throttling rate limits are active,
-* a sleep will happen next, as the next transaction starts. And
-* then in any case the next SQL command will set listen back to 1.
+* No transaction is underway anymore, which means there is
+* nothing to listen to right now. When throttling rate limits
+* are active, a sleep will happen next, as the next transaction
+* starts. And then in any case the next SQL command will set
+* listen back to 1.
 */
 st->listen = 0;
-trans_needs_throttle = (throttle_delay>0);
+trans_needs_throttle = (throttle_delay > 0);
 }
 }
@@ -1201,11 +1207,12 @@ top:
 }

 /*
-* This ensures that a throttling delay is inserted before proceeding
-* with sql commands, after the first transaction. The first transaction
+* This ensures that a throttling delay is inserted before proceeding with
+* sql commands, after the first transaction. The first transaction
 * throttling is performed when first entering doCustom.
 */
-if (trans_needs_throttle) {
+if (trans_needs_throttle)
+{
 trans_needs_throttle = false;
 goto top;
 }
@@ -1553,12 +1560,12 @@ init(bool is_no_vacuum)
 * Note: TPC-B requires at least 100 bytes per row, and the "filler"
 * fields in these table declarations were intended to comply with that.
 * The pgbench_accounts table complies with that because the "filler"
-* column is set to blank-padded empty string. But for all other tables the
-* column defaults to NULL and so don't actually take any space. We could
-* fix that by giving them non-null default values. However, that would
-* completely break comparability of pgbench results with prior versions.
-* Since pgbench has never pretended to be fully TPC-B compliant anyway, we
-* stick with the historical behavior.
+* column is set to blank-padded empty string. But for all other tables
+* the column defaults to NULL and so don't actually take any space. We
+* could fix that by giving them non-null default values. However, that
+* would completely break comparability of pgbench results with prior
+* versions. Since pgbench has never pretended to be fully TPC-B compliant
+* anyway, we stick with the historical behavior.
 */
 struct ddlinfo
 {
@@ -2211,6 +2218,7 @@ printResults(int ttype, int normal_xacts, int nclients,
 /* compute and show latency average and standard deviation */
 double latency = 0.001 * total_latencies / normal_xacts;
 double sqlat = (double) total_sqlats / normal_xacts;
+
 printf("latency average: %.3f ms\n"
 "latency stddev: %.3f ms\n",
 latency, 0.001 * sqrt(sqlat - 1000000.0 * latency * latency));
@@ -2288,7 +2296,7 @@ int
 main(int argc, char **argv)
 {
 static struct option long_options[] = {
-/* systematic long/short named options*/
+/* systematic long/short named options */
 {"client", required_argument, NULL, 'c'},
 {"connect", no_argument, NULL, 'C'},
 {"debug", no_argument, NULL, 'd'},
@@ -2559,6 +2567,7 @@ main(int argc, char **argv)
 {
 /* get a double from the beginning of option value */
 double throttle_value = atof(optarg);
+
 if (throttle_value <= 0.0)
 {
 fprintf(stderr, "invalid rate limit: %s\n", optarg);
@@ -2963,11 +2972,15 @@ threadRun(void *arg)
 int nstate = thread->nstate;
 int remains = nstate; /* number of remaining clients */
 int i;

 /* for reporting progress: */
 int64 thread_start = INSTR_TIME_GET_MICROSEC(thread->start_time);
 int64 last_report = thread_start;
 int64 next_report = last_report + (int64) progress * 1000000;
-int64 last_count = 0, last_lats = 0, last_sqlats = 0, last_lags = 0;
+int64 last_count = 0,
+last_lats = 0,
+last_sqlats = 0,
+last_lags = 0;

 AggVals aggs;
@@ -3162,17 +3175,25 @@ threadRun(void *arg)
 {
 instr_time now_time;
 int64 now;

 INSTR_TIME_SET_CURRENT(now_time);
 now = INSTR_TIME_GET_MICROSEC(now_time);
 if (now >= next_report)
 {
 /* generate and show report */
-int64 count = 0, lats = 0, sqlats = 0;
+int64 count = 0,
+lats = 0,
+sqlats = 0;
 int64 lags = thread->throttle_lag;
 int64 run = now - last_report;
-double tps, total_run, latency, sqlat, stdev, lag;
+double tps,
+total_run,
+latency,
+sqlat,
+stdev,
+lag;

-for (i = 0 ; i < nstate ; i++)
+for (i = 0; i < nstate; i++)
 {
 count += state[i].cnt;
 lats += state[i].txn_latencies;
@@ -3202,7 +3223,7 @@ threadRun(void *arg)
 last_sqlats = sqlats;
 last_lags = lags;
 last_report = now;
-next_report += (int64) progress * 1000000;
+next_report += (int64) progress *1000000;
 }
 }
 #else
@@ -3211,23 +3232,32 @@ threadRun(void *arg)
 {
 instr_time now_time;
 int64 now;

 INSTR_TIME_SET_CURRENT(now_time);
 now = INSTR_TIME_GET_MICROSEC(now_time);
 if (now >= next_report)
 {
 /* generate and show report */
-int64 count = 0, lats = 0, sqlats = 0, lags = 0;
+int64 count = 0,
+lats = 0,
+sqlats = 0,
+lags = 0;
 int64 run = now - last_report;
-double tps, total_run, latency, sqlat, lag, stdev;
+double tps,
+total_run,
+latency,
+sqlat,
+lag,
+stdev;

-for (i = 0 ; i < progress_nclients ; i++)
+for (i = 0; i < progress_nclients; i++)
 {
 count += state[i].cnt;
 lats += state[i].txn_latencies;
 sqlats += state[i].txn_sqlats;
 }

-for (i = 0 ; i < progress_nthreads ; i++)
+for (i = 0; i < progress_nthreads; i++)
 lags += thread[i].throttle_lag;

 total_run = (now - thread_start) / 1000000.0;
@@ -3253,7 +3283,7 @@ threadRun(void *arg)
 last_sqlats = sqlats;
 last_lags = lags;
 last_report = now;
-next_report += (int64) progress * 1000000;
+next_report += (int64) progress *1000000;
 }
 }
 #endif /* PTHREAD_FORK_EMULATION */
@@ -429,8 +429,8 @@ bf_init(PX_Cipher *c, const uint8 *key, unsigned klen, const uint8 *iv)

 /*
 * Test if key len is supported. BF_set_key silently cut large keys and it
-* could be a problem when user transfer crypted data from one server
-* to another.
+* could be a problem when user transfer crypted data from one server to
+* another.
 */

 if (bf_is_strong == -1)
@@ -319,6 +319,7 @@ tuple_to_stringinfo(StringInfo s, TupleDesc tupdesc, HeapTuple tuple, bool skip_
 else
 {
 Datum val; /* definitely detoasted Datum */
+
 val = PointerGetDatum(PG_DETOAST_DATUM(origval));
 print_literal(s, typid, OidOutputFunctionCall(typoutput, val));
 }
@@ -69,8 +69,8 @@ test_shm_mq_setup(int64 queue_size, int32 nworkers, dsm_segment **segp,
 wait_for_workers_to_become_ready(wstate, hdr);

 /*
-* Once we reach this point, all workers are ready. We no longer need
-* to kill them if we die; they'll die on their own as the message queues
+* Once we reach this point, all workers are ready. We no longer need to
+* kill them if we die; they'll die on their own as the message queues
 * shut down.
 */
 cancel_on_dsm_detach(seg, cleanup_background_workers,
@@ -194,16 +194,16 @@ setup_background_workers(int nworkers, dsm_segment *seg)
 * Arrange to kill all the workers if we abort before all workers are
 * finished hooking themselves up to the dynamic shared memory segment.
 *
-* If we die after all the workers have finished hooking themselves up
-* to the dynamic shared memory segment, we'll mark the two queues to
-* which we're directly connected as detached, and the worker(s)
-* connected to those queues will exit, marking any other queues to
-* which they are connected as detached. This will cause any
-* as-yet-unaware workers connected to those queues to exit in their
-* turn, and so on, until everybody exits.
+* If we die after all the workers have finished hooking themselves up to
+* the dynamic shared memory segment, we'll mark the two queues to which
+* we're directly connected as detached, and the worker(s) connected to
+* those queues will exit, marking any other queues to which they are
+* connected as detached. This will cause any as-yet-unaware workers
+* connected to those queues to exit in their turn, and so on, until
+* everybody exits.
 *
-* But suppose the workers which are supposed to connect to the queues
-* to which we're directly attached exit due to some error before they
+* But suppose the workers which are supposed to connect to the queues to
+* which we're directly attached exit due to some error before they
 * actually attach the queues. The remaining workers will have no way of
 * knowing this. From their perspective, they're still waiting for those
 * workers to start, when in fact they've already died.
@@ -18,8 +18,7 @@

 #include "test_shm_mq.h"

-PG_MODULE_MAGIC;
-PG_FUNCTION_INFO_V1(test_shm_mq);
+PG_MODULE_MAGIC; PG_FUNCTION_INFO_V1(test_shm_mq);
 PG_FUNCTION_INFO_V1(test_shm_mq_pipelined);

 void _PG_init(void);
@@ -59,8 +58,8 @@ test_shm_mq(PG_FUNCTION_ARGS)

 /*
 * Since this test sends data using the blocking interfaces, it cannot
-* send data to itself. Therefore, a minimum of 1 worker is required.
-* Of course, a negative worker count is nonsensical.
+* send data to itself. Therefore, a minimum of 1 worker is required. Of
+* course, a negative worker count is nonsensical.
 */
 if (nworkers < 1)
 ereport(ERROR,
@@ -224,10 +223,10 @@ test_shm_mq_pipelined(PG_FUNCTION_ARGS)
 if (wait)
 {
 /*
-* If we made no progress, wait for one of the other processes
-* to which we are connected to set our latch, indicating that
-* they have read or written data and therefore there may now be
-* work for us to do.
+* If we made no progress, wait for one of the other processes to
+* which we are connected to set our latch, indicating that they
+* have read or written data and therefore there may now be work
+* for us to do.
 */
 WaitLatch(&MyProc->procLatch, WL_LATCH_SET, 0);
 CHECK_FOR_INTERRUPTS();
@@ -58,12 +58,12 @@ test_shm_mq_main(Datum main_arg)
 /*
 * Establish signal handlers.
 *
-* We want CHECK_FOR_INTERRUPTS() to kill off this worker process just
-* as it would a normal user backend. To make that happen, we establish
-* a signal handler that is a stripped-down version of die(). We don't
-* have any equivalent of the backend's command-read loop, where interrupts
-* can be processed immediately, so make sure ImmediateInterruptOK is
-* turned off.
+* We want CHECK_FOR_INTERRUPTS() to kill off this worker process just as
+* it would a normal user backend. To make that happen, we establish a
+* signal handler that is a stripped-down version of die(). We don't have
+* any equivalent of the backend's command-read loop, where interrupts can
+* be processed immediately, so make sure ImmediateInterruptOK is turned
+* off.
 */
 pqsignal(SIGTERM, handle_sigterm);
 ImmediateInterruptOK = false;
@@ -76,8 +76,8 @@ test_shm_mq_main(Datum main_arg)
 * memory segment to which we must attach for further instructions. In
 * order to attach to dynamic shared memory, we need a resource owner.
 * Once we've mapped the segment in our address space, attach to the table
-* of contents so we can locate the various data structures we'll need
-* to find within the segment.
+* of contents so we can locate the various data structures we'll need to
+* find within the segment.
 */
 CurrentResourceOwner = ResourceOwnerCreate(NULL, "test_shm_mq worker");
 seg = dsm_attach(DatumGetInt32(main_arg));
@@ -114,8 +114,8 @@ test_shm_mq_main(Datum main_arg)
 attach_to_queues(seg, toc, myworkernumber, &inqh, &outqh);

 /*
-* Indicate that we're fully initialized and ready to begin the main
-* part of the parallel operation.
+* Indicate that we're fully initialized and ready to begin the main part
+* of the parallel operation.
 *
 * Once we signal that we're ready, the user backend is entitled to assume
 * that our on_dsm_detach callbacks will fire before we disconnect from
@@ -279,6 +279,7 @@ ginarraytriconsistent(PG_FUNCTION_ARGS)
 res = GIN_MAYBE;
 break;
 case GinEqualStrategy:
+
 /*
 * Must have all elements in check[] true; no discrimination
 * against nulls here. This is because array_contain_compare and
@@ -251,6 +251,7 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack)
 Assert(blkno != btree->rootBlkno);
 ptr->blkno = blkno;
 ptr->buffer = buffer;
+
 /*
 * parent may be wrong, but if so, the ginFinishSplit call will
 * recurse to call ginFindParents again to fix it.
@@ -328,7 +329,8 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
 GinPlaceToPageRC rc;
 uint16 xlflags = 0;
 Page childpage = NULL;
-Page newlpage = NULL, newrpage = NULL;
+Page newlpage = NULL,
+newrpage = NULL;

 if (GinPageIsData(page))
 xlflags |= GIN_INSERT_ISDATA;
@@ -346,8 +348,8 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
 }

 /*
-* Try to put the incoming tuple on the page. placeToPage will decide
-* if the page needs to be split.
+* Try to put the incoming tuple on the page. placeToPage will decide if
+* the page needs to be split.
 */
 rc = btree->placeToPage(btree, stack->buffer, stack,
 insertdata, updateblkno,
@@ -450,6 +452,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
 if (childbuf != InvalidBuffer)
 {
 Page childpage = BufferGetPage(childbuf);
+
 GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;

 data.leftChildBlkno = BufferGetBlockNumber(childbuf);
@@ -505,8 +508,8 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,

 /*
 * Construct a new root page containing downlinks to the new left
-* and right pages. (do this in a temporary copy first rather
-* than overwriting the original page directly, so that we can still
+* and right pages. (do this in a temporary copy first rather than
+* overwriting the original page directly, so that we can still
 * abort gracefully if this fails.)
 */
 newrootpg = PageGetTempPage(newrpage);
@@ -627,8 +630,8 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
 bool first = true;

 /*
-* freestack == false when we encounter an incompletely split page during a
-* scan, while freestack == true is used in the normal scenario that a
+* freestack == false when we encounter an incompletely split page during
+* a scan, while freestack == true is used in the normal scenario that a
 * split is finished right after the initial insert.
 */
 if (!freestack)
@@ -650,8 +653,8 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
 * then continue with the current one.
 *
 * Note: we have to finish *all* incomplete splits we encounter, even
-* if we have to move right. Otherwise we might choose as the target
-* a page that has no downlink in the parent, and splitting it further
+* if we have to move right. Otherwise we might choose as the target a
+* page that has no downlink in the parent, and splitting it further
 * would fail.
 */
 if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
@@ -49,8 +49,8 @@ typedef struct
 dlist_head segments; /* a list of leafSegmentInfos */

 /*
-* The following fields represent how the segments are split across
-* pages, if a page split is required. Filled in by leafRepackItems.
+* The following fields represent how the segments are split across pages,
+* if a page split is required. Filled in by leafRepackItems.
 */
 dlist_node *lastleft; /* last segment on left page */
 int lsize; /* total size on left page */
@@ -83,9 +83,9 @@ typedef struct
 int nmodifieditems;

 /*
-* The following fields represent the items in this segment. If 'items'
-* is not NULL, it contains a palloc'd array of the itemsin this segment.
-* If 'seg' is not NULL, it contains the items in an already-compressed
+* The following fields represent the items in this segment. If 'items' is
+* not NULL, it contains a palloc'd array of the itemsin this segment. If
+* 'seg' is not NULL, it contains the items in an already-compressed
 * format. It can point to an on-disk page (!modified), or a palloc'd
 * segment in memory. If both are set, they must represent the same items.
 */
@@ -386,7 +386,7 @@ GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset)
 if (offset != maxoff + 1)
 memmove(ptr + sizeof(PostingItem),
 ptr,
-(maxoff - offset + 1) * sizeof(PostingItem));
+(maxoff - offset + 1) *sizeof(PostingItem));
 }
 memcpy(ptr, data, sizeof(PostingItem));
@@ -464,8 +464,8 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
 {
 /*
 * This needs to go to some other location in the tree. (The
-* caller should've chosen the insert location so that at least
-* the first item goes here.)
+* caller should've chosen the insert location so that at
+* least the first item goes here.)
 */
 Assert(i > 0);
 break;
@@ -769,16 +769,16 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
 * We don't try to re-encode the segments here, even though some of them
 * might be really small now that we've removed some items from them. It
 * seems like a waste of effort, as there isn't really any benefit from
-* larger segments per se; larger segments only help to pack more items
-* in the same space. We might as well delay doing that until the next
+* larger segments per se; larger segments only help to pack more items in
+* the same space. We might as well delay doing that until the next
 * insertion, which will need to re-encode at least part of the page
 * anyway.
 *
-* Also note if the page was in uncompressed, pre-9.4 format before, it
-* is now represented as one huge segment that contains all the items.
-* It might make sense to split that, to speed up random access, but we
-* don't bother. You'll have to REINDEX anyway if you want the full gain
-* of the new tighter index format.
+* Also note if the page was in uncompressed, pre-9.4 format before, it is
+* now represented as one huge segment that contains all the items. It
+* might make sense to split that, to speed up random access, but we don't
+* bother. You'll have to REINDEX anyway if you want the full gain of the
+* new tighter index format.
 */
 if (removedsomething)
 {
@ -795,6 +795,7 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
|
||||||
{
|
{
|
||||||
leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
|
leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
|
||||||
iter.cur);
|
iter.cur);
|
||||||
|
|
||||||
if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
|
if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
|
||||||
modified = true;
|
modified = true;
|
||||||
if (modified && seginfo->action != GIN_SEGMENT_DELETE)
|
if (modified && seginfo->action != GIN_SEGMENT_DELETE)
|
||||||
|
|
@ -863,7 +864,8 @@ constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
|
||||||
|
|
||||||
walbufbegin = palloc(
|
walbufbegin = palloc(
|
||||||
sizeof(ginxlogRecompressDataLeaf) +
|
sizeof(ginxlogRecompressDataLeaf) +
|
||||||
BLCKSZ + /* max size needed to hold the segment data */
|
BLCKSZ + /* max size needed to hold the segment
|
||||||
|
* data */
|
||||||
nmodified * 2 + /* (segno + action) per action */
|
nmodified * 2 + /* (segno + action) per action */
|
||||||
sizeof(XLogRecData));
|
sizeof(XLogRecData));
|
||||||
walbufend = walbufbegin;
|
walbufend = walbufbegin;
|
||||||
|
|
@@ -965,9 +967,9 @@ dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
 int segsize;

 /*
-* If the page was in pre-9.4 format before, convert the header, and
-* force all segments to be copied to the page whether they were modified
-* or not.
+* If the page was in pre-9.4 format before, convert the header, and force
+* all segments to be copied to the page whether they were modified or
+* not.
 */
 if (!GinPageIsCompressed(page))
 {
@@ -1022,6 +1024,7 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
 dlist_node *node;
 dlist_node *firstright;
 leafSegmentInfo *seginfo;
+
 /* these must be static so they can be returned to caller */
 static ginxlogSplitDataLeaf split_xlog;
 static XLogRecData rdata[3];
@@ -1121,6 +1124,7 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
 Page page = BufferGetPage(buf);
 OffsetNumber off = stack->off;
 PostingItem *pitem;
+
 /* these must be static so they can be returned to caller */
 static XLogRecData rdata;
 static ginxlogInsertDataInternal data;
@@ -1216,8 +1220,8 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
 *prdata = rdata;

 /*
-* First construct a new list of PostingItems, which includes all the
-* old items, and the new item.
+* First construct a new list of PostingItems, which includes all the old
+* items, and the new item.
 */
 memcpy(allitems, GinDataPageGetPostingItem(oldpage, FirstOffsetNumber),
 (off - 1) * sizeof(PostingItem));
@@ -1402,8 +1406,8 @@ addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems, int nNewItems)
 leafSegmentInfo *newseg;

 /*
-* If the page is completely empty, just construct one new segment to
-* hold all the new items.
+* If the page is completely empty, just construct one new segment to hold
+* all the new items.
 */
 if (dlist_is_empty(&leaf->segments))
 {
@@ -1567,10 +1571,10 @@ leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining)
 if (npacked != seginfo->nitems)
 {
 /*
-* Too large. Compress again to the target size, and create
-* a new segment to represent the remaining items. The new
-* segment is inserted after this one, so it will be
-* processed in the next iteration of this loop.
+* Too large. Compress again to the target size, and
+* create a new segment to represent the remaining items.
+* The new segment is inserted after this one, so it will
+* be processed in the next iteration of this loop.
 */
 if (seginfo->seg)
 pfree(seginfo->seg);
@@ -1741,8 +1745,8 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
 GinPageGetOpaque(tmppage)->rightlink = InvalidBlockNumber;

 /*
-* Write as many of the items to the root page as fit. In segments
-* of max GinPostingListSegmentMaxSize bytes each.
+* Write as many of the items to the root page as fit. In segments of max
+* GinPostingListSegmentMaxSize bytes each.
 */
 nrootitems = 0;
 rootsize = 0;

@@ -136,6 +136,7 @@ GinFormTuple(GinState *ginstate,
 if (data)
 {
 char *ptr = GinGetPosting(itup);
+
 memcpy(ptr, data, dataSize);
 }

@@ -86,6 +86,7 @@ scanPostingTree(Relation index, GinScanEntry scanEntry,
 if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
 {
 int n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap);
+
 scanEntry->predictNumberResult += n;
 }

@@ -463,11 +464,11 @@ startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
 * considerably, if the frequent term can be put in the additional set.
 *
 * There can be many legal ways to divide them entries into these two
-* sets. A conservative division is to just put everything in the
-* required set, but the more you can put in the additional set, the more
-* you can skip during the scan. To maximize skipping, we try to put as
-* many frequent items as possible into additional, and less frequent
-* ones into required. To do that, sort the entries by frequency
+* sets. A conservative division is to just put everything in the required
+* set, but the more you can put in the additional set, the more you can
+* skip during the scan. To maximize skipping, we try to put as many
+* frequent items as possible into additional, and less frequent ones into
+* required. To do that, sort the entries by frequency
 * (predictNumberResult), and put entries into the required set in that
 * order, until the consistent function says that none of the remaining
 * entries can form a match, without any items from the required set. The
@@ -635,8 +636,8 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
 if (stepright)
 {
 /*
-* We've processed all the entries on this page. If it was the last
-* page in the tree, we're done.
+* We've processed all the entries on this page. If it was the
+* last page in the tree, we're done.
 */
 if (GinPageRightMost(page))
 {
@@ -647,8 +648,8 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
 }

 /*
-* Step to next page, following the right link. then find the first
-* ItemPointer greater than advancePast.
+* Step to next page, following the right link. then find the
+* first ItemPointer greater than advancePast.
 */
 entry->buffer = ginStepRight(entry->buffer,
 ginstate->index,
@@ -781,6 +782,7 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
 gotitem = true;
 break;
 }
+
 /*
 * Not a lossy page. Skip over any offsets <= advancePast, and
 * return that.
@ -788,8 +790,9 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
|
||||||
if (entry->matchResult->blockno == advancePastBlk)
|
if (entry->matchResult->blockno == advancePastBlk)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* First, do a quick check against the last offset on the page.
|
* First, do a quick check against the last offset on the
|
||||||
* If that's > advancePast, so are all the other offsets.
|
* page. If that's > advancePast, so are all the other
|
||||||
|
* offsets.
|
||||||
*/
|
*/
|
||||||
if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff)
|
if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff)
|
||||||
{
|
{
|
||||||
|
|
@ -890,8 +893,8 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We might have already tested this item; if so, no need to repeat work.
|
* We might have already tested this item; if so, no need to repeat work.
|
||||||
* (Note: the ">" case can happen, if advancePast is exact but we previously
|
* (Note: the ">" case can happen, if advancePast is exact but we
|
||||||
* had to set curItem to a lossy-page pointer.)
|
* previously had to set curItem to a lossy-page pointer.)
|
||||||
*/
|
*/
|
||||||
if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
|
if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
|
||||||
return;
|
return;
|
||||||
|
|
@ -942,8 +945,8 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
|
||||||
/*
|
/*
|
||||||
* Ok, we now know that there are no matches < minItem.
|
* Ok, we now know that there are no matches < minItem.
|
||||||
*
|
*
|
||||||
* If minItem is lossy, it means that there were no exact items on
|
* If minItem is lossy, it means that there were no exact items on the
|
||||||
* the page among requiredEntries, because lossy pointers sort after exact
|
* page among requiredEntries, because lossy pointers sort after exact
|
||||||
* items. However, there might be exact items for the same page among
|
* items. However, there might be exact items for the same page among
|
||||||
* additionalEntries, so we mustn't advance past them.
|
* additionalEntries, so we mustn't advance past them.
|
||||||
*/
|
*/
|
||||||
|
|
@ -1085,6 +1088,7 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
|
||||||
if (entry->isFinished)
|
if (entry->isFinished)
|
||||||
key->entryRes[i] = GIN_FALSE;
|
key->entryRes[i] = GIN_FALSE;
|
||||||
#if 0
|
#if 0
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This case can't currently happen, because we loaded all the entries
|
* This case can't currently happen, because we loaded all the entries
|
||||||
* for this item earlier.
|
* for this item earlier.
|
||||||
|
|
@ -1119,6 +1123,7 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* the 'default' case shouldn't happen, but if the consistent
|
* the 'default' case shouldn't happen, but if the consistent
|
||||||
* function returns something bogus, this is the safe result
|
* function returns something bogus, this is the safe result
|
||||||
|
|
@ -1129,11 +1134,10 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We have a tuple, and we know if it matches or not. If it's a
|
* We have a tuple, and we know if it matches or not. If it's a non-match,
|
||||||
* non-match, we could continue to find the next matching tuple, but
|
* we could continue to find the next matching tuple, but let's break out
|
||||||
* let's break out and give scanGetItem a chance to advance the other
|
* and give scanGetItem a chance to advance the other keys. They might be
|
||||||
* keys. They might be able to skip past to a much higher TID, allowing
|
* able to skip past to a much higher TID, allowing us to save work.
|
||||||
* us to save work.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* clean up after consistentFn calls */
|
/* clean up after consistentFn calls */
|
||||||
|
|
@ -1205,12 +1209,11 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* It's a match. We can conclude that nothing < matches, so
|
* It's a match. We can conclude that nothing < matches, so the
|
||||||
* the other key streams can skip to this item.
|
* other key streams can skip to this item.
|
||||||
*
|
*
|
||||||
* Beware of lossy pointers, though; from a lossy pointer, we
|
* Beware of lossy pointers, though; from a lossy pointer, we can
|
||||||
* can only conclude that nothing smaller than this *block*
|
* only conclude that nothing smaller than this *block* matches.
|
||||||
* matches.
|
|
||||||
*/
|
*/
|
||||||
if (ItemPointerIsLossyPage(&key->curItem))
|
if (ItemPointerIsLossyPage(&key->curItem))
|
||||||
{
|
{
|
||||||
|
|
@ -1229,8 +1232,8 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If this is the first key, remember this location as a
|
* If this is the first key, remember this location as a potential
|
||||||
* potential match, and proceed to check the rest of the keys.
|
* match, and proceed to check the rest of the keys.
|
||||||
*
|
*
|
||||||
* Otherwise, check if this is the same item that we checked the
|
* Otherwise, check if this is the same item that we checked the
|
||||||
* previous keys for (or a lossy pointer for the same page). If
|
* previous keys for (or a lossy pointer for the same page). If
|
||||||
|
|
@@ -1247,7 +1250,7 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
 if (ItemPointerIsLossyPage(&key->curItem) ||
 ItemPointerIsLossyPage(item))
 {
-Assert (GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
+Assert(GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
 match = (GinItemPointerGetBlockNumber(&key->curItem) ==
 GinItemPointerGetBlockNumber(item));
 }
@ -1264,8 +1267,8 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now *item contains the first ItemPointer after previous result that
|
* Now *item contains the first ItemPointer after previous result that
|
||||||
* satisfied all the keys for that exact TID, or a lossy reference
|
* satisfied all the keys for that exact TID, or a lossy reference to the
|
||||||
* to the same page.
|
* same page.
|
||||||
*
|
*
|
||||||
* We must return recheck = true if any of the keys are marked recheck.
|
* We must return recheck = true if any of the keys are marked recheck.
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -115,6 +115,7 @@ static bool
|
||||||
shimBoolConsistentFn(GinScanKey key)
|
shimBoolConsistentFn(GinScanKey key)
|
||||||
{
|
{
|
||||||
GinTernaryValue result;
|
GinTernaryValue result;
|
||||||
|
|
||||||
result = DatumGetGinTernaryValue(FunctionCall7Coll(
|
result = DatumGetGinTernaryValue(FunctionCall7Coll(
|
||||||
key->triConsistentFmgrInfo,
|
key->triConsistentFmgrInfo,
|
||||||
key->collation,
|
key->collation,
|
||||||
|
|
|
||||||
|
|
@@ -210,7 +210,7 @@ ginCompressPostingList(const ItemPointer ipd, int nipd, int maxsize,
 uint64 val = itemptr_to_uint64(&ipd[totalpacked]);
 uint64 delta = val - prev;

-Assert (val > prev);
+Assert(val > prev);

 if (endptr - ptr >= 6)
 encode_varbyte(delta, &ptr);
@ -374,8 +374,8 @@ ginMergeItemPointers(ItemPointerData *a, uint32 na,
|
||||||
dst = (ItemPointer) palloc((na + nb) * sizeof(ItemPointerData));
|
dst = (ItemPointer) palloc((na + nb) * sizeof(ItemPointerData));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the argument arrays don't overlap, we can just append them to
|
* If the argument arrays don't overlap, we can just append them to each
|
||||||
* each other.
|
* other.
|
||||||
*/
|
*/
|
||||||
if (na == 0 || nb == 0 || ginCompareItemPointers(&a[na - 1], &b[0]) < 0)
|
if (na == 0 || nb == 0 || ginCompareItemPointers(&a[na - 1], &b[0]) < 0)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -67,6 +67,7 @@ initGinState(GinState *state, Relation index)
|
||||||
fmgr_info_copy(&(state->extractQueryFn[i]),
|
fmgr_info_copy(&(state->extractQueryFn[i]),
|
||||||
index_getprocinfo(index, i + 1, GIN_EXTRACTQUERY_PROC),
|
index_getprocinfo(index, i + 1, GIN_EXTRACTQUERY_PROC),
|
||||||
CurrentMemoryContext);
|
CurrentMemoryContext);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check opclass capability to do tri-state or binary logic consistent
|
* Check opclass capability to do tri-state or binary logic consistent
|
||||||
* check.
|
* check.
|
||||||
|
|
|
||||||
|
|
@ -208,8 +208,8 @@ ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* if we have root and there are empty pages in tree, then we don't release
|
* if we have root and there are empty pages in tree, then we don't
|
||||||
* lock to go further processing and guarantee that tree is unused
|
* release lock to go further processing and guarantee that tree is unused
|
||||||
*/
|
*/
|
||||||
if (!(isRoot && hasVoidPage))
|
if (!(isRoot && hasVoidPage))
|
||||||
{
|
{
|
||||||
|
|
@ -302,11 +302,11 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
|
||||||
data.rightLink = GinPageGetOpaque(page)->rightlink;
|
data.rightLink = GinPageGetOpaque(page)->rightlink;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We can't pass buffer_std = TRUE, because we didn't set pd_lower
|
* We can't pass buffer_std = TRUE, because we didn't set pd_lower on
|
||||||
* on pre-9.4 versions. The page might've been binary-upgraded from
|
* pre-9.4 versions. The page might've been binary-upgraded from an
|
||||||
* an older version, and hence not have pd_lower set correctly.
|
* older version, and hence not have pd_lower set correctly. Ditto for
|
||||||
* Ditto for the left page, but removing the item from the parent
|
* the left page, but removing the item from the parent updated its
|
||||||
* updated its pd_lower, so we know that's OK at this point.
|
* pd_lower, so we know that's OK at this point.
|
||||||
*/
|
*/
|
||||||
rdata[0].buffer = dBuffer;
|
rdata[0].buffer = dBuffer;
|
||||||
rdata[0].buffer_std = FALSE;
|
rdata[0].buffer_std = FALSE;
|
||||||
|
|
@ -538,7 +538,8 @@ ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint3
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* if we already created a temporary page, make changes in place
|
* if we already created a temporary page, make changes in
|
||||||
|
* place
|
||||||
*/
|
*/
|
||||||
if (tmppage == origpage)
|
if (tmppage == origpage)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -341,8 +341,8 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
|
||||||
payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
|
payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* First clear incomplete-split flag on child page if this finishes
|
* First clear incomplete-split flag on child page if this finishes a
|
||||||
* a split.
|
* split.
|
||||||
*/
|
*/
|
||||||
if (!isLeaf)
|
if (!isLeaf)
|
||||||
{
|
{
|
||||||
|
|
@ -472,8 +472,8 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||||
payload = XLogRecGetData(record) + sizeof(ginxlogSplit);
|
payload = XLogRecGetData(record) + sizeof(ginxlogSplit);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* First clear incomplete-split flag on child page if this finishes
|
* First clear incomplete-split flag on child page if this finishes a
|
||||||
* a split
|
* split
|
||||||
*/
|
*/
|
||||||
if (!isLeaf)
|
if (!isLeaf)
|
||||||
{
|
{
|
||||||
|
|
@ -711,9 +711,9 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
|
||||||
Buffer buffer;
|
Buffer buffer;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Restore the metapage. This is essentially the same as a full-page image,
|
* Restore the metapage. This is essentially the same as a full-page
|
||||||
* so restore the metapage unconditionally without looking at the LSN, to
|
* image, so restore the metapage unconditionally without looking at the
|
||||||
* avoid torn page hazards.
|
* LSN, to avoid torn page hazards.
|
||||||
*/
|
*/
|
||||||
metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
|
metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
|
||||||
if (!BufferIsValid(metabuffer))
|
if (!BufferIsValid(metabuffer))
|
||||||
|
|
|
||||||
|
|
@ -387,6 +387,7 @@ gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
|
||||||
|
|
||||||
for (ptr = dist; ptr; ptr = ptr->next)
|
for (ptr = dist; ptr; ptr = ptr->next)
|
||||||
npage++;
|
npage++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* the caller should've checked this already, but doesn't hurt to check
|
* the caller should've checked this already, but doesn't hurt to check
|
||||||
* again.
|
* again.
|
||||||
|
|
|
||||||
|
|
@ -2123,8 +2123,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
|
||||||
bool need_tuple_data;
|
bool need_tuple_data;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For logical decoding, we need the tuple even if we're doing a
|
* For logical decoding, we need the tuple even if we're doing a full
|
||||||
* full page write, so make sure to log it separately. (XXX We could
|
* page write, so make sure to log it separately. (XXX We could
|
||||||
* alternatively store a pointer into the FPW).
|
* alternatively store a pointer into the FPW).
|
||||||
*
|
*
|
||||||
* Also, if this is a catalog, we need to transmit combocids to
|
* Also, if this is a catalog, we need to transmit combocids to
|
||||||
|
|
@ -2165,9 +2165,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
|
||||||
rdata[2].next = NULL;
|
rdata[2].next = NULL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Make a separate rdata entry for the tuple's buffer if we're
|
* Make a separate rdata entry for the tuple's buffer if we're doing
|
||||||
* doing logical decoding, so that an eventual FPW doesn't
|
* logical decoding, so that an eventual FPW doesn't remove the
|
||||||
* remove the tuple's data.
|
* tuple's data.
|
||||||
*/
|
*/
|
||||||
if (need_tuple_data)
|
if (need_tuple_data)
|
||||||
{
|
{
|
||||||
|
|
@ -2487,9 +2487,9 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
|
||||||
rdata[1].next = NULL;
|
rdata[1].next = NULL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Make a separate rdata entry for the tuple's buffer if
|
* Make a separate rdata entry for the tuple's buffer if we're
|
||||||
* we're doing logical decoding, so that an eventual FPW
|
* doing logical decoding, so that an eventual FPW doesn't remove
|
||||||
* doesn't remove the tuple's data.
|
* the tuple's data.
|
||||||
*/
|
*/
|
||||||
if (need_tuple_data)
|
if (need_tuple_data)
|
||||||
{
|
{
|
||||||
|
|
@@ -2919,7 +2919,7 @@ l1:
 xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;

 rdata[1].next = &(rdata[2]);
-rdata[2].data = (char*)&xlhdr;
+rdata[2].data = (char *) &xlhdr;
 rdata[2].len = SizeOfHeapHeader;
 rdata[2].buffer = InvalidBuffer;
 rdata[2].next = NULL;
@ -3951,8 +3951,7 @@ HeapSatisfiesHOTandKeyUpdate(Relation relation, Bitmapset *hot_attrs,
|
||||||
/*
|
/*
|
||||||
* Since the HOT attributes are a superset of the key attributes and
|
* Since the HOT attributes are a superset of the key attributes and
|
||||||
* the key attributes are a superset of the id attributes, this logic
|
* the key attributes are a superset of the id attributes, this logic
|
||||||
* is guaranteed to identify the next column that needs to be
|
* is guaranteed to identify the next column that needs to be checked.
|
||||||
* checked.
|
|
||||||
*/
|
*/
|
||||||
if (hot_result && next_hot_attnum > FirstLowInvalidHeapAttributeNumber)
|
if (hot_result && next_hot_attnum > FirstLowInvalidHeapAttributeNumber)
|
||||||
check_now = next_hot_attnum;
|
check_now = next_hot_attnum;
|
||||||
|
|
@ -3981,12 +3980,11 @@ HeapSatisfiesHOTandKeyUpdate(Relation relation, Bitmapset *hot_attrs,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Advance the next attribute numbers for the sets that contain
|
* Advance the next attribute numbers for the sets that contain the
|
||||||
* the attribute we just checked. As we work our way through the
|
* attribute we just checked. As we work our way through the columns,
|
||||||
* columns, the next_attnum values will rise; but when each set
|
* the next_attnum values will rise; but when each set becomes empty,
|
||||||
* becomes empty, bms_first_member() will return -1 and the attribute
|
* bms_first_member() will return -1 and the attribute number will end
|
||||||
* number will end up with a value less than
|
* up with a value less than FirstLowInvalidHeapAttributeNumber.
|
||||||
* FirstLowInvalidHeapAttributeNumber.
|
|
||||||
*/
|
*/
|
||||||
if (hot_result && check_now == next_hot_attnum)
|
if (hot_result && check_now == next_hot_attnum)
|
||||||
{
|
{
|
||||||
|
|
@ -4929,12 +4927,13 @@ l5:
|
||||||
if (xmax == add_to_xmax)
|
if (xmax == add_to_xmax)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Note that it's not possible for the original tuple to be updated:
|
* Note that it's not possible for the original tuple to be
|
||||||
* we wouldn't be here because the tuple would have been invisible and
|
* updated: we wouldn't be here because the tuple would have been
|
||||||
* we wouldn't try to update it. As a subtlety, this code can also
|
* invisible and we wouldn't try to update it. As a subtlety,
|
||||||
* run when traversing an update chain to lock future versions of a
|
* this code can also run when traversing an update chain to lock
|
||||||
* tuple. But we wouldn't be here either, because the add_to_xmax
|
* future versions of a tuple. But we wouldn't be here either,
|
||||||
* would be different from the original updater.
|
* because the add_to_xmax would be different from the original
|
||||||
|
* updater.
|
||||||
*/
|
*/
|
||||||
Assert(HEAP_XMAX_IS_LOCKED_ONLY(old_infomask));
|
Assert(HEAP_XMAX_IS_LOCKED_ONLY(old_infomask));
|
||||||
|
|
||||||
|
|
@ -5026,18 +5025,18 @@ test_lockmode_for_conflict(MultiXactStatus status, TransactionId xid,
|
||||||
if (TransactionIdIsCurrentTransactionId(xid))
|
if (TransactionIdIsCurrentTransactionId(xid))
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Updated by our own transaction? Just return failure. This shouldn't
|
* Updated by our own transaction? Just return failure. This
|
||||||
* normally happen.
|
* shouldn't normally happen.
|
||||||
*/
|
*/
|
||||||
return HeapTupleSelfUpdated;
|
return HeapTupleSelfUpdated;
|
||||||
}
|
}
|
||||||
else if (TransactionIdIsInProgress(xid))
|
else if (TransactionIdIsInProgress(xid))
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* If the locking transaction is running, what we do depends on whether
|
* If the locking transaction is running, what we do depends on
|
||||||
* the lock modes conflict: if they do, then we must wait for it to
|
* whether the lock modes conflict: if they do, then we must wait for
|
||||||
* finish; otherwise we can fall through to lock this tuple version
|
* it to finish; otherwise we can fall through to lock this tuple
|
||||||
* without waiting.
|
* version without waiting.
|
||||||
*/
|
*/
|
||||||
if (DoLockModesConflict(LOCKMODE_from_mxstatus(status),
|
if (DoLockModesConflict(LOCKMODE_from_mxstatus(status),
|
||||||
LOCKMODE_from_mxstatus(wantedstatus)))
|
LOCKMODE_from_mxstatus(wantedstatus)))
|
||||||
|
|
@ -5046,8 +5045,8 @@ test_lockmode_for_conflict(MultiXactStatus status, TransactionId xid,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we set needwait above, then this value doesn't matter; otherwise,
|
* If we set needwait above, then this value doesn't matter;
|
||||||
* this value signals to caller that it's okay to proceed.
|
* otherwise, this value signals to caller that it's okay to proceed.
|
||||||
*/
|
*/
|
||||||
return HeapTupleMayBeUpdated;
|
return HeapTupleMayBeUpdated;
|
||||||
}
|
}
|
||||||
|
|
@ -5133,8 +5132,8 @@ l4:
|
||||||
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
|
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check the tuple XMIN against prior XMAX, if any. If we reached
|
* Check the tuple XMIN against prior XMAX, if any. If we reached the
|
||||||
* the end of the chain, we're done, so return success.
|
* end of the chain, we're done, so return success.
|
||||||
*/
|
*/
|
||||||
if (TransactionIdIsValid(priorXmax) &&
|
if (TransactionIdIsValid(priorXmax) &&
|
||||||
!TransactionIdEquals(HeapTupleHeaderGetXmin(mytup.t_data),
|
!TransactionIdEquals(HeapTupleHeaderGetXmin(mytup.t_data),
|
||||||
|
|
@ -5219,9 +5218,9 @@ l4:
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* LOCK_ONLY present alone (a pg_upgraded tuple
|
* LOCK_ONLY present alone (a pg_upgraded tuple marked
|
||||||
* marked as share-locked in the old cluster) shouldn't
|
* as share-locked in the old cluster) shouldn't be
|
||||||
* be seen in the middle of an update chain.
|
* seen in the middle of an update chain.
|
||||||
*/
|
*/
|
||||||
elog(ERROR, "invalid lock status in tuple");
|
elog(ERROR, "invalid lock status in tuple");
|
||||||
}
|
}
|
||||||
|
|
@ -5801,11 +5800,11 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
|
||||||
else if (flags & FRM_RETURN_IS_XID)
|
else if (flags & FRM_RETURN_IS_XID)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* NB -- some of these transformations are only valid because
|
* NB -- some of these transformations are only valid because we
|
||||||
* we know the return Xid is a tuple updater (i.e. not merely a
|
* know the return Xid is a tuple updater (i.e. not merely a
|
||||||
* locker.) Also note that the only reason we don't explicitely
|
* locker.) Also note that the only reason we don't explicitely
|
||||||
* worry about HEAP_KEYS_UPDATED is because it lives in t_infomask2
|
* worry about HEAP_KEYS_UPDATED is because it lives in
|
||||||
* rather than t_infomask.
|
* t_infomask2 rather than t_infomask.
|
||||||
*/
|
*/
|
||||||
frz->t_infomask &= ~HEAP_XMAX_BITS;
|
frz->t_infomask &= ~HEAP_XMAX_BITS;
|
||||||
frz->xmax = newxmax;
|
frz->xmax = newxmax;
|
||||||
|
|
@ -6674,10 +6673,10 @@ log_heap_update(Relation reln, Buffer oldbuf,
|
||||||
info = XLOG_HEAP_UPDATE;
|
info = XLOG_HEAP_UPDATE;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the old and new tuple are on the same page, we only need to log
|
* If the old and new tuple are on the same page, we only need to log the
|
||||||
* the parts of the new tuple that were changed. That saves on the amount
|
* parts of the new tuple that were changed. That saves on the amount of
|
||||||
* of WAL we need to write. Currently, we just count any unchanged bytes
|
* WAL we need to write. Currently, we just count any unchanged bytes in
|
||||||
* in the beginning and end of the tuple. That's quick to check, and
|
* the beginning and end of the tuple. That's quick to check, and
|
||||||
* perfectly covers the common case that only one field is updated.
|
* perfectly covers the common case that only one field is updated.
|
||||||
*
|
*
|
||||||
* We could do this even if the old and new tuple are on different pages,
|
* We could do this even if the old and new tuple are on different pages,
|
||||||
|
|
@ -6688,10 +6687,10 @@ log_heap_update(Relation reln, Buffer oldbuf,
|
||||||
* updates tend to create the new tuple version on the same page, there
|
* updates tend to create the new tuple version on the same page, there
|
||||||
* isn't much to be gained by doing this across pages anyway.
|
* isn't much to be gained by doing this across pages anyway.
|
||||||
*
|
*
|
||||||
* Skip this if we're taking a full-page image of the new page, as we don't
|
* Skip this if we're taking a full-page image of the new page, as we
|
||||||
* include the new tuple in the WAL record in that case. Also disable if
|
* don't include the new tuple in the WAL record in that case. Also
|
||||||
* wal_level='logical', as logical decoding needs to be able to read the
|
* disable if wal_level='logical', as logical decoding needs to be able to
|
||||||
* new tuple in whole from the WAL record alone.
|
* read the new tuple in whole from the WAL record alone.
|
||||||
*/
|
*/
|
||||||
if (oldbuf == newbuf && !need_tuple_data &&
|
if (oldbuf == newbuf && !need_tuple_data &&
|
||||||
!XLogCheckBufferNeedsBackup(newbuf))
|
!XLogCheckBufferNeedsBackup(newbuf))
|
||||||
|
|
@ -6707,6 +6706,7 @@ log_heap_update(Relation reln, Buffer oldbuf,
|
||||||
if (newp[prefixlen] != oldp[prefixlen])
|
if (newp[prefixlen] != oldp[prefixlen])
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Storing the length of the prefix takes 2 bytes, so we need to save
|
* Storing the length of the prefix takes 2 bytes, so we need to save
|
||||||
* at least 3 bytes or there's no point.
|
* at least 3 bytes or there's no point.
|
||||||
|
|
@ -6793,8 +6793,8 @@ log_heap_update(Relation reln, Buffer oldbuf,
|
||||||
xlhdr.header.t_infomask2 = newtup->t_data->t_infomask2;
|
xlhdr.header.t_infomask2 = newtup->t_data->t_infomask2;
|
||||||
xlhdr.header.t_infomask = newtup->t_data->t_infomask;
|
xlhdr.header.t_infomask = newtup->t_data->t_infomask;
|
||||||
xlhdr.header.t_hoff = newtup->t_data->t_hoff;
|
xlhdr.header.t_hoff = newtup->t_data->t_hoff;
|
||||||
Assert(offsetof(HeapTupleHeaderData, t_bits) + prefixlen + suffixlen <= newtup->t_len);
|
Assert(offsetof(HeapTupleHeaderData, t_bits) +prefixlen + suffixlen <= newtup->t_len);
|
||||||
xlhdr.t_len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) - prefixlen - suffixlen;
|
xlhdr.t_len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) -prefixlen - suffixlen;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* As with insert records, we need not store this rdata segment if we
|
* As with insert records, we need not store this rdata segment if we
|
||||||
|
|
@ -6816,7 +6816,7 @@ log_heap_update(Relation reln, Buffer oldbuf,
|
||||||
if (prefixlen == 0)
|
if (prefixlen == 0)
|
||||||
{
|
{
|
||||||
rdata[nr].data = ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits);
|
rdata[nr].data = ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits);
|
||||||
rdata[nr].len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) - suffixlen;
|
rdata[nr].len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) -suffixlen;
|
||||||
rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
|
rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
|
||||||
rdata[nr].buffer_std = true;
|
rdata[nr].buffer_std = true;
|
||||||
rdata[nr].next = NULL;
|
rdata[nr].next = NULL;
|
||||||
|
|
@ -6829,7 +6829,7 @@ log_heap_update(Relation reln, Buffer oldbuf,
|
||||||
* two separate rdata entries.
|
* two separate rdata entries.
|
||||||
*/
|
*/
|
||||||
/* bitmap [+ padding] [+ oid] */
|
/* bitmap [+ padding] [+ oid] */
|
||||||
if (newtup->t_data->t_hoff - offsetof(HeapTupleHeaderData, t_bits) > 0)
|
if (newtup->t_data->t_hoff - offsetof(HeapTupleHeaderData, t_bits) >0)
|
||||||
{
|
{
|
||||||
rdata[nr - 1].next = &(rdata[nr]);
|
rdata[nr - 1].next = &(rdata[nr]);
|
||||||
rdata[nr].data = ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits);
|
rdata[nr].data = ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits);
|
||||||
|
|
@ -6992,8 +6992,8 @@ log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||||
recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
|
recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The page may be uninitialized. If so, we can't set the LSN because
|
* The page may be uninitialized. If so, we can't set the LSN because that
|
||||||
* that would corrupt the page.
|
* would corrupt the page.
|
||||||
*/
|
*/
|
||||||
if (!PageIsNew(page))
|
if (!PageIsNew(page))
|
||||||
{
|
{
|
||||||
|
|
@ -7179,8 +7179,8 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_changed, bool *
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* The OID column can appear in an index definition, but that's
|
* The OID column can appear in an index definition, but that's
|
||||||
* OK, becuse we always copy the OID if present (see below).
|
* OK, becuse we always copy the OID if present (see below). Other
|
||||||
* Other system columns may not.
|
* system columns may not.
|
||||||
*/
|
*/
|
||||||
if (attno == ObjectIdAttributeNumber)
|
if (attno == ObjectIdAttributeNumber)
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -7211,6 +7211,7 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_changed, bool *
|
||||||
if (HeapTupleHasExternal(key_tuple))
|
if (HeapTupleHasExternal(key_tuple))
|
||||||
{
|
{
|
||||||
HeapTuple oldtup = key_tuple;
|
HeapTuple oldtup = key_tuple;
|
||||||
|
|
||||||
key_tuple = toast_flatten_tuple(oldtup, RelationGetDescr(relation));
|
key_tuple = toast_flatten_tuple(oldtup, RelationGetDescr(relation));
|
||||||
heap_freetuple(oldtup);
|
heap_freetuple(oldtup);
|
||||||
}
|
}
|
||||||
|
|
@ -8169,7 +8170,7 @@ newsame:;
|
||||||
if (suffixlen > 0)
|
if (suffixlen > 0)
|
||||||
memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
|
memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
|
||||||
|
|
||||||
newlen = offsetof(HeapTupleHeaderData, t_bits) + xlhdr.t_len + prefixlen + suffixlen;
|
newlen = offsetof(HeapTupleHeaderData, t_bits) +xlhdr.t_len + prefixlen + suffixlen;
|
||||||
htup->t_infomask2 = xlhdr.header.t_infomask2;
|
htup->t_infomask2 = xlhdr.header.t_infomask2;
|
||||||
htup->t_infomask = xlhdr.header.t_infomask;
|
htup->t_infomask = xlhdr.header.t_infomask;
|
||||||
htup->t_hoff = xlhdr.header.t_hoff;
|
htup->t_hoff = xlhdr.header.t_hoff;
|
||||||
|
|
@ -8444,6 +8445,7 @@ heap2_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||||
heap_xlog_lock_updated(lsn, record);
|
heap_xlog_lock_updated(lsn, record);
|
||||||
break;
|
break;
|
||||||
case XLOG_HEAP2_NEW_CID:
|
case XLOG_HEAP2_NEW_CID:
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Nothing to do on a real replay, only used during logical
|
* Nothing to do on a real replay, only used during logical
|
||||||
* decoding.
|
* decoding.
|
||||||
|
|
|
||||||
|
|
@ -496,9 +496,10 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case HEAPTUPLE_DELETE_IN_PROGRESS:
|
case HEAPTUPLE_DELETE_IN_PROGRESS:
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This tuple may soon become DEAD. Update the hint field
|
* This tuple may soon become DEAD. Update the hint field so
|
||||||
* so that the page is reconsidered for pruning in future.
|
* that the page is reconsidered for pruning in future.
|
||||||
*/
|
*/
|
||||||
heap_prune_record_prunable(prstate,
|
heap_prune_record_prunable(prstate,
|
||||||
HeapTupleHeaderGetUpdateXid(htup));
|
HeapTupleHeaderGetUpdateXid(htup));
|
||||||
|
|
|
||||||
|
|
@@ -962,14 +962,14 @@ logical_end_heap_rewrite(RewriteState state)
 return;

 /* writeout remaining in-memory entries */
-if (state->rs_num_rewrite_mappings > 0 )
+if (state->rs_num_rewrite_mappings > 0)
 logical_heap_rewrite_flush_mappings(state);

 /* Iterate over all mappings we have written and fsync the files. */
 hash_seq_init(&seq_status, state->rs_logical_mappings);
 while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL)
 {
-if(FileSync(src->vfd) != 0)
+if (FileSync(src->vfd) != 0)
 ereport(ERROR,
 (errcode_for_file_access(),
 errmsg("could not fsync file \"%s\": %m", src->path)));
@@ -1041,7 +1041,7 @@ logical_rewrite_log_mapping(RewriteState state, TransactionId xid,
 * Write out buffer every time we've too many in-memory entries across all
 * mapping files.
 */
-if (state->rs_num_rewrite_mappings >= 1000 /* arbitrary number */)
+if (state->rs_num_rewrite_mappings >= 1000 /* arbitrary number */ )
 logical_heap_rewrite_flush_mappings(state);
 }

@ -1148,6 +1148,7 @@ heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r)
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode_for_file_access(),
|
(errcode_for_file_access(),
|
||||||
errmsg("could not create file \"%s\": %m", path)));
|
errmsg("could not create file \"%s\": %m", path)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Truncate all data that's not guaranteed to have been safely fsynced (by
|
* Truncate all data that's not guaranteed to have been safely fsynced (by
|
||||||
* previous record or by the last checkpoint).
|
* previous record or by the last checkpoint).
|
||||||
|
|
@ -1174,6 +1175,7 @@ heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r)
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode_for_file_access(),
|
(errcode_for_file_access(),
|
||||||
errmsg("could not write to file \"%s\": %m", path)));
|
errmsg("could not write to file \"%s\": %m", path)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now fsync all previously written data. We could improve things and only
|
* Now fsync all previously written data. We could improve things and only
|
||||||
* do this for the last write to a file, but the required bookkeeping
|
* do this for the last write to a file, but the required bookkeeping
|
||||||
|
|
@ -1228,7 +1230,8 @@ CheckPointLogicalRewriteHeap(void)
|
||||||
XLogRecPtr lsn;
|
XLogRecPtr lsn;
|
||||||
TransactionId rewrite_xid;
|
TransactionId rewrite_xid;
|
||||||
TransactionId create_xid;
|
TransactionId create_xid;
|
||||||
uint32 hi, lo;
|
uint32 hi,
|
||||||
|
lo;
|
||||||
|
|
||||||
if (strcmp(mapping_de->d_name, ".") == 0 ||
|
if (strcmp(mapping_de->d_name, ".") == 0 ||
|
||||||
strcmp(mapping_de->d_name, "..") == 0)
|
strcmp(mapping_de->d_name, "..") == 0)
|
||||||
|
|
@@ -1244,7 +1247,7 @@ CheckPointLogicalRewriteHeap(void)

 if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT,
 &dboid, &relid, &hi, &lo, &rewrite_xid, &create_xid) != 6)
-elog(ERROR,"could not parse filename \"%s\"", mapping_de->d_name);
+elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name);

 lsn = ((uint64) hi) << 32 | lo;

@ -1269,6 +1272,7 @@ CheckPointLogicalRewriteHeap(void)
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode_for_file_access(),
|
(errcode_for_file_access(),
|
||||||
errmsg("could not open file \"%s\": %m", path)));
|
errmsg("could not open file \"%s\": %m", path)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We could try to avoid fsyncing files that either haven't
|
* We could try to avoid fsyncing files that either haven't
|
||||||
* changed or have only been created since the checkpoint's start,
|
* changed or have only been created since the checkpoint's start,
|
||||||
|
|
|
||||||
|
|
@@ -91,8 +91,9 @@ heap_tuple_fetch_attr(struct varlena * attr)
 * to persist a Datum for unusually long time, like in a HOLD cursor.
 */
 struct varatt_indirect redirect;
+
 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
-attr = (struct varlena *)redirect.pointer;
+attr = (struct varlena *) redirect.pointer;

 /* nested indirect Datums aren't allowed */
 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
@ -147,8 +148,9 @@ heap_tuple_untoast_attr(struct varlena * attr)
|
||||||
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
|
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
|
||||||
{
|
{
|
||||||
struct varatt_indirect redirect;
|
struct varatt_indirect redirect;
|
||||||
|
|
||||||
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
|
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
|
||||||
attr = (struct varlena *)redirect.pointer;
|
attr = (struct varlena *) redirect.pointer;
|
||||||
|
|
||||||
/* nested indirect Datums aren't allowed */
|
/* nested indirect Datums aren't allowed */
|
||||||
Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
|
Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
|
||||||
|
|
@ -217,6 +219,7 @@ heap_tuple_untoast_attr_slice(struct varlena * attr,
|
||||||
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
|
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
|
||||||
{
|
{
|
||||||
struct varatt_indirect redirect;
|
struct varatt_indirect redirect;
|
||||||
|
|
||||||
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
|
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
|
||||||
|
|
||||||
/* nested indirect Datums aren't allowed */
|
/* nested indirect Datums aren't allowed */
|
||||||
|
|
@ -299,6 +302,7 @@ toast_raw_datum_size(Datum value)
|
||||||
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
|
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
|
||||||
{
|
{
|
||||||
struct varatt_indirect toast_pointer;
|
struct varatt_indirect toast_pointer;
|
||||||
|
|
||||||
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
|
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
|
||||||
|
|
||||||
/* nested indirect Datums aren't allowed */
|
/* nested indirect Datums aren't allowed */
|
||||||
|
|
@ -354,6 +358,7 @@ toast_datum_size(Datum value)
|
||||||
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
|
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
|
||||||
{
|
{
|
||||||
struct varatt_indirect toast_pointer;
|
struct varatt_indirect toast_pointer;
|
||||||
|
|
||||||
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
|
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
|
||||||
|
|
||||||
/* nested indirect Datums aren't allowed */
|
/* nested indirect Datums aren't allowed */
|
||||||
|
|
@ -2127,6 +2132,7 @@ toast_open_indexes(Relation toastrel,
|
||||||
for (i = 0; i < *num_indexes; i++)
|
for (i = 0; i < *num_indexes; i++)
|
||||||
{
|
{
|
||||||
Relation toastidx = (*toastidxs)[i];
|
Relation toastidx = (*toastidxs)[i];
|
||||||
|
|
||||||
if (toastidx->rd_index->indisvalid)
|
if (toastidx->rd_index->indisvalid)
|
||||||
{
|
{
|
||||||
res = i;
|
res = i;
|
||||||
|
|
@ -2136,14 +2142,14 @@ toast_open_indexes(Relation toastrel,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Free index list, not necessary anymore as relations are opened
|
* Free index list, not necessary anymore as relations are opened and a
|
||||||
* and a valid index has been found.
|
* valid index has been found.
|
||||||
*/
|
*/
|
||||||
list_free(indexlist);
|
list_free(indexlist);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The toast relation should have one valid index, so something is
|
* The toast relation should have one valid index, so something is going
|
||||||
* going wrong if there is nothing.
|
* wrong if there is nothing.
|
||||||
*/
|
*/
|
||||||
if (!found)
|
if (!found)
|
||||||
elog(ERROR, "no valid index found for toast relation with Oid %d",
|
elog(ERROR, "no valid index found for toast relation with Oid %d",
|
||||||
|
|
|
||||||
|
|
@ -620,10 +620,10 @@ _bt_findinsertloc(Relation rel,
|
||||||
lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If this page was incompletely split, finish the split now.
|
* If this page was incompletely split, finish the split now. We
|
||||||
* We do this while holding a lock on the left sibling, which
|
* do this while holding a lock on the left sibling, which is not
|
||||||
* is not good because finishing the split could be a fairly
|
* good because finishing the split could be a fairly lengthy
|
||||||
* lengthy operation. But this should happen very seldom.
|
* operation. But this should happen very seldom.
|
||||||
*/
|
*/
|
||||||
if (P_INCOMPLETE_SPLIT(lpageop))
|
if (P_INCOMPLETE_SPLIT(lpageop))
|
||||||
{
|
{
|
||||||
|
|
@ -1330,11 +1330,10 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
|
||||||
lastrdata++;
|
lastrdata++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Although we don't need to WAL-log anything on the left page,
|
* Although we don't need to WAL-log anything on the left page, we
|
||||||
* we still need XLogInsert to consider storing a full-page image
|
* still need XLogInsert to consider storing a full-page image of
|
||||||
* of the left page, so make an empty entry referencing that
|
* the left page, so make an empty entry referencing that buffer.
|
||||||
* buffer. This also ensures that the left page is always backup
|
* This also ensures that the left page is always backup block 1.
|
||||||
* block 1.
|
|
||||||
*/
|
*/
|
||||||
lastrdata->data = NULL;
|
lastrdata->data = NULL;
|
||||||
lastrdata->len = 0;
|
lastrdata->len = 0;
|
||||||
|
|
|
||||||
|
|
@ -1049,11 +1049,12 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
|
||||||
lbuf = _bt_getbuf(rel, leftsib, BT_READ);
|
lbuf = _bt_getbuf(rel, leftsib, BT_READ);
|
||||||
lpage = BufferGetPage(lbuf);
|
lpage = BufferGetPage(lbuf);
|
||||||
lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);
|
lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the left sibling was concurrently split, so that its
|
* If the left sibling was concurrently split, so that its
|
||||||
* next-pointer doesn't point to the current page anymore,
|
* next-pointer doesn't point to the current page anymore, the
|
||||||
* the split that created the current page must be completed.
|
* split that created the current page must be completed. (We
|
||||||
* (We don't allow splitting an incompletely split page again
|
* don't allow splitting an incompletely split page again
|
||||||
* until the previous split has been completed)
|
* until the previous split has been completed)
|
||||||
*/
|
*/
|
||||||
if (lopaque->btpo_next == parent &&
|
if (lopaque->btpo_next == parent &&
|
||||||
|
|
@ -1112,6 +1113,7 @@ _bt_pagedel(Relation rel, Buffer buf)
|
||||||
bool rightsib_empty;
|
bool rightsib_empty;
|
||||||
Page page;
|
Page page;
|
||||||
BTPageOpaque opaque;
|
BTPageOpaque opaque;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* "stack" is a search stack leading (approximately) to the target page.
|
* "stack" is a search stack leading (approximately) to the target page.
|
||||||
* It is initially NULL, but when iterating, we keep it to avoid
|
* It is initially NULL, but when iterating, we keep it to avoid
|
||||||
|
|
@ -1140,10 +1142,10 @@ _bt_pagedel(Relation rel, Buffer buf)
|
||||||
* was never supposed to leave half-dead pages in the tree, it was
|
* was never supposed to leave half-dead pages in the tree, it was
|
||||||
* just a transient state, but it was nevertheless possible in
|
* just a transient state, but it was nevertheless possible in
|
||||||
* error scenarios. We don't know how to deal with them here. They
|
* error scenarios. We don't know how to deal with them here. They
|
||||||
* are harmless as far as searches are considered, but inserts into
|
* are harmless as far as searches are considered, but inserts
|
||||||
* the deleted keyspace could add out-of-order downlinks in the
|
* into the deleted keyspace could add out-of-order downlinks in
|
||||||
* upper levels. Log a notice, hopefully the admin will notice and
|
* the upper levels. Log a notice, hopefully the admin will notice
|
||||||
* reindex.
|
* and reindex.
|
||||||
*/
|
*/
|
||||||
if (P_ISHALFDEAD(opaque))
|
if (P_ISHALFDEAD(opaque))
|
||||||
ereport(LOG,
|
ereport(LOG,
|
||||||
|
|
@ -1156,8 +1158,8 @@ _bt_pagedel(Relation rel, Buffer buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We can never delete rightmost pages nor root pages. While at
|
* We can never delete rightmost pages nor root pages. While at it,
|
||||||
* it, check that page is not already deleted and is empty.
|
* check that page is not already deleted and is empty.
|
||||||
*
|
*
|
||||||
* To keep the algorithm simple, we also never delete an incompletely
|
* To keep the algorithm simple, we also never delete an incompletely
|
||||||
* split page (they should be rare enough that this doesn't make any
|
* split page (they should be rare enough that this doesn't make any
|
||||||
|
|
@ -1167,10 +1169,10 @@ _bt_pagedel(Relation rel, Buffer buf)
|
||||||
* left half of an incomplete split, but ensuring that it's not the
|
* left half of an incomplete split, but ensuring that it's not the
|
||||||
* right half is more complicated. For that, we have to check that
|
* right half is more complicated. For that, we have to check that
|
||||||
* the left sibling doesn't have its INCOMPLETE_SPLIT flag set. On
|
* the left sibling doesn't have its INCOMPLETE_SPLIT flag set. On
|
||||||
* the first iteration, we temporarily release the lock on the
|
* the first iteration, we temporarily release the lock on the current
|
||||||
* current page, and check the left sibling and also construct a
|
* page, and check the left sibling and also construct a search stack
|
||||||
* search stack to. On subsequent iterations, we know we stepped right
|
* to. On subsequent iterations, we know we stepped right from a page
|
||||||
* from a page that passed these tests, so it's OK.
|
* that passed these tests, so it's OK.
|
||||||
*/
|
*/
|
||||||
if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque) ||
|
if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque) ||
|
||||||
P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page) ||
|
P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page) ||
|
||||||
|
|
@ -1184,9 +1186,9 @@ _bt_pagedel(Relation rel, Buffer buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* First, remove downlink pointing to the page (or a parent of the page,
|
* First, remove downlink pointing to the page (or a parent of the
|
||||||
* if we are going to delete a taller branch), and mark the page as
|
* page, if we are going to delete a taller branch), and mark the page
|
||||||
* half-dead.
|
* as half-dead.
|
||||||
*/
|
*/
|
||||||
if (!P_ISHALFDEAD(opaque))
|
if (!P_ISHALFDEAD(opaque))
|
||||||
{
|
{
|
||||||
|
|
@ -1219,9 +1221,9 @@ _bt_pagedel(Relation rel, Buffer buf)
|
||||||
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
|
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Fetch the left sibling, to check that it's not marked
|
* Fetch the left sibling, to check that it's not marked with
|
||||||
* with INCOMPLETE_SPLIT flag. That would mean that the
|
* INCOMPLETE_SPLIT flag. That would mean that the page
|
||||||
* page to-be-deleted doesn't have a downlink, and the page
|
* to-be-deleted doesn't have a downlink, and the page
|
||||||
* deletion algorithm isn't prepared to handle that.
|
* deletion algorithm isn't prepared to handle that.
|
||||||
*/
|
*/
|
||||||
if (!P_LEFTMOST(opaque))
|
if (!P_LEFTMOST(opaque))
|
||||||
|
|
@@ -1267,7 +1269,7 @@ _bt_pagedel(Relation rel, Buffer buf)

 /*
 * Then unlink it from its siblings. Each call to
-*_bt_unlink_halfdead_page unlinks the topmost page from the branch,
+* _bt_unlink_halfdead_page unlinks the topmost page from the branch,
 * making it shallower. Iterate until the leaf page is gone.
 */
 rightsib_empty = false;
@ -1291,8 +1293,8 @@ _bt_pagedel(Relation rel, Buffer buf)
|
||||||
* is that it was the rightmost child of the parent. Now that we
|
* is that it was the rightmost child of the parent. Now that we
|
||||||
* removed the downlink for this page, the right sibling might now be
|
* removed the downlink for this page, the right sibling might now be
|
||||||
* the only child of the parent, and could be removed. It would be
|
* the only child of the parent, and could be removed. It would be
|
||||||
* picked up by the next vacuum anyway, but might as well try to remove
|
* picked up by the next vacuum anyway, but might as well try to
|
||||||
* it now, so loop back to process the right sibling.
|
* remove it now, so loop back to process the right sibling.
|
||||||
*/
|
*/
|
||||||
if (!rightsib_empty)
|
if (!rightsib_empty)
|
||||||
break;
|
break;
|
||||||
|
|
@ -1605,9 +1607,9 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
|
||||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check page is still empty etc, else abandon deletion. This is just
|
* Check page is still empty etc, else abandon deletion. This is just for
|
||||||
* for paranoia's sake; a half-dead page cannot resurrect because there
|
* paranoia's sake; a half-dead page cannot resurrect because there can be
|
||||||
* can be only one vacuum process running at a time.
|
* only one vacuum process running at a time.
|
||||||
*/
|
*/
|
||||||
if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque))
|
if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque))
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@@ -40,9 +40,9 @@ _bt_restore_page(Page page, char *from, int len)
@@ -128,6 +128,7 @@ _bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record,
@@ -153,6 +154,7 @@ btree_xlog_insert(bool isleaf, bool ismeta,
@@ -172,10 +174,10 @@ btree_xlog_insert(bool isleaf, bool ismeta,
@@ -279,9 +281,10 @@ btree_xlog_split(bool onleft, bool isroot,
@@ -716,9 +719,9 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)

@@ -54,7 +54,7 @@ desc_recompress_leaf(StringInfo buf, ginxlogRecompressDataLeaf *insertData)
@@ -129,6 +129,7 @@ gin_desc(StringInfo buf, uint8 xl_info, char *rec)
@@ -155,6 +156,7 @@ gin_desc(StringInfo buf, uint8 xl_info, char *rec)

@@ -579,9 +579,9 @@ MultiXactIdSetOldestMember(void)
@@ -2399,8 +2399,8 @@ SlruScanDirCbRemoveMembers(SlruCtl ctl, char *filename, int segpage,

@@ -487,8 +487,8 @@ AssignTransactionId(TransactionState s)

@@ -418,11 +418,11 @@ typedef struct XLogCtlInsert
@@ -504,10 +504,11 @@ typedef struct XLogCtlData
@@ -860,6 +861,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
@@ -1232,6 +1234,7 @@ begin:;
@@ -1272,7 +1275,7 @@ begin:;
@@ -1514,8 +1517,8 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata,
@@ -1556,14 +1559,14 @@ WALInsertLockAcquire(void)
@@ -1583,10 +1586,10 @@ WALInsertLockAcquire(void)
@@ -1604,8 +1607,8 @@ WALInsertLockAcquireExclusive(void)
@@ -1716,15 +1719,16 @@ WaitXLogInsertionsToFinish(XLogRecPtr upto)

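WALInsertLockAcquire(), touched in the hunks above, picks among several WAL insertion locks by affinity: it keeps reusing the lock that worked last time and only rotates to the next one when an immediate acquire fails. The following is a minimal standalone sketch of that idea; NUM_LOCKS, lock_try and lock_wait are illustrative stand-ins rather than the actual xlog.c symbols, and the stubbed lock calls merely simulate contention.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define NUM_LOCKS 8				/* stand-in for num_xloginsert_locks */

/* Stub for a conditional (no-wait) acquire. */
static bool
lock_try(int which)
{
	(void) which;				/* stub: pretend it usually succeeds */
	return (rand() % 4) != 0;
}

/* Stub for a blocking acquire. */
static void
lock_wait(int which)
{
	(void) which;				/* stub: a real one would sleep here */
}

/* Acquire one insertion lock, preferring the one we used last time. */
static int
acquire_insert_lock(void)
{
	static int	lockToTry = -1;
	int			acquired;

	if (lockToTry == -1)
		lockToTry = rand() % NUM_LOCKS; /* first call: pick (semi-)randomly */
	acquired = lockToTry;

	if (!lock_try(acquired))
	{
		/* Contended: take it the slow way, but start elsewhere next time. */
		lock_wait(acquired);
		lockToTry = (lockToTry + 1) % NUM_LOCKS;
	}
	return acquired;
}

int
main(void)
{
	for (int i = 0; i < 5; i++)
		printf("acquired insertion lock %d\n", acquire_insert_lock());
	return 0;
}
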
@@ -1797,9 +1801,9 @@ GetXLogBuffer(XLogRecPtr ptr)
@@ -1827,8 +1831,8 @@ GetXLogBuffer(XLogRecPtr ptr)
@@ -1837,7 +1841,7 @@ GetXLogBuffer(XLogRecPtr ptr)
@@ -2170,8 +2174,8 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic)
@@ -2194,6 +2198,7 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic)
@@ -2202,12 +2207,12 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic)
@@ -2330,6 +2335,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
@@ -2617,6 +2623,7 @@ XLogGetReplicationSlotMinimumLSN(void)
@@ -3828,6 +3835,7 @@ RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr endptr)
@@ -4815,7 +4823,7 @@ XLOGShmemInit(void)
@@ -4836,8 +4844,8 @@ XLOGShmemInit(void)

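GetXLogBuffer(), also reformatted above, relies on the WAL buffer cache mapping every page to exactly one buffer slot, so the slot can be computed from the record pointer alone. A small standalone sketch of that mapping follows, assuming a simple byte-position pointer and made-up constants (BLOCK_SIZE, N_BUFFERS) in place of the real XLOG_BLCKSZ and WAL buffer count.

#include <stdint.h>
#include <stdio.h>

#define BLOCK_SIZE	8192u		/* stand-in for XLOG_BLCKSZ */
#define N_BUFFERS	16u			/* stand-in for the number of WAL buffers */

typedef uint64_t WalPtr;		/* byte position in the WAL stream */

/* Each WAL page has exactly one buffer slot it may occupy. */
static unsigned
page_slot(WalPtr ptr)
{
	return (unsigned) ((ptr / BLOCK_SIZE) % N_BUFFERS);
}

int
main(void)
{
	WalPtr		ptr = 123456789;
	WalPtr		page_start = ptr - (ptr % BLOCK_SIZE);

	printf("record at %llu is on the page starting at %llu, buffer slot %u\n",
		   (unsigned long long) ptr,
		   (unsigned long long) page_start,
		   page_slot(ptr));
	return 0;
}
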
@@ -5464,8 +5472,8 @@ recoveryStopsBefore(XLogRecord *record)
@@ -5525,8 +5533,8 @@ recoveryStopsAfter(XLogRecord *record)
@@ -5688,10 +5696,10 @@ recoveryApplyDelay(XLogRecord *record)
@@ -5711,7 +5719,7 @@ recoveryApplyDelay(XLogRecord *record)
@@ -5731,7 +5739,7 @@ recoveryApplyDelay(XLogRecord *record)
@@ -6261,9 +6269,9 @@ StartupXLOG(void)
@@ -6517,9 +6525,9 @@ StartupXLOG(void)
@@ -6646,17 +6654,17 @@ StartupXLOG(void)
@@ -6996,9 +7004,9 @@ StartupXLOG(void)
@@ -7335,6 +7343,7 @@ RecoveryInProgress(void)
@@ -8131,9 +8140,8 @@ CreateCheckPoint(int flags)
@@ -8600,11 +8608,11 @@ CreateRestartPoint(int flags)
@@ -8859,8 +8867,9 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
@@ -9262,10 +9271,10 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)

@@ -1256,10 +1256,10 @@ index_constraint_create(Relation heapRelation,
@@ -1443,10 +1443,10 @@ index_drop(Oid indexId, bool concurrent)

@@ -344,7 +344,7 @@ smgrDoPendingDeletes(bool isCommit)

@@ -177,8 +177,8 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,

@@ -696,11 +696,11 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, bool forcetemp,
@@ -1404,7 +1404,8 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
@@ -1511,11 +1512,11 @@ finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,

@@ -2248,8 +2248,8 @@ CopyFrom(CopyState cstate)
@@ -2569,19 +2569,20 @@ BeginCopyFrom(Relation rel,
@@ -2861,7 +2862,7 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext,
@@ -2870,14 +2871,14 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext,

@@ -359,8 +359,8 @@ intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)

@@ -751,7 +751,8 @@ dropdb(const char *dbname, bool missing_ok)

@@ -321,7 +321,8 @@ ExplainOneQuery(Query *query, IntoClause *into, ExplainState *es,

@@ -349,11 +349,11 @@ DefineIndex(Oid relationId,

@@ -240,9 +240,9 @@ ExecRefreshMatView(RefreshMatViewStmt *stmt, const char *queryString,

@@ -325,8 +325,8 @@ fill_seq_with_data(Relation rel, HeapTuple tuple)
@@ -1554,13 +1554,13 @@ seq_redo(XLogRecPtr lsn, XLogRecord *record)

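seq_redo(), whose comment is re-wrapped in the hunk above, rebuilds the sequence page in local workspace and then memcpy()s the finished image into the shared buffer, so a concurrent hot-standby reader never sees a transiently trashed page. The sketch below shows the pattern in isolation; overwrite_page and PAGE_SIZE are generic placeholder names, not the actual sequence.c code.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 8192			/* stand-in for the real block size */

/*
 * Rebuild a page without exposing a half-built state: prepare the new
 * image in private memory, then copy it over the shared page in one pass,
 * so bytes that are not supposed to change are only ever rewritten with
 * the value they already hold.
 */
static void
overwrite_page(char *shared_page, const char *payload, size_t payload_len)
{
	char	   *localpage = malloc(PAGE_SIZE);

	memset(localpage, 0, PAGE_SIZE);			/* fresh page image */
	memcpy(localpage, payload, payload_len);	/* fill in the new contents */

	memcpy(shared_page, localpage, PAGE_SIZE);	/* single pass over the buffer */
	free(localpage);
}

int
main(void)
{
	static char page[PAGE_SIZE];

	overwrite_page(page, "hello", 5);
	printf("%.5s\n", page);
	return 0;
}
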
@ -2787,12 +2787,13 @@ AlterTableGetLockLevel(List *cmds)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* These subcommands may require addition of toast tables. If we
|
* These subcommands may require addition of toast tables. If
|
||||||
-			 * add a toast table to a table currently being scanned, we
+			 * we add a toast table to a table currently being scanned, we
 			 * might miss data added to the new toast table by concurrent
 			 * insert transactions.
 			 */
-		case AT_SetStorage:	/* may add toast tables, see ATRewriteCatalogs() */
+		case AT_SetStorage: /* may add toast tables, see
+							 * ATRewriteCatalogs() */
 			cmd_lockmode = AccessExclusiveLock;
 			break;

@@ -2834,8 +2835,8 @@ AlterTableGetLockLevel(List *cmds)
-				 * These subcommands affect write operations only.
-				 * XXX Theoretically, these could be ShareRowExclusiveLock.
+				 * These subcommands affect write operations only. XXX
+				 * Theoretically, these could be ShareRowExclusiveLock.

@@ -2872,9 +2873,9 @@ AlterTableGetLockLevel(List *cmds)
-				 * updates.
-				 * XXX Might be set down to ShareRowExclusiveLock
-				 * but requires further analysis.
+				 * updates. XXX Might be set down to
+				 * ShareRowExclusiveLock but requires further
+				 * analysis.

@@ -2883,10 +2884,9 @@ AlterTableGetLockLevel(List *cmds)
-				 * as strong as CREATE TRIGGER.
-				 * XXX Might be set down to ShareRowExclusiveLock
-				 * though trigger info is accessed by
-				 * pg_get_triggerdef
+				 * as strong as CREATE TRIGGER. XXX Might be set
+				 * down to ShareRowExclusiveLock though trigger
+				 * info is accessed by pg_get_triggerdef

@@ -2902,8 +2902,8 @@ AlterTableGetLockLevel(List *cmds)
-		 * subtable while we hook it up though.
-		 * Changing the TupDesc may be a problem, so keep highest lock.
+		 * subtable while we hook it up though. Changing the TupDesc
+		 * may be a problem, so keep highest lock.

@@ -2912,9 +2912,9 @@ AlterTableGetLockLevel(List *cmds)
-		 * have affects similar to CREATE/DROP CAST on queries.
-		 * don't provide for invalidating parse trees as a result of
-		 * such changes, so we keep these at AccessExclusiveLock.
+		 * have affects similar to CREATE/DROP CAST on queries. don't
+		 * provide for invalidating parse trees as a result of such
+		 * changes, so we keep these at AccessExclusiveLock.

@@ -2947,22 +2947,25 @@ AlterTableGetLockLevel(List *cmds)
-			case AT_ValidateConstraint:		/* Uses MVCC in getConstraints() */
+			case AT_ValidateConstraint:		/* Uses MVCC in
+											 * getConstraints() */
 				cmd_lockmode = ShareUpdateExclusiveLock;
 				break;
-				 * reasons these can all be used with ALTER TABLE, so we
-				 * can't decide between them using the basic grammar.
+				 * reasons these can all be used with ALTER TABLE, so we can't
+				 * decide between them using the basic grammar.
-				 * e.g.
-				 * cmd_lockmode = GetRelOptionsLockLevel((List *) cmd->def);
+				 * e.g. cmd_lockmode = GetRelOptionsLockLevel((List *)
+				 * cmd->def);
-			case AT_SetRelOptions:		/* Uses MVCC in getIndexes() and getTables() */
-			case AT_ResetRelOptions:	/* Uses MVCC in getIndexes() and getTables() */
+			case AT_SetRelOptions:		/* Uses MVCC in getIndexes() and
+										 * getTables() */
+			case AT_ResetRelOptions:	/* Uses MVCC in getIndexes() and
+										 * getTables() */
 				cmd_lockmode = AccessExclusiveLock;
 				break;
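Illustrative sketch, not part of this commit: AlterTableGetLockLevel, whose comments are rewrapped above, picks a lock per subcommand and keeps the strongest one across the whole command list. The toy types and values below are hypothetical, not PostgreSQL's real LOCKMODE machinery.

/* Toy model of keeping the strongest lock across ALTER TABLE subcommands. */
typedef enum
{
	TOY_SHARE_UPDATE_EXCLUSIVE = 4,		/* weakest considered here */
	TOY_ACCESS_EXCLUSIVE = 8			/* strongest */
} ToyLockMode;

typedef enum
{
	TOY_AT_VALIDATE_CONSTRAINT,	/* uses MVCC catalog scans */
	TOY_AT_SET_STORAGE			/* may add toast tables */
} ToyAlterTableCmd;

static ToyLockMode
toy_get_lock_level(const ToyAlterTableCmd *cmds, int ncmds)
{
	ToyLockMode lockmode = TOY_SHARE_UPDATE_EXCLUSIVE;
	int			i;

	for (i = 0; i < ncmds; i++)
	{
		ToyLockMode cmd_lockmode;

		switch (cmds[i])
		{
			case TOY_AT_VALIDATE_CONSTRAINT:
				cmd_lockmode = TOY_SHARE_UPDATE_EXCLUSIVE;
				break;
			default:
				cmd_lockmode = TOY_ACCESS_EXCLUSIVE;
				break;
		}
		/* keep the strongest lock needed by any subcommand */
		if (cmd_lockmode > lockmode)
			lockmode = cmd_lockmode;
	}
	return lockmode;
}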
@@ -3946,8 +3949,8 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
-			 * Constraints might reference the tableoid column, so initialize
-			 * t_tableOid before evaluating them.
+			 * Constraints might reference the tableoid column, so
+			 * initialize t_tableOid before evaluating them.

@@ -6374,8 +6377,8 @@ ATExecAlterConstraint(Relation rel, AlterTableCmd *cmd,
-		 * Now we need to update the multiple entries in pg_trigger
-		 * that implement the constraint.
+		 * Now we need to update the multiple entries in pg_trigger that
+		 * implement the constraint.
@@ -8150,11 +8153,11 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
-	 * We can't rely on the output of deparsing to tell us which relation
-	 * to operate on, because concurrent activity might have made the name
+	 * We can't rely on the output of deparsing to tell us which relation to
+	 * operate on, because concurrent activity might have made the name
 	 * resolve differently.  Instead, we've got to use the OID of the
-	 * constraint or index we're processing to figure out which relation
-	 * to operate on.
+	 * constraint or index we're processing to figure out which relation to
+	 * operate on.

@@ -9099,6 +9102,7 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
 		Relation	toastRel = relation_open(reltoastrelid, lockmode);
+
 		reltoastidxids = RelationGetIndexList(toastRel);

@@ -9120,8 +9124,8 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
-	 * Relfilenodes are not unique in databases across tablespaces, so we
-	 * need to allocate a new one in the new tablespace.
+	 * Relfilenodes are not unique in databases across tablespaces, so we need
+	 * to allocate a new one in the new tablespace.

@@ -9236,9 +9240,9 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
-		 * WAL-log the copied page. Unfortunately we don't know what kind of
-		 * a page this is, so we have to log the full page including any
-		 * unused space.
+		 * WAL-log the copied page. Unfortunately we don't know what kind of a
+		 * page this is, so we have to log the full page including any unused
+		 * space.

@@ -10191,8 +10195,8 @@ relation_mark_replica_identity(Relation rel, char ri_type, Oid indexOid,
-	 * Clear the indisreplident flag from any index that had it previously, and
-	 * set it for any index that should have it now.
+	 * Clear the indisreplident flag from any index that had it previously,
+	 * and set it for any index that should have it now.

@@ -10261,7 +10265,7 @@ ATExecReplicaIdentity(Relation rel, ReplicaIdentityStmt *stmt, LOCKMODE lockmode
-		/* fallthrough */;
+		/* fallthrough */ ;
@@ -1119,8 +1119,8 @@ AlterTableSpaceMove(AlterTableSpaceMoveStmt *stmt)
 	 * Handle permissions-checking here since we are locking the tables
-	 * and also to avoid doing a bunch of work only to fail part-way.
-	 * Note that permissions will also be checked by AlterTableInternal().
+	 * and also to avoid doing a bunch of work only to fail part-way. Note
+	 * that permissions will also be checked by AlterTableInternal().

@@ -3566,6 +3566,7 @@ AfterTriggerExecute(AfterTriggerEvent event,
 		case AFTER_TRIGGER_FDW_REUSE:
+
 			/*
 			 * Using ExecMaterializeSlot() rather than ExecFetchSlotTuple()
 			 * ensures that tg_trigtuple does not reference tuplestore memory.
@@ -706,10 +706,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
-				 * recovery might try to replay our record setting the
-				 * page all-visible and find that the page isn't initialized,
-				 * which will cause a PANIC.  To prevent that, check whether
-				 * the page has been previously WAL-logged, and if not, do that
+				 * recovery might try to replay our record setting the page
+				 * all-visible and find that the page isn't initialized, which
+				 * will cause a PANIC.  To prevent that, check whether the
+				 * page has been previously WAL-logged, and if not, do that
 				 * now.

@@ -834,8 +834,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
-					 * that the tuple is hinted xmin-committed because
-					 * of that.
+					 * that the tuple is hinted xmin-committed because of
+					 * that.
@@ -460,8 +460,8 @@ DefineView(ViewStmt *stmt, const char *queryString)
-	 * If the check option is specified, look to see if the view is
-	 * actually auto-updatable or not.
+	 * If the check option is specified, look to see if the view is actually
+	 * auto-updatable or not.
@@ -1639,7 +1639,8 @@ ExecWithCheckOptions(ResultRelInfo *resultRelInfo,
 	ExprContext *econtext;
-	ListCell   *l1, *l2;
+	ListCell   *l1,
+			   *l2;
@@ -449,8 +449,8 @@ ExecInitFunctionScan(FunctionScan *node, EState *estate, int eflags)
-	 * tupdesc is the same as the function result tupdesc --- except that
-	 * we may stuff new names into it below, so drop any rowtype label.
+	 * tupdesc is the same as the function result tupdesc --- except that we
+	 * may stuff new names into it below, so drop any rowtype label.
@@ -973,6 +973,7 @@ ExecModifyTable(ModifyTableState *node)
 				tupleid = &tuple_ctid;
 			}
+
 			/*
 			 * Use the wholerow attribute, when available, to reconstruct
 			 * the old relation tuple.

@@ -1175,6 +1176,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 			ExprState  *wcoExpr = ExecInitExpr((Expr *) wco->qual,
 											   mtstate->mt_plans[i]);
+
 			wcoExprs = lappend(wcoExprs, wcoExpr);
@@ -692,6 +692,7 @@ StreamConnection(pgsocket server_fd, Port *port)
 #ifdef WIN32
+
 	/*
 	 * This is a Win32 socket optimization.  The ideal size is 32k.
 	 * http://support.microsoft.com/kb/823764/EN-US/

@@ -109,6 +109,7 @@ main(int argc, char *argv[])
 #ifdef WIN32
+
 	/*
 	 * Windows uses codepages rather than the environment, so we work around
 	 * that by querying the environment explicitly first for LC_COLLATE and

@@ -202,6 +203,7 @@ main(int argc, char *argv[])
 #ifdef WIN32
+
 	/*
 	 * Start our win32 signal implementation
@@ -3300,7 +3300,7 @@ _copyReplicaIdentityStmt(const ReplicaIdentityStmt *from)
 static AlterSystemStmt *
-_copyAlterSystemStmt(const AlterSystemStmt * from)
+_copyAlterSystemStmt(const AlterSystemStmt *from)

@@ -1551,7 +1551,7 @@ _equalReplicaIdentityStmt(const ReplicaIdentityStmt *a, const ReplicaIdentityStm
 static bool
-_equalAlterSystemStmt(const AlterSystemStmt * a, const AlterSystemStmt * b)
+_equalAlterSystemStmt(const AlterSystemStmt *a, const AlterSystemStmt *b)
@@ -1938,8 +1938,8 @@ add_child_rel_equivalences(PlannerInfo *root,
-		 * No point in searching if parent rel not mentioned in eclass; but
-		 * we can't tell that for sure if parent rel is itself a child.
+		 * No point in searching if parent rel not mentioned in eclass; but we
+		 * can't tell that for sure if parent rel is itself a child.

@@ -916,8 +916,8 @@ inheritance_planner(PlannerInfo *root)
-		 * Planning may have modified the query result relation (if there
-		 * were security barrier quals on the result RTE).
+		 * Planning may have modified the query result relation (if there were
+		 * security barrier quals on the result RTE).

@@ -940,7 +940,8 @@ inheritance_planner(PlannerInfo *root)
 			List	   *tmp_rtable = NIL;
-			ListCell   *cell1, *cell2;
+			ListCell   *cell1,
+					   *cell2;
@@ -97,6 +97,7 @@ expand_security_quals(PlannerInfo *root, List *tlist)
 			RangeTblEntry *newrte = copyObject(rte);
+
 			parse->rtable = lappend(parse->rtable, newrte);

@@ -117,11 +118,11 @@ expand_security_quals(PlannerInfo *root, List *tlist)
-			 * For the most part, Vars referencing the original relation should
-			 * remain as they are, meaning that they pull OLD values from the
-			 * expanded RTE. But in the RETURNING list and in any WITH CHECK
-			 * OPTION quals, we want such Vars to represent NEW values, so
-			 * change them to reference the new RTE.
+			 * For the most part, Vars referencing the original relation
+			 * should remain as they are, meaning that they pull OLD values
+			 * from the expanded RTE. But in the RETURNING list and in any
+			 * WITH CHECK OPTION quals, we want such Vars to represent NEW
+			 * values, so change them to reference the new RTE.

@@ -142,6 +143,7 @@ expand_security_quals(PlannerInfo *root, List *tlist)
 			Node	   *qual = (Node *) linitial(rte->securityQuals);
+
 			rte->securityQuals = list_delete_first(rte->securityQuals);

@@ -182,6 +184,7 @@ expand_security_qual(PlannerInfo *root, List *tlist, int rt_index,
 		case RTE_RELATION:
+
 			/*
 			 * Turn the relation RTE into a security barrier subquery RTE,
 			 * moving all permissions checks down into the subquery.

@@ -219,9 +222,9 @@ expand_security_qual(PlannerInfo *root, List *tlist, int rt_index,
-			 * those rows don't pass the user-defined quals. This is currently
-			 * documented behavior, but it'd be nice to come up with a better
-			 * solution some day.
+			 * those rows don't pass the user-defined quals. This is
+			 * currently documented behavior, but it'd be nice to come up with
+			 * a better solution some day.

@@ -277,6 +280,7 @@ expand_security_qual(PlannerInfo *root, List *tlist, int rt_index,
 		case RTE_SUBQUERY:
+
 			/*
 			 * Build a new subquery that includes all the same columns as the
 			 * original subquery.
@@ -1708,6 +1708,7 @@ adjust_appendrel_attrs_mutator(Node *node,
 				Var		   *field = (Var *) lfirst(lc);
+
 				field->varlevelsup += context->sublevels_up;

@@ -2131,7 +2131,8 @@ transformDistinctOnClause(ParseState *pstate, List *distinctlist,
-	 * An empty result list is impossible here because of grammar restrictions.
+	 * An empty result list is impossible here because of grammar
+	 * restrictions.
@@ -143,8 +143,8 @@ downcase_truncate_identifier(const char *ident, int len, bool warn)
-	 * the high bit set, as long as they aren't part of a multi-byte character,
-	 * and use an ASCII-only downcasing for 7-bit characters.
+	 * the high bit set, as long as they aren't part of a multi-byte
+	 * character, and use an ASCII-only downcasing for 7-bit characters.
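Illustrative sketch, not part of this commit: the compromise described in the comment above can be shown with a toy downcasing routine that uses plain ASCII arithmetic for 7-bit letters and tolower() only for bytes with the high bit set; the multibyte-character check is elided and the function name is hypothetical.

#include <ctype.h>

/* Toy downcasing: ASCII letters by arithmetic, high-bit bytes via tolower(). */
static void
toy_downcase(char *dst, const char *src, int len)
{
	int			i;

	for (i = 0; i < len; i++)
	{
		unsigned char ch = (unsigned char) src[i];

		if (ch >= 'A' && ch <= 'Z')
			ch += 'a' - 'A';	/* ASCII-only downcasing for 7-bit chars */
		else if (ch >= 0x80)
			ch = tolower(ch);	/* locale-dependent for high-bit bytes */
		dst[i] = (char) ch;
	}
	dst[len] = '\0';
}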
@@ -374,8 +374,8 @@ CreateAnonymousSegment(Size *size)
-		 * use the original size, not the rounded up value, when falling
-		 * back to non-huge pages.
+		 * use the original size, not the rounded up value, when falling back
+		 * to non-huge pages.

@@ -512,9 +512,9 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port,
-		 * and any associated dynamic shared memory segments, as well.
-		 * This probably shouldn't fail, but if it does, assume the segment
-		 * belongs to someone else after all, and continue quietly.
+		 * and any associated dynamic shared memory segments, as well.  This
+		 * probably shouldn't fail, but if it does, assume the segment belongs
+		 * to someone else after all, and continue quietly.
@@ -127,10 +127,10 @@ BackgroundWorkerShmemInit(void)
-		 * Copy contents of worker list into shared memory.  Record the
-		 * shared memory slot assigned to each worker.  This ensures
-		 * a 1-to-1 correspondence betwen the postmaster's private list and
-		 * the array in shared memory.
+		 * Copy contents of worker list into shared memory.  Record the shared
+		 * memory slot assigned to each worker.  This ensures a 1-to-1
+		 * correspondence betwen the postmaster's private list and the array
+		 * in shared memory.

@@ -200,8 +200,8 @@ BackgroundWorkerStateChange(void)
-	 * max_worker_processes, in case shared memory gets corrupted while
-	 * we're looping.
+	 * max_worker_processes, in case shared memory gets corrupted while we're
+	 * looping.

@@ -213,8 +213,8 @@ BackgroundWorkerStateChange(void)
-	 * Iterate through slots, looking for newly-registered workers or
-	 * workers who must die.
+	 * Iterate through slots, looking for newly-registered workers or workers
+	 * who must die.

@@ -267,8 +267,8 @@ BackgroundWorkerStateChange(void)
-		 * Copy strings in a paranoid way.  If shared memory is corrupted,
-		 * the source data might not even be NUL-terminated.
+		 * Copy strings in a paranoid way.  If shared memory is corrupted, the
+		 * source data might not even be NUL-terminated.

@@ -280,10 +280,10 @@ BackgroundWorkerStateChange(void)
-		 * flags, start_time, and restart_time are examined by the
-		 * postmaster, but nothing too bad will happen if they are
-		 * corrupted.  The remaining fields will only be examined by the
-		 * child process.  It might crash, but we won't.
+		 * flags, start_time, and restart_time are examined by the postmaster,
+		 * but nothing too bad will happen if they are corrupted.  The
+		 * remaining fields will only be examined by the child process.  It
+		 * might crash, but we won't.

@@ -292,13 +292,13 @@ BackgroundWorkerStateChange(void)
-		 * Copy the PID to be notified about state changes, but only if
-		 * the postmaster knows about a backend with that PID. It isn't
-		 * an error if the postmaster doesn't know about the PID, because
-		 * the backend that requested the worker could have died (or been
-		 * killed) just after doing so. Nonetheless, at least until we get
-		 * some experience with how this plays out in the wild, log a message
-		 * at a relative high debug level.
+		 * Copy the PID to be notified about state changes, but only if the
+		 * postmaster knows about a backend with that PID. It isn't an error
+		 * if the postmaster doesn't know about the PID, because the backend
+		 * that requested the worker could have died (or been killed) just
+		 * after doing so. Nonetheless, at least until we get some experience
+		 * with how this plays out in the wild, log a message at a relative
+		 * high debug level.
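Illustrative sketch, not part of this commit: the "paranoid" copy described in the BackgroundWorkerStateChange hunk above can be approximated by a bounded copy that never reads more than a fixed maximum from a possibly unterminated source and always NUL-terminates the destination; the function below is a hypothetical stand-in, not PostgreSQL's ascii_safe_strlcpy.

#include <stddef.h>

/*
 * Toy paranoid copy: read at most maxlen-1 bytes from a possibly
 * unterminated source and always NUL-terminate the destination.
 */
static void
toy_paranoid_copy(char *dst, const char *src, size_t maxlen)
{
	size_t		i;

	for (i = 0; i + 1 < maxlen && src[i] != '\0'; i++)
		dst[i] = src[i];
	dst[i] = '\0';
}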
@@ -633,11 +633,11 @@ StartBackgroundWorker(void)
-	 * postmaster startup time, passing it as a direct function pointer is
-	 * not possible.  To work around that, we allow callers for whom a
-	 * function pointer is not available to pass a library name (which will
-	 * be loaded, if necessary) and a function name (which will be looked up
-	 * in the named library).
+	 * postmaster startup time, passing it as a direct function pointer is not
+	 * possible.  To work around that, we allow callers for whom a function
+	 * pointer is not available to pass a library name (which will be loaded,
+	 * if necessary) and a function name (which will be looked up in the named
+	 * library).

@@ -761,12 +761,12 @@ RegisterDynamicBackgroundWorker(BackgroundWorker *worker,
-	 * We can't register dynamic background workers from the postmaster.
-	 * If this is a standalone backend, we're the only process and can't
-	 * start any more.  In a multi-process environement, it might be
-	 * theoretically possible, but we don't currently support it due to
-	 * locking considerations; see comments on the BackgroundWorkerSlot
-	 * data structure.
+	 * We can't register dynamic background workers from the postmaster.  If
+	 * this is a standalone backend, we're the only process and can't start
+	 * any more.  In a multi-process environement, it might be theoretically
+	 * possible, but we don't currently support it due to locking
+	 * considerations; see comments on the BackgroundWorkerSlot data
+	 * structure.

@@ -792,8 +792,8 @@ RegisterDynamicBackgroundWorker(BackgroundWorker *worker,
-			 * Make sure postmaster doesn't see the slot as in use before
-			 * it sees the new contents.
+			 * Make sure postmaster doesn't see the slot as in use before it
+			 * sees the new contents.
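Illustrative sketch, not part of this commit: the pg_write_barrier() call in the hunk above enforces publish-after-fill ordering when handing a slot to the postmaster. A self-contained analogue using C11 atomics, with a hypothetical slot type, might look like this.

#include <stdatomic.h>
#include <stdbool.h>
#include <string.h>

/* Hypothetical slot: contents must be visible before in_use is set. */
typedef struct
{
	char		name[64];
	_Atomic bool in_use;
} ToySlot;

static void
toy_publish_slot(ToySlot *slot, const char *name)
{
	/* Fill in the slot contents first. */
	strncpy(slot->name, name, sizeof(slot->name) - 1);
	slot->name[sizeof(slot->name) - 1] = '\0';

	/*
	 * Release store: a reader that observes in_use == true with an acquire
	 * load is guaranteed to also see the contents written above, which is
	 * the ordering the write barrier provides in the original code.
	 */
	atomic_store_explicit(&slot->in_use, true, memory_order_release);
}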
@@ -845,10 +845,10 @@ GetBackgroundWorkerPid(BackgroundWorkerHandle *handle, pid_t *pidp)
-	 * We could probably arrange to synchronize access to data using
-	 * memory barriers only, but for now, let's just keep it simple and
-	 * grab the lock.  It seems unlikely that there will be enough traffic
-	 * here to result in meaningful contention.
+	 * We could probably arrange to synchronize access to data using memory
+	 * barriers only, but for now, let's just keep it simple and grab the
+	 * lock.  It seems unlikely that there will be enough traffic here to
+	 * result in meaningful contention.
@@ -298,11 +298,11 @@ BackgroundWriterMain(void)
-		 * Log a new xl_running_xacts every now and then so replication can get
-		 * into a consistent state faster (think of suboverflowed snapshots)
-		 * and clean up resources (locks, KnownXids*) more frequently. The
-		 * costs of this are relatively low, so doing it 4 times
-		 * (LOG_SNAPSHOT_INTERVAL_MS) a minute seems fine.
+		 * Log a new xl_running_xacts every now and then so replication can
+		 * get into a consistent state faster (think of suboverflowed
+		 * snapshots) and clean up resources (locks, KnownXids*) more
+		 * frequently. The costs of this are relatively low, so doing it 4
+		 * times (LOG_SNAPSHOT_INTERVAL_MS) a minute seems fine.

@@ -314,20 +314,21 @@ BackgroundWriterMain(void)
-		 * run regularly and returns to its mainloop all the
-		 * time. E.g. Checkpointer, when active, is barely ever in its
-		 * mainloop and thus makes it hard to log regularly.
+		 * run regularly and returns to its mainloop all the time. E.g.
+		 * Checkpointer, when active, is barely ever in its mainloop and thus
+		 * makes it hard to log regularly.
 		 */
 		if (XLogStandbyInfoActive() && !RecoveryInProgress())
 		{
 			TimestampTz timeout = 0;
 			TimestampTz now = GetCurrentTimestamp();
+
 			timeout = TimestampTzPlusMilliseconds(last_snapshot_ts,
 												  LOG_SNAPSHOT_INTERVAL_MS);

 			/*
-			 * only log if enough time has passed and some xlog record has been
-			 * inserted.
+			 * only log if enough time has passed and some xlog record has
+			 * been inserted.
 			 */
 			if (now >= timeout &&
 				last_snapshot_lsn != GetXLogInsertRecPtr())
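Illustrative sketch, not part of this commit: the gating test in the hunk above only emits a snapshot record when both the interval has elapsed and the insert position has moved. A self-contained analogue with hypothetical names:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical state for a periodic, progress-gated action. */
typedef struct
{
	int64_t		last_log_time_us;	/* when we last logged */
	uint64_t	last_logged_lsn;	/* insert position at that time */
} ToySnapshotState;

/* Log only if interval_us has passed and the insert position advanced. */
static bool
toy_should_log(const ToySnapshotState *state, int64_t now_us,
			   uint64_t current_lsn, int64_t interval_us)
{
	return now_us >= state->last_log_time_us + interval_us &&
		current_lsn != state->last_logged_lsn;
}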
@@ -487,14 +487,20 @@ pgarch_ArchiverCopyLoop(void)
-			/* Tell the collector about the WAL file that we successfully archived */
+			/*
+			 * Tell the collector about the WAL file that we successfully
+			 * archived
+			 */
 			pgstat_send_archiver(xlog, false);

-			/* Tell the collector about the WAL file that we failed to archive */
+			/*
+			 * Tell the collector about the WAL file that we failed to
+			 * archive
+			 */
 			pgstat_send_archiver(xlog, true);

@@ -3912,8 +3912,8 @@ pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep)
-	 * Clear out global and archiver statistics so they start from zero
-	 * in case we can't load an existing statsfile.
+	 * Clear out global and archiver statistics so they start from zero in
+	 * case we can't load an existing statsfile.
@@ -1093,6 +1093,7 @@ PostmasterMain(int argc, char *argv[])
 #ifdef WIN32
+
 	/*
 	 * Initialize I/O completion port used to deliver list of dead children.
 	 */

@@ -1655,9 +1656,9 @@ ServerLoop(void)
-		 * down, it's time to send them SIGKILL. This doesn't happen normally,
-		 * but under certain conditions backends can get stuck while shutting
-		 * down. This is a last measure to get them unwedged.
+		 * down, it's time to send them SIGKILL. This doesn't happen
+		 * normally, but under certain conditions backends can get stuck while
+		 * shutting down. This is a last measure to get them unwedged.

@@ -1671,8 +1672,8 @@ ServerLoop(void)
-			 * Additionally, unless we're recovering from a process crash, it's
-			 * now the time for postmaster to abandon ship.
+			 * Additionally, unless we're recovering from a process crash,
+			 * it's now the time for postmaster to abandon ship.
@@ -2884,6 +2885,7 @@ CleanupBackgroundWorker(int pid,
 		ShmemBackendArrayRemove(rw->rw_backend);
 #endif
+
 		/*
 		 * It's possible that this background worker started some OTHER
 		 * background worker and asked to be notified when that worker

@@ -2930,6 +2932,7 @@ CleanupBackend(int pid,
 #ifdef WIN32
+
 	/*
 	 * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
 	 * since that sometimes happens under load when the process fails to start

@@ -2974,12 +2977,12 @@ CleanupBackend(int pid,
-			 * This backend may have been slated to receive SIGUSR1
-			 * when some background worker started or stopped. Cancel
-			 * those notifications, as we don't want to signal PIDs that
-			 * are not PostgreSQL backends. This gets skipped in the
-			 * (probably very common) case where the backend has never
-			 * requested any such notifications.
+			 * This backend may have been slated to receive SIGUSR1 when
+			 * some background worker started or stopped. Cancel those
+			 * notifications, as we don't want to signal PIDs that are not
+			 * PostgreSQL backends. This gets skipped in the (probably
+			 * very common) case where the backend has never requested any
+			 * such notifications.

@@ -3006,10 +3009,11 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
-	 * We only log messages and send signals if this is the first process crash
-	 * and we're not doing an immediate shutdown; otherwise, we're only here to
-	 * update postmaster's idea of live processes. If we have already signalled
-	 * children, nonzero exit status is to be expected, so don't clutter log.
+	 * We only log messages and send signals if this is the first process
+	 * crash and we're not doing an immediate shutdown; otherwise, we're only
+	 * here to update postmaster's idea of live processes. If we have already
+	 * signalled children, nonzero exit status is to be expected, so don't
+	 * clutter log.
@@ -3366,13 +3370,13 @@ PostmasterStateMachine(void)
-		 * doing crash recovery or an immediate shutdown then we expect
-		 * the checkpointer to exit as well, otherwise not. The archiver,
-		 * stats, and syslogger processes are disregarded since
-		 * they are not connected to shared memory; we also disregard
-		 * dead_end children here. Walsenders are also disregarded,
-		 * they will be terminated later after writing the checkpoint record,
-		 * like the archiver process.
+		 * doing crash recovery or an immediate shutdown then we expect the
+		 * checkpointer to exit as well, otherwise not. The archiver, stats,
+		 * and syslogger processes are disregarded since they are not
+		 * connected to shared memory; we also disregard dead_end children
+		 * here. Walsenders are also disregarded, they will be terminated
+		 * later after writing the checkpoint record, like the archiver
+		 * process.

@@ -670,6 +670,7 @@ SysLogger_Start(void)
 		syslogPipe[1] = -1;
 #else
+
 		/*
 		 * open the pipe in binary mode and make sure stderr is binary
 		 * after it's been dup'ed into, to avoid disturbing the pipe
@@ -137,8 +137,8 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
-	 * Calculate the relative path of temporary statistics directory
-	 * in order to skip the files which are located in that directory later.
+	 * Calculate the relative path of temporary statistics directory in order
+	 * to skip the files which are located in that directory later.

@@ -231,8 +231,8 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
-		 * The minimum amount of time for throttling_sample
-		 * bytes to be transfered.
+		 * The minimum amount of time for throttling_sample bytes to be
+		 * transfered.

@@ -1276,8 +1276,8 @@ throttle(size_t increment)
-		 * The actual transfer rate is below the limit. A negative value would
-		 * distort the adjustment of throttled_last.
+		 * The actual transfer rate is below the limit. A negative value
+		 * would distort the adjustment of throttled_last.
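Illustrative sketch, not part of this commit: the throttling hunks above amount to "a sample of N bytes must take at least N / maxrate seconds"; with a fixed sampling frequency, the sample size and minimum elapsed time per sample follow directly. A self-contained calculation with hypothetical names:

#include <stdint.h>

#define TOY_USECS_PER_SEC	1000000
#define TOY_FREQUENCY		8		/* samples per second, hypothetical */

/*
 * Given a rate limit in kB/s, compute the sample size in bytes and the
 * minimum number of microseconds that sample is allowed to take.
 */
static void
toy_throttle_params(int64_t maxrate_kb_per_s,
					int64_t *sample_bytes, int64_t *min_elapsed_us)
{
	*sample_bytes = maxrate_kb_per_s * 1024 / TOY_FREQUENCY;
	*min_elapsed_us = TOY_USECS_PER_SEC / TOY_FREQUENCY;
	/* If the sample was transferred faster, the sender sleeps the difference. */
}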
@@ -156,6 +156,7 @@ DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 		case XLOG_CHECKPOINT_ONLINE:
+
 			/*
 			 * a RUNNING_XACTS record will have been logged near to this, we
 			 * can restart from there.

@@ -292,6 +293,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 		case XLOG_XACT_PREPARE:
+
 			/*
 			 * Currently decoding ignores PREPARE TRANSACTION and will just
 			 * decode the transaction when the COMMIT PREPARED is sent or

@@ -321,7 +323,9 @@ DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 				xl_running_xacts *running = (xl_running_xacts *) buf->record_data;
+
 				SnapBuildProcessRunningXacts(builder, buf->origptr, running);
+
 				/*
 				 * Abort all transactions that we keep track of, that are
 				 * older than the record's oldestRunningXid. This is the most

@@ -364,21 +368,24 @@ DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 				xl_heap_new_cid *xlrec;
+
 				xlrec = (xl_heap_new_cid *) buf->record_data;
 				SnapBuildProcessNewCid(builder, xid, buf->origptr, xlrec);
+
 				break;
 			}
 		case XLOG_HEAP2_REWRITE:
+
 			/*
 			 * Although these records only exist to serve the needs of logical
 			 * decoding, all the work happens as part of crash or archive
 			 * recovery, so we don't need to do anything here.
 			 */
 			break;

 			/*
-			 * Everything else here is just low level physical stuff we're
-			 * not interested in.
+			 * Everything else here is just low level physical stuff we're not
+			 * interested in.
 			 */

@@ -429,6 +436,7 @@ DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 		case XLOG_HEAP_NEWPAGE:
+
 			/*
 			 * This is only used in places like indexams and CLUSTER which
 			 * don't contain changes relevant for logical replication.

@@ -436,6 +444,7 @@ DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 		case XLOG_HEAP_INPLACE:
+
 			/*
 			 * Inplace updates are only ever performed on catalog tuples and
 			 * can, per definition, not change tuple visibility. Since we
@@ -117,7 +117,8 @@ StartupDecodingContext(List *output_plugin_options,
 	ReplicationSlot *slot;
-	MemoryContext context, old_context;
+	MemoryContext context,
+				old_context;
 	LogicalDecodingContext *ctx;

@@ -133,7 +134,10 @@ StartupDecodingContext(List *output_plugin_options,
-	/* (re-)load output plugins, so we detect a bad (removed) output plugin now. */
+	/*
+	 * (re-)load output plugins, so we detect a bad (removed) output plugin
+	 * now.
+	 */
 	LoadOutputPlugin(&ctx->callbacks, NameStr(slot->data.plugin));

@@ -254,7 +258,7 @@ CreateInitDecodingContext(char *plugin,
-		/* start at current insert position*/
+		/* start at current insert position */
 		slot->data.restart_lsn = GetXLogInsertRecPtr();

@@ -307,8 +311,8 @@ CreateInitDecodingContext(char *plugin,
-	 * tell the snapshot builder to only assemble snapshot once reaching
-	 * the a running_xact's record with the respective xmin.
+	 * tell the snapshot builder to only assemble snapshot once reaching the a
+	 * running_xact's record with the respective xmin.

@@ -385,14 +389,14 @@ CreateDecodingContext(XLogRecPtr start_lsn,
-		 * decoding. Clients have to be able to do that to support
-		 * synchronous replication.
+		 * decoding. Clients have to be able to do that to support synchronous
+		 * replication.
 		 */
 		start_lsn = slot->data.confirmed_flush;
 		elog(DEBUG1, "cannot stream from %X/%X, minimum is %X/%X, forwarding",
-			 (uint32)(start_lsn >> 32), (uint32)start_lsn,
-			 (uint32)(slot->data.confirmed_flush >> 32),
-			 (uint32)slot->data.confirmed_flush);
+			 (uint32) (start_lsn >> 32), (uint32) start_lsn,
+			 (uint32) (slot->data.confirmed_flush >> 32),
+			 (uint32) slot->data.confirmed_flush);

@@ -409,10 +413,10 @@ CreateDecodingContext(XLogRecPtr start_lsn,
 			 errdetail("streaming transactions committing after %X/%X, reading WAL from %X/%X",
-					   (uint32)(slot->data.confirmed_flush >> 32),
-					   (uint32)slot->data.confirmed_flush,
-					   (uint32)(slot->data.restart_lsn >> 32),
-					   (uint32)slot->data.restart_lsn)));
+					   (uint32) (slot->data.confirmed_flush >> 32),
+					   (uint32) slot->data.confirmed_flush,
+					   (uint32) (slot->data.restart_lsn >> 32),
+					   (uint32) slot->data.restart_lsn)));

@@ -438,8 +442,8 @@ DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
 	elog(DEBUG1, "searching for logical decoding starting point, starting at %X/%X",
-		 (uint32)(ctx->slot->data.restart_lsn >> 32),
-		 (uint32)ctx->slot->data.restart_lsn);
+		 (uint32) (ctx->slot->data.restart_lsn >> 32),
+		 (uint32) ctx->slot->data.restart_lsn);
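Illustrative sketch, not part of this commit: the elog calls above print a 64-bit WAL position as two 32-bit halves in the usual %X/%X form. A standalone equivalent:

#include <stdint.h>
#include <stdio.h>

/* Print a 64-bit WAL position in the conventional high/low %X/%X form. */
static void
toy_print_lsn(uint64_t lsn)
{
	printf("%X/%X\n", (unsigned int) (lsn >> 32), (unsigned int) lsn);
}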
@@ -543,14 +547,15 @@ output_plugin_error_callback(void *arg)
-				   (uint32)(state->report_location >> 32),
-				   (uint32)state->report_location);
+				   (uint32) (state->report_location >> 32),
+				   (uint32) state->report_location);

@@ -690,6 +695,7 @@ change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
 	ctx->accept_writes = true;
 	ctx->write_xid = txn->xid;
+
 	/*
 	 * report this change's lsn so replies from clients can give an up2date
 	 * answer. This won't ever be enough (and shouldn't be!) to confirm

@@ -725,16 +731,17 @@ LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin)
 	/*
-	 * don't overwrite if we already have a newer xmin. This can
-	 * happen if we restart decoding in a slot.
+	 * don't overwrite if we already have a newer xmin. This can happen if we
+	 * restart decoding in a slot.
 	 */
 	if (TransactionIdPrecedesOrEquals(xmin, slot->data.catalog_xmin))
 	{
 	}
 	/*
-	 * If the client has already confirmed up to this lsn, we directly
-	 * can mark this as accepted. This can happen if we restart
-	 * decoding in a slot.
+	 * If the client has already confirmed up to this lsn, we directly can
+	 * mark this as accepted. This can happen if we restart decoding in a
+	 * slot.
 	 */
 	else if (current_lsn <= slot->data.confirmed_flush)

@@ -744,6 +751,7 @@ LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin)
 		/* our candidate can directly be used */
 		updated_xmin = true;
 	}
+
 	/*
 	 * Only increase if the previous values have been applied, otherwise we
 	 * might never end up updating if the receiver acks too slowly.

@@ -781,13 +789,14 @@ LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart
-	/* don't overwrite if have a newer restart lsn*/
+	/* don't overwrite if have a newer restart lsn */
 	if (restart_lsn <= slot->data.restart_lsn)
 	{
 	}
 	/*
-	 * We might have already flushed far enough to directly accept this lsn, in
-	 * this case there is no need to check for existing candidate LSNs
+	 * We might have already flushed far enough to directly accept this lsn,
+	 * in this case there is no need to check for existing candidate LSNs
 	 */
 	else if (current_lsn <= slot->data.confirmed_flush)

@@ -797,6 +806,7 @@ LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart
 		/* our candidate can directly be used */
 		updated_lsn = true;
 	}
+
 	/*
 	 * Only increase if the previous values have been applied, otherwise we
|
* Only increase if the previous values have been applied, otherwise we
|
||||||
* might never end up updating if the receiver acks too slowly. A missed
|
* might never end up updating if the receiver acks too slowly. A missed
|
||||||
|
|
@ -896,6 +906,7 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn)
|
||||||
ReplicationSlotSave();
|
ReplicationSlotSave();
|
||||||
elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart);
|
elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now the new xmin is safely on disk, we can let the global value
|
* Now the new xmin is safely on disk, we can let the global value
|
||||||
* advance. We do not take ProcArrayLock or similar since we only
|
* advance. We do not take ProcArrayLock or similar since we only
|
||||||
|
|
|
||||||
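Many of the hunks above only adjust spacing in one recurring idiom: a 64-bit WAL position printed as two 32-bit hexadecimal halves. The following is a minimal, self-contained sketch of that convention, not PostgreSQL code; XLogRecPtr is assumed to be a plain 64-bit integer, as it is from 9.4 on, and print_lsn() is an illustrative helper.

#include <stdint.h>
#include <stdio.h>

/* Assumption: a WAL location is a plain 64-bit integer. */
typedef uint64_t XLogRecPtr;

/* Print an LSN in the %X/%X form used by the messages in the diff above:
 * high 32 bits, a slash, then the low 32 bits, both in hex. */
static void
print_lsn(XLogRecPtr lsn)
{
	printf("%X/%X\n", (unsigned int) (lsn >> 32), (unsigned int) lsn);
}

int
main(void)
{
	print_lsn(0x16B3748UL);						/* prints 0/16B3748 */
	print_lsn(((uint64_t) 0x2 << 32) | 0xD40);	/* prints 2/D40 */
	return 0;
}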
@@ -42,7 +42,8 @@
-typedef struct DecodingOutputState {
+typedef struct DecodingOutputState
+{
@@ -475,6 +476,7 @@ Datum
@@ -485,6 +487,7 @@ Datum
@@ -495,6 +498,7 @@ Datum
@@ -505,5 +509,6 @@ Datum
(The opening brace of DecodingOutputState moves to its own line; pg_logical_slot_get_changes(), pg_logical_slot_peek_changes(), pg_logical_slot_get_binary_changes() and pg_logical_slot_peek_binary_changes() each gain a blank line after the "Datum ret = pg_logical_slot_get_changes_guts(...)" declaration.)
@@ -1047,8 +1047,8 @@ ReorderBufferCleanupTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
@@ -1204,9 +1204,9 @@ ReorderBufferCopySnap(ReorderBuffer *rb, Snapshot orig_snap,
@@ -1309,8 +1309,8 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
@@ -1415,6 +1415,7 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
@@ -1586,7 +1587,7 @@ ReorderBufferAbortOld(ReorderBuffer *rb, TransactionId oldestRunningXid)
-		ReorderBufferTXN * txn;
+		ReorderBufferTXN *txn;
@@ -1998,7 +1999,8 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
-				ReorderBufferTupleBuf *oldtup, *newtup;
+				ReorderBufferTupleBuf *oldtup,
+						   *newtup;
@@ -2007,12 +2009,12 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
@@ -2884,8 +2886,8 @@ TransactionIdInArray(TransactionId xid, TransactionId *xip, Size num)
-	RewriteMappingFile *a = *(RewriteMappingFile **)a_p;
-	RewriteMappingFile *b = *(RewriteMappingFile **)b_p;
+	RewriteMappingFile *a = *(RewriteMappingFile **) a_p;
+	RewriteMappingFile *b = *(RewriteMappingFile **) b_p;
@@ -2917,14 +2919,15 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
-		uint32 f_hi, f_lo;
+		uint32		f_hi,
+					f_lo;
@@ -2971,9 +2974,10 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
-	for(off = 0; off < list_length(files); off++)
+	for (off = 0; off < list_length(files); off++)
(The remaining lines in these hunks are pgindent comment re-wraps in ReorderBufferCleanupTXN(), ReorderBufferCopySnap() and ReorderBufferCommit(), re-indented continuation lines in the offsetof()/t_len length computations, and the "Ignore files that aren't ours" comment gaining a space before the closing delimiter.)
@@ -692,10 +692,10 @@ SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid,
-		ReorderBufferXidSetCatalogChanges(builder->reorder, xid,lsn);
+		ReorderBufferXidSetCatalogChanges(builder->reorder, xid, lsn);
@@ -901,7 +901,7 @@ SnapBuildEndTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid)
-				(uint32)(lsn >> 32), (uint32)lsn),
+				(uint32) (lsn >> 32), (uint32) lsn),
@@ -1170,6 +1170,7 @@ SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xact
@@ -1263,7 +1264,7 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
@@ -1274,11 +1275,12 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
@@ -1321,7 +1323,7 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
@@ -1331,7 +1333,7 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
-	for(off = 0; off < builder->running.xcnt; off++)
+	for (off = 0; off < builder->running.xcnt; off++)
@@ -1471,9 +1473,9 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
@@ -1597,8 +1599,8 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
@@ -1781,7 +1783,7 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
@@ -1846,8 +1848,8 @@ CheckPointSnapBuild(void)
(The other changes in these hunks add a space after the (uint32) casts in the "logical decoding found consistent point at %X/%X" and "found initial starting point at %X/%X" messages, and re-wrap the comments in SnapBuildProcessNewCid(), SnapBuildFindSnapshot(), SnapBuildSerialize() and CheckPointSnapBuild().)
@@ -81,7 +81,8 @@ ReplicationSlotCtlData *ReplicationSlotCtl = NULL;
-int max_replication_slots = 0; /* the maximum number of replication slots */
+int			max_replication_slots = 0;	/* the maximum number of replication
+										 * slots */
@@ -208,18 +209,18 @@ ReplicationSlotCreate(const char *name, bool db_specific,
@@ -243,10 +244,10 @@ ReplicationSlotCreate(const char *name, bool db_specific,
@@ -366,6 +367,7 @@ ReplicationSlotRelease(void)
@@ -802,8 +804,8 @@ CheckPointReplicationSlots(void)
@@ -904,11 +906,10 @@ CreateSlotOnDisk(ReplicationSlot *slot)
@@ -1003,12 +1004,13 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
-			   (char *)(&cp) + ReplicationSlotOnDiskConstantSize,
+			   (char *) (&cp) + ReplicationSlotOnDiskConstantSize,
@@ -1022,6 +1024,7 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
@@ -1162,7 +1165,7 @@ RestoreSlotFromDisk(const char *name)
-					 (char *)&cp + ReplicationSlotOnDiskConstantSize,
+					 (char *) &cp + ReplicationSlotOnDiskConstantSize,
@@ -1181,7 +1184,7 @@ RestoreSlotFromDisk(const char *name)
(The remaining lines re-wrap the comments in ReplicationSlotCreate(), CheckPointReplicationSlots() and CreateSlotOnDisk() and add blank lines after declarations; the cast-spacing pairs above sit in the COMP_CRC32() calls that checksum the variable-size part of the on-disk slot state, sketched below.)
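The COMP_CRC32() calls touched above checksum only the bytes that follow the fixed-size prefix of the on-disk state, measured from a constant offset into the struct. Below is a minimal illustration of that layout idea; the OnDiskState struct, ONDISK_CONSTANT_SIZE and toy_checksum() are invented for the example and are not the real ReplicationSlotOnDisk definitions or CRC routines.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical stand-in for a real CRC32 implementation; only the layout
 * idea matters: the checksum covers everything after the constant part. */
static uint32_t
toy_checksum(const void *buf, size_t len)
{
	const unsigned char *p = buf;
	uint32_t	h = 0;

	while (len--)
		h = h * 31 + *p++;
	return h;
}

typedef struct OnDiskState
{
	uint32_t	magic;			/* constant-size part: not checksummed */
	uint32_t	checksum;		/* covers everything after the constant part */
	uint32_t	length;			/* start of the dynamic, checksummed part */
	char		payload[64];
} OnDiskState;

/* Everything before "length" is the constant-size header. */
#define ONDISK_CONSTANT_SIZE offsetof(OnDiskState, length)

static void
set_checksum(OnDiskState *s)
{
	s->checksum = toy_checksum((char *) s + ONDISK_CONSTANT_SIZE,
							   sizeof(OnDiskState) - ONDISK_CONSTANT_SIZE);
}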
@@ -53,7 +53,7 @@ pg_create_physical_replication_slot(PG_FUNCTION_ARGS)
-	/* acquire replication slot, this will check for conflicting names*/
+	/* acquire replication slot, this will check for conflicting names */
@@ -97,8 +97,7 @@ pg_create_logical_replication_slot(PG_FUNCTION_ARGS)
(The comment above the ReplicationSlotCreate() call in pg_create_logical_replication_slot() is re-wrapped onto a single line: "Acquire a logical decoding slot, this will check for conflicting names.")
@@ -117,8 +117,8 @@ SyncRepWaitForLSN(XLogRecPtr XactCommitLSN)
(Comment re-wrap only: the note about the standby having already replied, an unlikely race but a cheap check since the cache line is fetched anyway, is re-flowed to the standard width.)
@@ -188,7 +188,7 @@ static void WalSndXLogSendHandler(SIGNAL_ARGS);
-typedef void (*WalSndSendDataCallback)(void);
+typedef void (*WalSndSendDataCallback) (void);
@@ -301,8 +301,8 @@ IdentifySystem(void)
@@ -731,8 +731,8 @@ StartReplication(StartReplicationCmd *cmd)
-logical_read_xlog_page(XLogReaderState* state, XLogRecPtr targetPagePtr, int reqLen,
-					   XLogRecPtr targetRecPtr, char* cur_page, TimeLineID *pageTLI)
+logical_read_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen,
+					   XLogRecPtr targetRecPtr, char *cur_page, TimeLineID *pageTLI)
@@ -1013,6 +1013,7 @@ WalSndPrepareWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xi
@@ -1035,9 +1036,9 @@ WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
@@ -1297,6 +1298,7 @@ exec_replication_command(const char *cmd_string)
@@ -1473,6 +1475,7 @@ static void
@@ -1492,9 +1495,9 @@ PhysicalConfirmReceivedLocation(XLogRecPtr lsn)
@@ -1566,10 +1569,11 @@ PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin)
@@ -1667,7 +1671,7 @@ ProcessStandbyHSFeedbackMessage(void)
@@ -1703,9 +1707,9 @@ WalSndComputeSleeptime(TimestampTz now)
@@ -1738,8 +1742,8 @@ WalSndCheckTimeOut(TimestampTz now)
@@ -1839,10 +1843,10 @@ WalSndLoop(WalSndSendDataCallback send_data)
@@ -2416,8 +2420,8 @@ XLogSendLogical(void)
@@ -2452,10 +2456,10 @@ WalSndDone(WalSndSendDataCallback send_data)
@@ -2562,8 +2566,8 @@ WalSndLastCycleHandler(SIGNAL_ARGS)
(Pointer declarators move next to the parameter names, blank lines are added after declarations, and the walsender comments about the IDENTIFY_SYSTEM result set, send timestamps, saving the slot, xmin feedback, sleep time, timeouts, shutdown handling and caught-up detection are re-wrapped to the standard width.)
@@ -2174,8 +2174,8 @@ view_cols_are_auto_updatable(Query *viewquery,
@@ -2354,9 +2354,9 @@ relation_is_updatable(Oid reloid,
@@ -2703,8 +2703,8 @@ rewriteTargetView(Query *parsetree, Relation view)
@@ -2790,8 +2790,8 @@ rewriteTargetView(Query *parsetree, Relation view)
@@ -2836,8 +2836,9 @@ rewriteTargetView(Query *parsetree, Relation view)
(Comment re-wraps only, in the auto-updatable-view, updatable-columns, security-barrier-qual and cascaded-check-option comments.)
@@ -170,10 +170,10 @@ dsm_postmaster_startup(PGShmemHeader *shim)
@@ -224,17 +224,17 @@ dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
@@ -245,8 +245,8 @@ dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
@@ -307,6 +307,7 @@ dsm_cleanup_for_mmap(void)
@@ -352,8 +353,8 @@ dsm_postmaster_shutdown(int code, Datum arg)
@@ -537,13 +538,13 @@ dsm_attach(dsm_handle h)
@@ -584,10 +585,10 @@ dsm_attach(dsm_handle h)
@@ -710,13 +711,12 @@ dsm_detach(dsm_segment *seg)
@@ -744,15 +744,15 @@ dsm_detach(dsm_segment *seg)
@@ -1005,5 +1005,5 @@ static uint64
-	return offsetof(dsm_control_header, item)
-		+ sizeof(dsm_control_item) * (uint64) nitems;
+	return offsetof(dsm_control_header, item)
+		+sizeof(dsm_control_item) * (uint64) nitems;
(Apart from the re-indented continuation line in dsm_control_bytes_needed() and a blank line added in dsm_cleanup_for_mmap(), these hunks only re-wrap the comments in the dynamic shared memory startup, cleanup, attach and detach code; a standalone sketch of the offsetof()-based sizing follows below.)
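dsm_control_bytes_needed() above sizes a header that ends in an array of items as offsetof(header, array) plus the per-item size times the item count. The following is a minimal, self-contained sketch of that allocation pattern; control_header, item and alloc_control() are illustrative names, not the real dsm_control_header structures.

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

typedef struct item
{
	uint32_t	handle;
	uint32_t	refcnt;
} item;

typedef struct control_header
{
	uint32_t	magic;
	uint32_t	nitems;
	item		items[];		/* flexible array member */
} control_header;

/* Allocate room for the fixed header plus nitems array entries, the same
 * offsetof()-based computation used by dsm_control_bytes_needed() above. */
static control_header *
alloc_control(uint32_t nitems)
{
	size_t		sz = offsetof(control_header, items)
		+ sizeof(item) * (uint64_t) nitems;
	control_header *h = calloc(1, sz);

	if (h)
		h->nitems = nitems;
	return h;
}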
|
|
@ -93,18 +93,18 @@ static int errcode_for_dynamic_shared_memory(void);
|
||||||
|
|
||||||
const struct config_enum_entry dynamic_shared_memory_options[] = {
|
const struct config_enum_entry dynamic_shared_memory_options[] = {
|
||||||
#ifdef USE_DSM_POSIX
|
#ifdef USE_DSM_POSIX
|
||||||
{ "posix", DSM_IMPL_POSIX, false},
|
{"posix", DSM_IMPL_POSIX, false},
|
||||||
#endif
|
#endif
|
||||||
#ifdef USE_DSM_SYSV
|
#ifdef USE_DSM_SYSV
|
||||||
{ "sysv", DSM_IMPL_SYSV, false},
|
{"sysv", DSM_IMPL_SYSV, false},
|
||||||
#endif
|
#endif
|
||||||
#ifdef USE_DSM_WINDOWS
|
#ifdef USE_DSM_WINDOWS
|
||||||
{ "windows", DSM_IMPL_WINDOWS, false},
|
{"windows", DSM_IMPL_WINDOWS, false},
|
||||||
#endif
|
#endif
|
||||||
#ifdef USE_DSM_MMAP
|
#ifdef USE_DSM_MMAP
|
||||||
{ "mmap", DSM_IMPL_MMAP, false},
|
{"mmap", DSM_IMPL_MMAP, false},
|
||||||
#endif
|
#endif
|
||||||
{ "none", DSM_IMPL_NONE, false},
|
{"none", DSM_IMPL_NONE, false},
|
||||||
{NULL, 0, false}
|
{NULL, 0, false}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -367,8 +367,8 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Map it. */
|
/* Map it. */
|
||||||
address = mmap(NULL, request_size, PROT_READ|PROT_WRITE,
|
address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
|
||||||
MAP_SHARED|MAP_HASSEMAPHORE, fd, 0);
|
MAP_SHARED | MAP_HASSEMAPHORE, fd, 0);
|
||||||
if (address == MAP_FAILED)
|
if (address == MAP_FAILED)
|
||||||
{
|
{
|
||||||
int save_errno;
|
int save_errno;
|
||||||
|
|
@ -427,27 +427,27 @@ dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* POSIX shared memory and mmap-based shared memory identify segments
|
* POSIX shared memory and mmap-based shared memory identify segments with
|
||||||
* with names. To avoid needless error message variation, we use the
|
* names. To avoid needless error message variation, we use the handle as
|
||||||
* handle as the name.
|
* the name.
|
||||||
*/
|
*/
|
||||||
snprintf(name, 64, "%u", handle);
|
snprintf(name, 64, "%u", handle);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The System V shared memory namespace is very restricted; names are
|
* The System V shared memory namespace is very restricted; names are of
|
||||||
* of type key_t, which is expected to be some sort of integer data type,
|
* type key_t, which is expected to be some sort of integer data type, but
|
||||||
* but not necessarily the same one as dsm_handle. Since we use
|
* not necessarily the same one as dsm_handle. Since we use dsm_handle to
|
||||||
* dsm_handle to identify shared memory segments across processes, this
|
* identify shared memory segments across processes, this might seem like
|
||||||
* might seem like a problem, but it's really not. If dsm_handle is
|
* a problem, but it's really not. If dsm_handle is bigger than key_t,
|
||||||
* bigger than key_t, the cast below might truncate away some bits from
|
* the cast below might truncate away some bits from the handle the
|
||||||
* the handle the user-provided, but it'll truncate exactly the same bits
|
* user-provided, but it'll truncate exactly the same bits away in exactly
|
||||||
* away in exactly the same fashion every time we use that handle, which
|
* the same fashion every time we use that handle, which is all that
|
||||||
* is all that really matters. Conversely, if dsm_handle is smaller than
|
* really matters. Conversely, if dsm_handle is smaller than key_t, we
|
||||||
* key_t, we won't use the full range of available key space, but that's
|
* won't use the full range of available key space, but that's no big deal
|
||||||
* no big deal either.
|
* either.
|
||||||
*
|
*
|
||||||
* We do make sure that the key isn't negative, because that might not
|
* We do make sure that the key isn't negative, because that might not be
|
||||||
* be portable.
|
* portable.
|
||||||
*/
|
*/
|
||||||
key = (key_t) handle;
|
key = (key_t) handle;
|
||||||
if (key < 1) /* avoid compiler warning if type is unsigned */
|
if (key < 1) /* avoid compiler warning if type is unsigned */
|
||||||
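
The comment above argues that casting dsm_handle to key_t is safe because any truncation is applied the same way every time, so long as the resulting key is positive and is not IPC_PRIVATE. Below is a standalone sketch of that derivation followed by the IPC_CREAT | IPC_EXCL create; the masking used to force a non-negative key is one possible choice, not the actual dsm_impl_sysv code, and the size and permissions are arbitrary.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ipc.h>
#include <sys/shm.h>

static int
create_segment_for_handle(uint32_t handle, size_t size, int *shmid)
{
    /* Truncate consistently to a non-negative key. */
    key_t       key = (key_t) (handle & 0x7fffffff);

    if (key < 1 || key == IPC_PRIVATE)
        return EEXIST;          /* caller should retry with another handle */

    *shmid = shmget(key, size, IPC_CREAT | IPC_EXCL | 0600);
    if (*shmid < 0)
        return errno;
    return 0;
}

int
main(void)
{
    int         shmid;
    int         rc = create_segment_for_handle(42, 65536, &shmid);

    if (rc != 0)
    {
        fprintf(stderr, "create failed: %d\n", rc);
        return 1;
    }
    printf("created shmid %d\n", shmid);
    shmctl(shmid, IPC_RMID, NULL);      /* clean up the demo segment */
    return 0;
}
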
|
|
@ -455,10 +455,10 @@ dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* There's one special key, IPC_PRIVATE, which can't be used. If we end
|
* There's one special key, IPC_PRIVATE, which can't be used. If we end
|
||||||
* up with that value by chance during a create operation, just pretend
|
* up with that value by chance during a create operation, just pretend it
|
||||||
* it already exists, so that caller will retry. If we run into it
|
* already exists, so that caller will retry. If we run into it anywhere
|
||||||
* anywhere else, the caller has passed a handle that doesn't correspond
|
* else, the caller has passed a handle that doesn't correspond to
|
||||||
* to anything we ever created, which should not happen.
|
* anything we ever created, which should not happen.
|
||||||
*/
|
*/
|
||||||
if (key == IPC_PRIVATE)
|
if (key == IPC_PRIVATE)
|
||||||
{
|
{
|
||||||
|
|
@ -469,9 +469,9 @@ dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Before we can do anything with a shared memory segment, we have to
|
* Before we can do anything with a shared memory segment, we have to map
|
||||||
* map the shared memory key to a shared memory identifier using shmget().
|
* the shared memory key to a shared memory identifier using shmget(). To
|
||||||
* To avoid repeated lookups, we store the key using impl_private.
|
* avoid repeated lookups, we store the key using impl_private.
|
||||||
*/
|
*/
|
||||||
if (*impl_private != NULL)
|
if (*impl_private != NULL)
|
||||||
{
|
{
|
||||||
|
|
@ -507,6 +507,7 @@ dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
|
||||||
if (errno != EEXIST)
|
if (errno != EEXIST)
|
||||||
{
|
{
|
||||||
int save_errno = errno;
|
int save_errno = errno;
|
||||||
|
|
||||||
pfree(ident_cache);
|
pfree(ident_cache);
|
||||||
errno = save_errno;
|
errno = save_errno;
|
||||||
ereport(elevel,
|
ereport(elevel,
|
||||||
|
|
@ -631,12 +632,12 @@ dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Storing the shared memory segment in the Global\ namespace, can
|
* Storing the shared memory segment in the Global\ namespace, can allow
|
||||||
* allow any process running in any session to access that file
|
* any process running in any session to access that file mapping object
|
||||||
* mapping object provided that the caller has the required access rights.
|
* provided that the caller has the required access rights. But to avoid
|
||||||
* But to avoid issues faced in main shared memory, we are using the naming
|
* issues faced in main shared memory, we are using the naming convention
|
||||||
* convention similar to main shared memory. We can change here once
|
* similar to main shared memory. We can change here once issue mentioned
|
||||||
* issue mentioned in GetSharedMemName is resolved.
|
* in GetSharedMemName is resolved.
|
||||||
*/
|
*/
|
||||||
snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
|
snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
|
||||||
|
|
||||||
|
|
@ -752,9 +753,9 @@ dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* VirtualQuery gives size in page_size units, which is 4K for Windows.
|
* VirtualQuery gives size in page_size units, which is 4K for Windows. We
|
||||||
* We need size only when we are attaching, but it's better to get the
|
* need size only when we are attaching, but it's better to get the size
|
||||||
* size when creating new segment to keep size consistent both for
|
* when creating new segment to keep size consistent both for
|
||||||
* DSM_OP_CREATE and DSM_OP_ATTACH.
|
* DSM_OP_CREATE and DSM_OP_ATTACH.
|
||||||
*/
|
*/
|
||||||
if (VirtualQuery(address, &info, sizeof(info)) == 0)
|
if (VirtualQuery(address, &info, sizeof(info)) == 0)
|
||||||
|
|
@ -891,19 +892,19 @@ dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
|
||||||
/*
|
/*
|
||||||
* Allocate a buffer full of zeros.
|
* Allocate a buffer full of zeros.
|
||||||
*
|
*
|
||||||
* Note: palloc zbuffer, instead of just using a local char array,
|
* Note: palloc zbuffer, instead of just using a local char array, to
|
||||||
* to ensure it is reasonably well-aligned; this may save a few
|
* ensure it is reasonably well-aligned; this may save a few cycles
|
||||||
* cycles transferring data to the kernel.
|
* transferring data to the kernel.
|
||||||
*/
|
*/
|
||||||
char *zbuffer = (char *) palloc0(ZBUFFER_SIZE);
|
char *zbuffer = (char *) palloc0(ZBUFFER_SIZE);
|
||||||
uint32 remaining = request_size;
|
uint32 remaining = request_size;
|
||||||
bool success = true;
|
bool success = true;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Zero-fill the file. We have to do this the hard way to ensure
|
* Zero-fill the file. We have to do this the hard way to ensure that
|
||||||
* that all the file space has really been allocated, so that we
|
* all the file space has really been allocated, so that we don't
|
||||||
* don't later seg fault when accessing the memory mapping. This
|
* later seg fault when accessing the memory mapping. This is pretty
|
||||||
* is pretty pessimal.
|
* pessimal.
|
||||||
*/
|
*/
|
||||||
while (success && remaining > 0)
|
while (success && remaining > 0)
|
||||||
{
|
{
|
||||||
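
The hunk above is the zero-fill loop: the file backing the mapping is written full of zeros in ZBUFFER_SIZE chunks so that all of its space is really allocated before anyone touches the mapping. A standalone sketch of the same approach against an ordinary file; the path, buffer size, and error handling are illustrative only.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#define ZBUFFER_SIZE 8192

int
main(void)
{
    const char *path = "mmap_demo_segment";
    size_t      request_size = 1024 * 1024;
    size_t      remaining = request_size;
    char       *zbuffer;
    int         fd;
    void       *address;

    fd = open(path, O_CREAT | O_EXCL | O_RDWR, 0600);
    if (fd < 0)
    {
        perror("open");
        return 1;
    }

    /* calloc gives a reasonably aligned, zeroed buffer. */
    zbuffer = calloc(1, ZBUFFER_SIZE);

    /* Zero-fill the file the hard way so the space is really allocated. */
    while (remaining > 0)
    {
        size_t      goal = remaining < ZBUFFER_SIZE ? remaining : ZBUFFER_SIZE;

        if (write(fd, zbuffer, goal) != (ssize_t) goal)
        {
            perror("write");
            unlink(path);
            return 1;
        }
        remaining -= goal;
    }

    address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
                   MAP_SHARED, fd, 0);
    if (address == MAP_FAILED)
    {
        perror("mmap");
        unlink(path);
        return 1;
    }

    memset(address, 0x7f, request_size);        /* safe: fully allocated */

    munmap(address, request_size);
    close(fd);
    unlink(path);
    free(zbuffer);
    return 0;
}
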
|
|
@ -966,8 +967,8 @@ dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Map it. */
|
/* Map it. */
|
||||||
address = mmap(NULL, request_size, PROT_READ|PROT_WRITE,
|
address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
|
||||||
MAP_SHARED|MAP_HASSEMAPHORE, fd, 0);
|
MAP_SHARED | MAP_HASSEMAPHORE, fd, 0);
|
||||||
if (address == MAP_FAILED)
|
if (address == MAP_FAILED)
|
||||||
{
|
{
|
||||||
int save_errno;
|
int save_errno;
|
||||||
|
|
|
||||||
|
|
@ -219,10 +219,10 @@ shmem_exit(int code)
|
||||||
/*
|
/*
|
||||||
* Call before_shmem_exit callbacks.
|
* Call before_shmem_exit callbacks.
|
||||||
*
|
*
|
||||||
* These should be things that need most of the system to still be
|
* These should be things that need most of the system to still be up and
|
||||||
* up and working, such as cleanup of temp relations, which requires
|
* working, such as cleanup of temp relations, which requires catalog
|
||||||
* catalog access; or things that need to be completed because later
|
* access; or things that need to be completed because later cleanup steps
|
||||||
* cleanup steps depend on them, such as releasing lwlocks.
|
* depend on them, such as releasing lwlocks.
|
||||||
*/
|
*/
|
||||||
elog(DEBUG3, "shmem_exit(%d): %d before_shmem_exit callbacks to make",
|
elog(DEBUG3, "shmem_exit(%d): %d before_shmem_exit callbacks to make",
|
||||||
code, before_shmem_exit_index);
|
code, before_shmem_exit_index);
|
||||||
|
|
@ -241,9 +241,9 @@ shmem_exit(int code)
|
||||||
* callback before invoking it, so that we don't get stuck in an infinite
|
* callback before invoking it, so that we don't get stuck in an infinite
|
||||||
* loop if one of those callbacks itself throws an ERROR or FATAL.
|
* loop if one of those callbacks itself throws an ERROR or FATAL.
|
||||||
*
|
*
|
||||||
* Note that explicitly calling this function here is quite different
|
* Note that explicitly calling this function here is quite different from
|
||||||
* from registering it as an on_shmem_exit callback for precisely this
|
* registering it as an on_shmem_exit callback for precisely this reason:
|
||||||
* reason: if one dynamic shared memory callback errors out, the remaining
|
* if one dynamic shared memory callback errors out, the remaining
|
||||||
* callbacks will still be invoked. Thus, hard-coding this call puts it
|
* callbacks will still be invoked. Thus, hard-coding this call puts it
|
||||||
* equal footing with callbacks for the main shared memory segment.
|
* equal footing with callbacks for the main shared memory segment.
|
||||||
*/
|
*/
|
||||||
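
The comment above describes why shmem_exit pops each callback off the list before invoking it: a callback that itself errors out, or re-enters the exit path, then cannot run twice or cause an endless loop. A minimal standalone sketch of that pop-then-call pattern, with invented names and a fixed-size stack:

#include <stdio.h>

#define MAX_EXIT_CALLBACKS 20

typedef void (*exit_callback) (int code, void *arg);

static struct
{
    exit_callback func;
    void       *arg;
}           exit_list[MAX_EXIT_CALLBACKS];
static int  exit_index = 0;

static void
register_exit_callback(exit_callback func, void *arg)
{
    if (exit_index >= MAX_EXIT_CALLBACKS)
        return;                 /* a real implementation would report this */
    exit_list[exit_index].func = func;
    exit_list[exit_index].arg = arg;
    exit_index++;
}

static void
run_exit_callbacks(int code)
{
    /* Pop each callback before calling it, latest registration first. */
    while (exit_index > 0)
    {
        --exit_index;
        exit_list[exit_index].func(code, exit_list[exit_index].arg);
    }
}

static void
say(int code, void *arg)
{
    printf("exiting (%d): %s\n", code, (const char *) arg);
}

int
main(void)
{
    register_exit_callback(say, "registered first, runs last");
    register_exit_callback(say, "registered last, runs first");
    run_exit_callbacks(0);
    return 0;
}
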
|
|
|
||||||
|
|
@ -142,7 +142,7 @@ static shm_mq_result shm_mq_send_bytes(shm_mq_handle *mq, Size nbytes,
|
||||||
void *data, bool nowait, Size *bytes_written);
|
void *data, bool nowait, Size *bytes_written);
|
||||||
static shm_mq_result shm_mq_receive_bytes(shm_mq *mq, Size bytes_needed,
|
static shm_mq_result shm_mq_receive_bytes(shm_mq *mq, Size bytes_needed,
|
||||||
bool nowait, Size *nbytesp, void **datap);
|
bool nowait, Size *nbytesp, void **datap);
|
||||||
static bool shm_mq_wait_internal(volatile shm_mq *mq, PGPROC * volatile *ptr,
|
static bool shm_mq_wait_internal(volatile shm_mq *mq, PGPROC *volatile * ptr,
|
||||||
BackgroundWorkerHandle *handle);
|
BackgroundWorkerHandle *handle);
|
||||||
static uint64 shm_mq_get_bytes_read(volatile shm_mq *mq, bool *detached);
|
static uint64 shm_mq_get_bytes_read(volatile shm_mq *mq, bool *detached);
|
||||||
static void shm_mq_inc_bytes_read(volatile shm_mq *mq, Size n);
|
static void shm_mq_inc_bytes_read(volatile shm_mq *mq, Size n);
|
||||||
|
|
@ -153,7 +153,7 @@ static void shm_mq_detach_callback(dsm_segment *seg, Datum arg);
|
||||||
|
|
||||||
/* Minimum queue size is enough for header and at least one chunk of data. */
|
/* Minimum queue size is enough for header and at least one chunk of data. */
|
||||||
const Size shm_mq_minimum_size =
|
const Size shm_mq_minimum_size =
|
||||||
MAXALIGN(offsetof(shm_mq, mq_ring)) + MAXIMUM_ALIGNOF;
|
MAXALIGN(offsetof(shm_mq, mq_ring)) + MAXIMUM_ALIGNOF;
|
||||||
|
|
||||||
#define MQH_INITIAL_BUFSIZE 8192
|
#define MQH_INITIAL_BUFSIZE 8192
|
||||||
|
|
||||||
|
|
@ -328,7 +328,7 @@ shm_mq_send(shm_mq_handle *mqh, Size nbytes, void *data, bool nowait)
|
||||||
{
|
{
|
||||||
Assert(mqh->mqh_partial_bytes < sizeof(Size));
|
Assert(mqh->mqh_partial_bytes < sizeof(Size));
|
||||||
res = shm_mq_send_bytes(mqh, sizeof(Size) - mqh->mqh_partial_bytes,
|
res = shm_mq_send_bytes(mqh, sizeof(Size) - mqh->mqh_partial_bytes,
|
||||||
((char *) &nbytes) + mqh->mqh_partial_bytes,
|
((char *) &nbytes) +mqh->mqh_partial_bytes,
|
||||||
nowait, &bytes_written);
|
nowait, &bytes_written);
|
||||||
mqh->mqh_partial_bytes += bytes_written;
|
mqh->mqh_partial_bytes += bytes_written;
|
||||||
if (res != SHM_MQ_SUCCESS)
|
if (res != SHM_MQ_SUCCESS)
|
||||||
|
|
@ -441,16 +441,17 @@ shm_mq_receive(shm_mq_handle *mqh, Size *nbytesp, void **datap, bool nowait)
|
||||||
{
|
{
|
||||||
Size needed;
|
Size needed;
|
||||||
|
|
||||||
nbytes = * (Size *) rawdata;
|
nbytes = *(Size *) rawdata;
|
||||||
|
|
||||||
/* If we've already got the whole message, we're done. */
|
/* If we've already got the whole message, we're done. */
|
||||||
needed = MAXALIGN(sizeof(Size)) + MAXALIGN(nbytes);
|
needed = MAXALIGN(sizeof(Size)) + MAXALIGN(nbytes);
|
||||||
if (rb >= needed)
|
if (rb >= needed)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Technically, we could consume the message length information
|
* Technically, we could consume the message length
|
||||||
* at this point, but the extra write to shared memory wouldn't
|
* information at this point, but the extra write to shared
|
||||||
* be free and in most cases we would reap no benefit.
|
* memory wouldn't be free and in most cases we would reap no
|
||||||
|
* benefit.
|
||||||
*/
|
*/
|
||||||
mqh->mqh_consume_pending = needed;
|
mqh->mqh_consume_pending = needed;
|
||||||
*nbytesp = nbytes;
|
*nbytesp = nbytes;
|
||||||
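
The receive path above relies on each message being framed as a length word followed by the payload, with both parts padded to MAXALIGN so the next length word stays aligned (needed = MAXALIGN(sizeof(Size)) + MAXALIGN(nbytes)). The standalone sketch below shows that framing on a plain linear buffer; the local MAXALIGN macro assumes an 8-byte boundary, and none of the ring-buffer or locking machinery of shm_mq.c is reproduced.

#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define MAXIMUM_ALIGNOF 8
#define MAXALIGN(LEN) \
    (((size_t) (LEN) + (MAXIMUM_ALIGNOF - 1)) & ~((size_t) (MAXIMUM_ALIGNOF - 1)))

static size_t
frame_write(char *buf, size_t off, const void *data, size_t nbytes)
{
    memcpy(buf + off, &nbytes, sizeof(size_t));         /* length word */
    off += MAXALIGN(sizeof(size_t));
    memcpy(buf + off, data, nbytes);                    /* payload */
    off += MAXALIGN(nbytes);
    return off;                 /* start of the next frame */
}

static size_t
frame_read(const char *buf, size_t off, const void **payload, size_t *nbytes)
{
    memcpy(nbytes, buf + off, sizeof(size_t));
    *payload = buf + off + MAXALIGN(sizeof(size_t));
    /* a whole frame occupies MAXALIGN(length word) + MAXALIGN(payload) */
    return off + MAXALIGN(sizeof(size_t)) + MAXALIGN(*nbytes);
}

int
main(void)
{
    char        buf[256];
    size_t      off = 0;
    const void *payload;
    size_t      len;

    off = frame_write(buf, off, "hello", 5);
    off = frame_write(buf, off, "shared memory", 13);

    off = frame_read(buf, 0, &payload, &len);
    printf("first message: %.*s (next frame at %zu)\n",
           (int) len, (const char *) payload, off);
    return 0;
}
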
|
|
@ -498,7 +499,7 @@ shm_mq_receive(shm_mq_handle *mqh, Size *nbytesp, void **datap, bool nowait)
|
||||||
if (mqh->mqh_partial_bytes >= sizeof(Size))
|
if (mqh->mqh_partial_bytes >= sizeof(Size))
|
||||||
{
|
{
|
||||||
Assert(mqh->mqh_partial_bytes == sizeof(Size));
|
Assert(mqh->mqh_partial_bytes == sizeof(Size));
|
||||||
mqh->mqh_expected_bytes = * (Size *) mqh->mqh_buffer;
|
mqh->mqh_expected_bytes = *(Size *) mqh->mqh_buffer;
|
||||||
mqh->mqh_length_word_complete = true;
|
mqh->mqh_length_word_complete = true;
|
||||||
mqh->mqh_partial_bytes = 0;
|
mqh->mqh_partial_bytes = 0;
|
||||||
}
|
}
|
||||||
|
|
@ -527,8 +528,8 @@ shm_mq_receive(shm_mq_handle *mqh, Size *nbytesp, void **datap, bool nowait)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The message has wrapped the buffer. We'll need to copy it in order
|
* The message has wrapped the buffer. We'll need to copy it in order
|
||||||
* to return it to the client in one chunk. First, make sure we have a
|
* to return it to the client in one chunk. First, make sure we have
|
||||||
* large enough buffer available.
|
* a large enough buffer available.
|
||||||
*/
|
*/
|
||||||
if (mqh->mqh_buflen < nbytes)
|
if (mqh->mqh_buflen < nbytes)
|
||||||
{
|
{
|
||||||
|
|
@ -559,10 +560,10 @@ shm_mq_receive(shm_mq_handle *mqh, Size *nbytesp, void **datap, bool nowait)
|
||||||
mqh->mqh_partial_bytes += rb;
|
mqh->mqh_partial_bytes += rb;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Update count of bytes read, with alignment padding. Note
|
* Update count of bytes read, with alignment padding. Note that this
|
||||||
* that this will never actually insert any padding except at the
|
* will never actually insert any padding except at the end of a
|
||||||
* end of a message, because the buffer size is a multiple of
|
* message, because the buffer size is a multiple of MAXIMUM_ALIGNOF,
|
||||||
* MAXIMUM_ALIGNOF, and each read and write is as well.
|
* and each read and write is as well.
|
||||||
*/
|
*/
|
||||||
Assert(mqh->mqh_partial_bytes == nbytes || rb == MAXALIGN(rb));
|
Assert(mqh->mqh_partial_bytes == nbytes || rb == MAXALIGN(rb));
|
||||||
shm_mq_inc_bytes_read(mq, MAXALIGN(rb));
|
shm_mq_inc_bytes_read(mq, MAXALIGN(rb));
|
||||||
|
|
@ -717,11 +718,11 @@ shm_mq_send_bytes(shm_mq_handle *mqh, Size nbytes, void *data, bool nowait,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Wait for our latch to be set. It might already be set for
|
* Wait for our latch to be set. It might already be set for some
|
||||||
* some unrelated reason, but that'll just result in one extra
|
* unrelated reason, but that'll just result in one extra trip
|
||||||
* trip through the loop. It's worth it to avoid resetting the
|
* through the loop. It's worth it to avoid resetting the latch
|
||||||
* latch at top of loop, because setting an already-set latch is
|
* at top of loop, because setting an already-set latch is much
|
||||||
* much cheaper than setting one that has been reset.
|
* cheaper than setting one that has been reset.
|
||||||
*/
|
*/
|
||||||
WaitLatch(&MyProc->procLatch, WL_LATCH_SET, 0);
|
WaitLatch(&MyProc->procLatch, WL_LATCH_SET, 0);
|
||||||
|
|
||||||
|
|
@ -751,9 +752,9 @@ shm_mq_send_bytes(shm_mq_handle *mqh, Size nbytes, void *data, bool nowait,
|
||||||
shm_mq_inc_bytes_written(mq, MAXALIGN(sendnow));
|
shm_mq_inc_bytes_written(mq, MAXALIGN(sendnow));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For efficiency, we don't set the reader's latch here. We'll
|
* For efficiency, we don't set the reader's latch here. We'll do
|
||||||
* do that only when the buffer fills up or after writing an
|
* that only when the buffer fills up or after writing an entire
|
||||||
* entire message.
|
* message.
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -801,10 +802,10 @@ shm_mq_receive_bytes(shm_mq *mq, Size bytes_needed, bool nowait,
|
||||||
/*
|
/*
|
||||||
* Fall out before waiting if the queue has been detached.
|
* Fall out before waiting if the queue has been detached.
|
||||||
*
|
*
|
||||||
* Note that we don't check for this until *after* considering
|
* Note that we don't check for this until *after* considering whether
|
||||||
* whether the data already available is enough, since the
|
* the data already available is enough, since the receiver can finish
|
||||||
* receiver can finish receiving a message stored in the buffer
|
* receiving a message stored in the buffer even after the sender has
|
||||||
* even after the sender has detached.
|
* detached.
|
||||||
*/
|
*/
|
||||||
if (detached)
|
if (detached)
|
||||||
return SHM_MQ_DETACHED;
|
return SHM_MQ_DETACHED;
|
||||||
|
|
@ -814,11 +815,11 @@ shm_mq_receive_bytes(shm_mq *mq, Size bytes_needed, bool nowait,
|
||||||
return SHM_MQ_WOULD_BLOCK;
|
return SHM_MQ_WOULD_BLOCK;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Wait for our latch to be set. It might already be set for
|
* Wait for our latch to be set. It might already be set for some
|
||||||
* some unrelated reason, but that'll just result in one extra
|
* unrelated reason, but that'll just result in one extra trip through
|
||||||
* trip through the loop. It's worth it to avoid resetting the
|
* the loop. It's worth it to avoid resetting the latch at top of
|
||||||
* latch at top of loop, because setting an already-set latch is
|
* loop, because setting an already-set latch is much cheaper than
|
||||||
* much cheaper than setting one that has been reset.
|
* setting one that has been reset.
|
||||||
*/
|
*/
|
||||||
WaitLatch(&MyProc->procLatch, WL_LATCH_SET, 0);
|
WaitLatch(&MyProc->procLatch, WL_LATCH_SET, 0);
|
||||||
|
|
||||||
|
|
@ -842,7 +843,7 @@ shm_mq_receive_bytes(shm_mq *mq, Size bytes_needed, bool nowait,
|
||||||
* non-NULL when our counterpart attaches to the queue.
|
* non-NULL when our counterpart attaches to the queue.
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
shm_mq_wait_internal(volatile shm_mq *mq, PGPROC * volatile *ptr,
|
shm_mq_wait_internal(volatile shm_mq *mq, PGPROC *volatile * ptr,
|
||||||
BackgroundWorkerHandle *handle)
|
BackgroundWorkerHandle *handle)
|
||||||
{
|
{
|
||||||
bool save_set_latch_on_sigusr1;
|
bool save_set_latch_on_sigusr1;
|
||||||
|
|
|
||||||
|
|
@ -96,7 +96,7 @@ shm_toc_allocate(shm_toc *toc, Size nbytes)
|
||||||
total_bytes = vtoc->toc_total_bytes;
|
total_bytes = vtoc->toc_total_bytes;
|
||||||
allocated_bytes = vtoc->toc_allocated_bytes;
|
allocated_bytes = vtoc->toc_allocated_bytes;
|
||||||
nentry = vtoc->toc_nentry;
|
nentry = vtoc->toc_nentry;
|
||||||
toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry)
|
toc_bytes = offsetof(shm_toc, toc_entry) +nentry * sizeof(shm_toc_entry)
|
||||||
+ allocated_bytes;
|
+ allocated_bytes;
|
||||||
|
|
||||||
/* Check for memory exhaustion and overflow. */
|
/* Check for memory exhaustion and overflow. */
|
||||||
|
|
@ -132,7 +132,7 @@ shm_toc_freespace(shm_toc *toc)
|
||||||
nentry = vtoc->toc_nentry;
|
nentry = vtoc->toc_nentry;
|
||||||
SpinLockRelease(&toc->toc_mutex);
|
SpinLockRelease(&toc->toc_mutex);
|
||||||
|
|
||||||
toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry);
|
toc_bytes = offsetof(shm_toc, toc_entry) +nentry * sizeof(shm_toc_entry);
|
||||||
Assert(allocated_bytes + BUFFERALIGN(toc_bytes) <= total_bytes);
|
Assert(allocated_bytes + BUFFERALIGN(toc_bytes) <= total_bytes);
|
||||||
return total_bytes - (allocated_bytes + BUFFERALIGN(toc_bytes));
|
return total_bytes - (allocated_bytes + BUFFERALIGN(toc_bytes));
|
||||||
}
|
}
|
||||||
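
shm_toc_freespace above computes how much of the segment is still unclaimed: the table of contents (header plus nentry entries) grows from the front while allocations are carved off the back, so free space is total_bytes minus allocated_bytes minus the buffer-aligned TOC size. A standalone sketch of that accounting — the struct layout and the 64-byte stand-in for BUFFERALIGN are assumptions, not the real shm_toc definitions:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define ALIGN_TO 64             /* stand-in for BUFFERALIGN's boundary */
#define BUFFERALIGN(LEN) \
    (((size_t) (LEN) + (ALIGN_TO - 1)) & ~((size_t) (ALIGN_TO - 1)))

typedef struct toc_entry
{
    uint64_t    key;
    size_t      offset;
} toc_entry;

typedef struct toc
{
    uint64_t    magic;
    size_t      total_bytes;
    size_t      allocated_bytes;
    size_t      nentry;
    toc_entry   entry[];        /* entries appended at the front */
} toc;

static size_t
toc_freespace(const toc *t)
{
    size_t      toc_bytes = offsetof(toc, entry)
        + t->nentry * sizeof(toc_entry);

    return t->total_bytes - (t->allocated_bytes + BUFFERALIGN(toc_bytes));
}

int
main(void)
{
    toc         t = {0};

    t.total_bytes = 65536;
    t.allocated_bytes = 4096;   /* carved off the end so far */
    t.nentry = 3;

    printf("free: %zu bytes\n", toc_freespace(&t));
    return 0;
}
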
|
|
@ -176,7 +176,7 @@ shm_toc_insert(shm_toc *toc, uint64 key, void *address)
|
||||||
total_bytes = vtoc->toc_total_bytes;
|
total_bytes = vtoc->toc_total_bytes;
|
||||||
allocated_bytes = vtoc->toc_allocated_bytes;
|
allocated_bytes = vtoc->toc_allocated_bytes;
|
||||||
nentry = vtoc->toc_nentry;
|
nentry = vtoc->toc_nentry;
|
||||||
toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry)
|
toc_bytes = offsetof(shm_toc, toc_entry) +nentry * sizeof(shm_toc_entry)
|
||||||
+ allocated_bytes;
|
+ allocated_bytes;
|
||||||
|
|
||||||
/* Check for memory exhaustion and overflow. */
|
/* Check for memory exhaustion and overflow. */
|
||||||
|
|
|
||||||
|
|
@ -889,8 +889,8 @@ LogStandbySnapshot(void)
|
||||||
running = GetRunningTransactionData();
|
running = GetRunningTransactionData();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* GetRunningTransactionData() acquired ProcArrayLock, we must release
|
* GetRunningTransactionData() acquired ProcArrayLock, we must release it.
|
||||||
* it. For Hot Standby this can be done before inserting the WAL record
|
* For Hot Standby this can be done before inserting the WAL record
|
||||||
* because ProcArrayApplyRecoveryInfo() rechecks the commit status using
|
* because ProcArrayApplyRecoveryInfo() rechecks the commit status using
|
||||||
* the clog. For logical decoding, though, the lock can't be released
|
* the clog. For logical decoding, though, the lock can't be released
|
||||||
* early becuase the clog might be "in the future" from the POV of the
|
* early becuase the clog might be "in the future" from the POV of the
|
||||||
|
|
@ -977,9 +977,9 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
|
||||||
/*
|
/*
|
||||||
* Ensure running_xacts information is synced to disk not too far in the
|
* Ensure running_xacts information is synced to disk not too far in the
|
||||||
* future. We don't want to stall anything though (i.e. use XLogFlush()),
|
* future. We don't want to stall anything though (i.e. use XLogFlush()),
|
||||||
* so we let the wal writer do it during normal
|
* so we let the wal writer do it during normal operation.
|
||||||
* operation. XLogSetAsyncXactLSN() conveniently will mark the LSN as
|
* XLogSetAsyncXactLSN() conveniently will mark the LSN as to-be-synced
|
||||||
* to-be-synced and nudge the WALWriter into action if sleeping. Check
|
* and nudge the WALWriter into action if sleeping. Check
|
||||||
* XLogBackgroundFlush() for details why a record might not be flushed
|
* XLogBackgroundFlush() for details why a record might not be flushed
|
||||||
* without it.
|
* without it.
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -266,10 +266,10 @@ inv_open(Oid lobjId, int flags, MemoryContext mcxt)
|
||||||
errmsg("large object %u does not exist", lobjId)));
|
errmsg("large object %u does not exist", lobjId)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We must register the snapshot in TopTransaction's resowner, because
|
* We must register the snapshot in TopTransaction's resowner, because it
|
||||||
* it must stay alive until the LO is closed rather than until the
|
* must stay alive until the LO is closed rather than until the current
|
||||||
* current portal shuts down. Do this after checking that the LO exists,
|
* portal shuts down. Do this after checking that the LO exists, to avoid
|
||||||
* to avoid leaking the snapshot if an error is thrown.
|
* leaking the snapshot if an error is thrown.
|
||||||
*/
|
*/
|
||||||
if (snapshot)
|
if (snapshot)
|
||||||
snapshot = RegisterSnapshotOnOwner(snapshot,
|
snapshot = RegisterSnapshotOnOwner(snapshot,
|
||||||
|
|
|
||||||
|
|
@ -920,8 +920,8 @@ LWLockWaitForVar(LWLock *l, uint64 *valptr, uint64 oldval, uint64 *newval)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Lock out cancel/die interrupts while we sleep on the lock. There is
|
* Lock out cancel/die interrupts while we sleep on the lock. There is no
|
||||||
* no cleanup mechanism to remove us from the wait queue if we got
|
* cleanup mechanism to remove us from the wait queue if we got
|
||||||
* interrupted.
|
* interrupted.
|
||||||
*/
|
*/
|
||||||
HOLD_INTERRUPTS();
|
HOLD_INTERRUPTS();
|
||||||
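
The comment explains why interrupts are held across the sleep: there is no cleanup path that would remove the process from the lock's wait queue if a cancel arrived mid-wait. As a loose, standalone analogue only, the sketch below blocks cancellation signals around a critical wait with sigprocmask. PostgreSQL's HOLD_INTERRUPTS()/RESUME_INTERRUPTS() are not signal masks — they defer interrupt processing inside the backend — so this shows the shape of the protection, not the actual mechanism.

#include <signal.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
    sigset_t    block,
                saved;

    sigemptyset(&block);
    sigaddset(&block, SIGINT);
    sigaddset(&block, SIGTERM);

    /* "hold interrupts": nothing can cancel us while we sleep */
    sigprocmask(SIG_BLOCK, &block, &saved);

    printf("sleeping with SIGINT/SIGTERM blocked...\n");
    sleep(2);                   /* stand-in for sleeping on the lock */

    /* "resume interrupts": restore the previous mask */
    sigprocmask(SIG_SETMASK, &saved, NULL);
    printf("done\n");
    return 0;
}
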
|
|
|
||||||
|
|
@ -4919,6 +4919,7 @@ is_admin_of_role(Oid member, Oid role)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (member == role)
|
if (member == role)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A role can admin itself when it matches the session user and we're
|
* A role can admin itself when it matches the session user and we're
|
||||||
* outside any security-restricted operation, SECURITY DEFINER or
|
* outside any security-restricted operation, SECURITY DEFINER or
|
||||||
|
|
|
||||||
|
|
@ -1747,6 +1747,7 @@ Datum
|
||||||
array_cardinality(PG_FUNCTION_ARGS)
|
array_cardinality(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
|
ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
|
||||||
|
|
||||||
PG_RETURN_INT32(ArrayGetNItems(ARR_NDIM(v), ARR_DIMS(v)));
|
PG_RETURN_INT32(ArrayGetNItems(ARR_NDIM(v), ARR_DIMS(v)));
|
||||||
}
|
}
|
||||||
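
array_cardinality simply hands ARR_NDIM and ARR_DIMS to ArrayGetNItems. For cardinality purposes that amounts to the product of the per-dimension lengths, with zero dimensions meaning an empty array. A standalone sketch, minus the real function's overflow checks:

#include <stdio.h>

static int
array_nitems(int ndim, const int *dims)
{
    int         n;
    int         i;

    if (ndim <= 0)
        return 0;               /* empty array */

    n = 1;
    for (i = 0; i < ndim; i++)
        n *= dims[i];
    return n;
}

int
main(void)
{
    int         dims[] = {3, 4, 2};

    printf("cardinality = %d\n", array_nitems(3, dims));   /* prints 24 */
    return 0;
}
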
|
|
||||||
|
|
|
||||||
Some files were not shown because too many files have changed in this diff.