/*
* pl_ist_reset - Abort the given number of ISTs and set a Python error.
*
* Used to handle cases where a code object fails to resolve open transactions.
*/
void
pl_ist_reset(unsigned long count)
{
HOLD_INTERRUPTS();
for (; count > 0; --count)
RollbackAndReleaseCurrentSubTransaction();
RESUME_INTERRUPTS();
}
Developer ID: fdr, Project: pg-python, Lines of code: 13, Source file: ist.c
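Every example on this page pairs HOLD_INTERRUPTS() with RESUME_INTERRUPTS(). For orientation, these are simple nesting-counter macros over InterruptHoldoffCount, shown here approximately as they appear in PostgreSQL's src/include/miscadmin.h; CHECK_FOR_INTERRUPTS() only services a pending cancel or die request while the count is zero:

/* Approximate definitions from src/include/miscadmin.h */
extern volatile uint32 InterruptHoldoffCount;

#define HOLD_INTERRUPTS()  (InterruptHoldoffCount++)

#define RESUME_INTERRUPTS() \
do { \
	Assert(InterruptHoldoffCount > 0); \
	InterruptHoldoffCount--; \
} while (0)

Because the count nests, any code path that holds interrupts must resume them exactly once per hold, which is the invariant the examples below are careful to preserve.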
Example 2: LockErrorCleanup
/*
* Cancel any pending wait for lock, when aborting a transaction, and revert
* any strong lock count acquisition for a lock being acquired.
*
* (Normally, this would only happen if we accept a cancel/die
* interrupt while waiting; but an ereport(ERROR) before or during the lock
* wait is within the realm of possibility, too.)
*/
void
LockErrorCleanup(void)
{
LWLock *partitionLock;
DisableTimeoutParams timeouts[2];
HOLD_INTERRUPTS();
AbortStrongLockAcquire();
/* Nothing to do if we weren't waiting for a lock */
if (lockAwaited == NULL)
{
RESUME_INTERRUPTS();
return;
}
/*
* Turn off the deadlock and lock timeout timers, if they are still
* running (see ProcSleep). Note we must preserve the LOCK_TIMEOUT
* indicator flag, since this function is executed before
* ProcessInterrupts when responding to SIGINT; else we'd lose the
* knowledge that the SIGINT came from a lock timeout and not an external
* source.
*/
timeouts[0].id = DEADLOCK_TIMEOUT;
timeouts[0].keep_indicator = false;
timeouts[1].id = LOCK_TIMEOUT;
timeouts[1].keep_indicator = true;
disable_timeouts(timeouts, 2);
/* Unlink myself from the wait queue, if on it (might not be anymore!) */
partitionLock = LockHashPartitionLock(lockAwaited->hashcode);
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
if (MyProc->links.next != NULL)
{
/* We could not have been granted the lock yet */
RemoveFromWaitQueue(MyProc, lockAwaited->hashcode);
}
else
{
/*
* Somebody kicked us off the lock queue already. Perhaps they
* granted us the lock, or perhaps they detected a deadlock. If they
* did grant us the lock, we'd better remember it in our local lock
* table.
*/
if (MyProc->waitStatus == STATUS_OK)
GrantAwaitedLock();
}
lockAwaited = NULL;
LWLockRelease(partitionLock);
RESUME_INTERRUPTS();
}
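The keep_indicator distinction above only matters because of the consumer on the other side. As a hedged sketch, the corresponding check in ProcessInterrupts (condensed; the exact surrounding logic varies across PostgreSQL versions) looks roughly like this:

/* Condensed sketch of the ProcessInterrupts() consumer side */
if (QueryCancelPending)
{
	QueryCancelPending = false;

	/* second argument true: read and clear the indicator */
	if (get_timeout_indicator(LOCK_TIMEOUT, true))
		ereport(ERROR,
				(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
				 errmsg("canceling statement due to lock timeout")));

	ereport(ERROR,
			(errcode(ERRCODE_QUERY_CANCELED),
			 errmsg("canceling statement due to user request")));
}

If LockErrorCleanup cleared the LOCK_TIMEOUT indicator, the first branch could never fire and a lock timeout would be misreported as a plain user cancel.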
/*
* LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
*
* If the lock is not available, return FALSE with no side-effects.
*
* If successful, cancel/die interrupts are held off until lock release.
*/
bool
LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
{
volatile LWLock *lock = LWLockArray + lockid;
bool mustwait;
PRINT_LWDEBUG("LWLockConditionalAcquire", lockid, lock);
/*
* Lock out cancel/die interrupts until we exit the code section
* protected by the LWLock. This ensures that interrupts will not
* interfere with manipulations of data structures in shared memory.
*/
HOLD_INTERRUPTS();
/* Acquire mutex. Time spent holding mutex should be short! */
SpinLockAcquire_NoHoldoff(&lock->mutex);
/* If I can get the lock, do so quickly. */
if (mode == LW_EXCLUSIVE)
{
if (lock->exclusive == 0 && lock->shared == 0)
{
lock->exclusive++;
mustwait = false;
}
else
mustwait = true;
}
else
{
if (lock->exclusive == 0)
{
lock->shared++;
mustwait = false;
}
else
mustwait = true;
}
/* We are done updating shared state of the lock itself. */
SpinLockRelease_NoHoldoff(&lock->mutex);
if (mustwait)
{
/* Failed to get lock, so release interrupt holdoff */
RESUME_INTERRUPTS();
LOG_LWDEBUG("LWLockConditionalAcquire", lockid, "failed");
}
else
{
/* Add lock to list of locks held by this backend */
Assert(num_held_lwlocks < MAX_SIMUL_LWLOCKS);
held_lwlocks[num_held_lwlocks++] = lockid;
}
return !mustwait;
}
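Because it returns immediately instead of sleeping, LWLockConditionalAcquire suits opportunistic work. A minimal caller sketch; SomeOptionalLock and update_shared_stats are hypothetical names, not part of the source above:

if (LWLockConditionalAcquire(SomeOptionalLock, LW_EXCLUSIVE))
{
	/* lock held; cancel/die interrupts stay held off until release */
	update_shared_stats();            /* hypothetical shared-memory work */
	LWLockRelease(SomeOptionalLock);  /* does RESUME_INTERRUPTS() internally */
}
else
{
	/* lock busy: skip the optional work rather than block */
}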
/*
* LWLockReleaseAll - release all currently-held locks
*
* Used to clean up after ereport(ERROR). An important difference between this
* function and retail LWLockRelease calls is that InterruptHoldoffCount is
* unchanged by this operation. This is necessary since InterruptHoldoffCount
* has been set to an appropriate level earlier in error recovery. We could
* decrement it below zero if we allow it to drop for each released lock!
*/
void
LWLockReleaseAll(void)
{
while (num_held_lwlocks > 0)
{
HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */
LWLockRelease(held_lwlocks[num_held_lwlocks - 1]);
}
}
Developer ID: fgp, Project: lockbench, Lines of code: 19, Source file: lwlock.c
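The HOLD_INTERRUPTS() inside the LWLockReleaseAll loop looks redundant until you recall that LWLockRelease() ends with a RESUME_INTERRUPTS(). A sketch of the per-iteration counter arithmetic, assuming that behavior:

uint32 saved = InterruptHoldoffCount;   /* level chosen by error recovery */
HOLD_INTERRUPTS();                      /* count is now saved + 1 */
LWLockRelease(held_lwlocks[num_held_lwlocks - 1]); /* back to saved */
Assert(InterruptHoldoffCount == saved); /* net change per iteration: zero */

So however many locks were held at error time, the holdoff level set earlier in recovery survives the cleanup.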
Example 5: report_commerror
/*
* Report a COMMERROR.
*
 * This function holds off interrupts before reporting the error to avoid
 * a self-deadlock situation; see MPP-13718 for more info.
*/
static void
report_commerror(const char *err_msg)
{
HOLD_INTERRUPTS();
ereport(COMMERROR,
(errcode(ERRCODE_PROTOCOL_VIOLATION),
errmsg("%s",err_msg)));
RESUME_INTERRUPTS();
}
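A hypothetical call site: the protocol layer reports a failed client write through this wrapper rather than through a bare ereport, so the COMMERROR path cannot re-enter itself. secure_write is the real backend I/O routine; the surrounding logic here is illustrative only:

if (secure_write(MyProcPort, buf, len) < 0)
	report_commerror("could not send data to client");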
void
FileRepSubProcess_Main()
{
const char *statmsg;
MemoryContext fileRepSubProcessMemoryContext;
sigjmp_buf local_sigjmp_buf;
MyProcPid = getpid();
MyStartTime = time(NULL);
/*
 * Create a PGPROC so we can use LWLocks in FileRep sub-processes. The
 * routine also registers a cleanup callback at process exit.
*/
InitAuxiliaryProcess();
InitBufferPoolBackend();
FileRepSubProcess_ConfigureSignals();
/*
* If an exception is encountered, processing resumes here.
*
* See notes in postgres.c about the design of this coding.
*/
if (sigsetjmp(local_sigjmp_buf, 1) != 0)
{
/* Prevent interrupts while cleaning up */
HOLD_INTERRUPTS();
/* Report the error to the server log */
EmitErrorReport();
LWLockReleaseAll();
if (FileRepPrimary_IsResyncManagerOrWorker())
{
LockReleaseAll(DEFAULT_LOCKMETHOD, false);
}
if (FileRepIsBackendSubProcess(fileRepProcessType))
{
AbortBufferIO();
UnlockBuffers();
/* buffer pins are released here: */
ResourceOwnerRelease(CurrentResourceOwner,
RESOURCE_RELEASE_BEFORE_LOCKS,
false, true);
}
/*
* We can now go away. Note that because we'll call InitProcess, a
* callback will be registered to do ProcKill, which will clean up
* necessary state.
*/
proc_exit(0);
}
/* We can now handle ereport(ERROR) */
PG_exception_stack = &local_sigjmp_buf;
PG_SETMASK(&UnBlockSig);
/*
* Identify myself via ps
*/
statmsg = FileRepProcessTypeToString[fileRepProcessType];
init_ps_display(statmsg, "", "", "");
/* Create the memory context where cross-transaction state is stored */
fileRepSubProcessMemoryContext = AllocSetContextCreate(TopMemoryContext,
"filerep subprocess memory context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
MemoryContextSwitchTo(fileRepSubProcessMemoryContext);
stateChangeRequestCounter++;
FileRepSubProcess_ProcessSignals();
switch (fileRepProcessType)
{
case FileRepProcessTypePrimarySender:
FileRepPrimary_StartSender();
break;
case FileRepProcessTypeMirrorReceiver:
FileRepMirror_StartReceiver();
break;
case FileRepProcessTypeMirrorConsumer:
case FileRepProcessTypeMirrorConsumerWriter:
//......... portion of code omitted here .........
/*
* Main entry point for checkpointer process
*
* This is invoked from AuxiliaryProcessMain, which has already created the
* basic execution environment, but not enabled signals yet.
*/
void
CheckpointerMain(void)
{
sigjmp_buf local_sigjmp_buf;
MemoryContext checkpointer_context;
CheckpointerShmem->checkpointer_pid = MyProcPid;
/*
* Properly accept or ignore signals the postmaster might send us
*
* Note: we deliberately ignore SIGTERM, because during a standard Unix
* system shutdown cycle, init will SIGTERM all processes at once. We
* want to wait for the backends to exit, whereupon the postmaster will
* tell us it's okay to shut down (via SIGUSR2).
*/
pqsignal(SIGHUP, ChkptSigHupHandler); /* set flag to read config
* file */
pqsignal(SIGINT, ReqCheckpointHandler); /* request checkpoint */
pqsignal(SIGTERM, SIG_IGN); /* ignore SIGTERM */
pqsignal(SIGQUIT, chkpt_quickdie); /* hard crash time */
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, chkpt_sigusr1_handler);
pqsignal(SIGUSR2, ReqShutdownHandler); /* request shutdown */
/*
* Reset some signals that are accepted by postmaster but not here
*/
pqsignal(SIGCHLD, SIG_DFL);
pqsignal(SIGTTIN, SIG_DFL);
pqsignal(SIGTTOU, SIG_DFL);
pqsignal(SIGCONT, SIG_DFL);
pqsignal(SIGWINCH, SIG_DFL);
/* We allow SIGQUIT (quickdie) at all times */
sigdelset(&BlockSig, SIGQUIT);
/*
* Initialize so that first time-driven event happens at the correct time.
*/
last_checkpoint_time = last_xlog_switch_time = (pg_time_t) time(NULL);
/*
* Create a resource owner to keep track of our resources (currently only
* buffer pins).
*/
CurrentResourceOwner = ResourceOwnerCreate(NULL, "Checkpointer");
/*
* Create a memory context that we will do all our work in. We do this so
* that we can reset the context during error recovery and thereby avoid
* possible memory leaks. Formerly this code just ran in
* TopMemoryContext, but resetting that would be a really bad idea.
*/
checkpointer_context = AllocSetContextCreate(TopMemoryContext,
"Checkpointer",
ALLOCSET_DEFAULT_SIZES);
MemoryContextSwitchTo(checkpointer_context);
/*
* If an exception is encountered, processing resumes here.
*
* See notes in postgres.c about the design of this coding.
*/
if (sigsetjmp(local_sigjmp_buf, 1) != 0)
{
/* Since not using PG_TRY, must reset error stack by hand */
error_context_stack = NULL;
/* Prevent interrupts while cleaning up */
HOLD_INTERRUPTS();
/* Report the error to the server log */
EmitErrorReport();
/*
* These operations are really just a minimal subset of
* AbortTransaction(). We don't have very many resources to worry
* about in checkpointer, but we do have LWLocks, buffers, and temp
* files.
*/
LWLockReleaseAll();
ConditionVariableCancelSleep();
pgstat_report_wait_end();
AbortBufferIO();
UnlockBuffers();
/* buffer pins are released here: */
ResourceOwnerRelease(CurrentResourceOwner,
RESOURCE_RELEASE_BEFORE_LOCKS,
false, true);
/* we needn't bother with the other ResourceOwnerRelease phases */
AtEOXact_Buffers(false);
AtEOXact_SMgr();
//......... portion of code omitted here .........
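The private memory context created above pays off immediately after this sigsetjmp block: error recovery can discard everything the failed iteration allocated. A condensed sketch of that recovery tail, approximating checkpointer.c:

/* Sketch: wipe per-iteration allocations after an error */
MemoryContextSwitchTo(checkpointer_context);
FlushErrorState();
MemoryContextResetAndDeleteChildren(checkpointer_context);

/* now allow interrupts again and re-enter the main loop */
RESUME_INTERRUPTS();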
/*
* master_create_worker_shards creates empty shards for the given table based
* on the specified number of initial shards. The function first gets a list of
* candidate nodes and issues DDL commands on the nodes to create empty shard
* placements on those nodes. The function then updates metadata on the master
* node to make this shard (and its placements) visible. Note that the function
* assumes the table is hash partitioned and calculates the min/max hash token
* ranges for each shard, giving them an equal split of the hash space.
*/
Datum
master_create_worker_shards(PG_FUNCTION_ARGS)
{
text *tableNameText = PG_GETARG_TEXT_P(0);
int32 shardCount = PG_GETARG_INT32(1);
int32 replicationFactor = PG_GETARG_INT32(2);
Oid distributedTableId = ResolveRelationId(tableNameText);
char relationKind = get_rel_relkind(distributedTableId);
char *tableName = text_to_cstring(tableNameText);
char shardStorageType = '\0';
int32 shardIndex = 0;
List *workerNodeList = NIL;
List *ddlCommandList = NIL;
int32 workerNodeCount = 0;
uint32 placementAttemptCount = 0;
uint32 hashTokenIncrement = 0;
List *existingShardList = NIL;
/* make sure table is hash partitioned */
CheckHashPartitionedTable(distributedTableId);
/* validate that shards haven't already been created for this table */
existingShardList = LoadShardIntervalList(distributedTableId);
if (existingShardList != NIL)
{
ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("table \"%s\" has already had shards created for it",
tableName)));
}
/* make sure that at least one shard is specified */
if (shardCount <= 0)
{
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("shardCount must be positive")));
}
/* make sure that at least one replica is specified */
if (replicationFactor <= 0)
{
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("replicationFactor must be positive")));
}
/* calculate the split of the hash space */
hashTokenIncrement = UINT_MAX / shardCount;
/* load and sort the worker node list for deterministic placement */
workerNodeList = ParseWorkerNodeFile(WORKER_LIST_FILENAME);
workerNodeList = SortList(workerNodeList, CompareWorkerNodes);
/* make sure we don't process cancel signals until all shards are created */
HOLD_INTERRUPTS();
/* retrieve the DDL commands for the table */
ddlCommandList = TableDDLCommandList(distributedTableId);
workerNodeCount = list_length(workerNodeList);
if (replicationFactor > workerNodeCount)
{
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("replicationFactor (%d) exceeds number of worker nodes "
"(%d)", replicationFactor, workerNodeCount),
errhint("Add more worker nodes or try again with a lower "
"replication factor.")));
}
/* if we have enough nodes, add an extra placement attempt for backup */
placementAttemptCount = (uint32) replicationFactor;
if (workerNodeCount > replicationFactor)
{
placementAttemptCount++;
}
/* set shard storage type according to relation type */
if (relationKind == RELKIND_FOREIGN_TABLE)
{
shardStorageType = SHARD_STORAGE_FOREIGN;
}
else
{
shardStorageType = SHARD_STORAGE_TABLE;
}
for (shardIndex = 0; shardIndex < shardCount; shardIndex++)
{
uint64 shardId = NextSequenceId(SHARD_ID_SEQUENCE_NAME);
int32 placementCount = 0;
uint32 placementIndex = 0;
uint32 roundRobinNodeIndex = shardIndex % workerNodeCount;
//......... portion of code omitted here .........
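Inside the elided loop body, the "equal split of the hash space" from the header comment comes down to two lines of arithmetic per shard. Approximately, mirroring the Citus calculation (the last shard absorbs the integer-division remainder):

/* Sketch: hash token range for shardIndex in [0, shardCount) */
int32 shardMinHashToken = INT32_MIN + (shardIndex * hashTokenIncrement);
int32 shardMaxHashToken = shardMinHashToken + (hashTokenIncrement - 1);

if (shardIndex == (shardCount - 1))
	shardMaxHashToken = INT32_MAX;    /* last shard takes the remainder */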
/*
* AutoVacMain
*/
NON_EXEC_STATIC void
AutoVacMain(int argc, char *argv[])
{
ListCell *cell;
List *dblist;
autovac_dbase *db;
TransactionId xidForceLimit;
bool for_xid_wrap;
sigjmp_buf local_sigjmp_buf;
/* we are a postmaster subprocess now */
IsUnderPostmaster = true;
am_autovacuum = true;
/* MPP-4990: Autovacuum always runs in utility mode */
Gp_role = GP_ROLE_UTILITY;
/* reset MyProcPid */
MyProcPid = getpid();
/* record Start Time for logging */
MyStartTime = time(NULL);
/* Identify myself via ps */
init_ps_display("autovacuum process", "", "", "");
SetProcessingMode(InitProcessing);
/*
* If possible, make this process a group leader, so that the postmaster
* can signal any child processes too. (autovacuum probably never has
* any child processes, but for consistency we make all postmaster
* child processes do this.)
*/
#ifdef HAVE_SETSID
if (setsid() < 0)
elog(FATAL, "setsid() failed: %m");
#endif
/*
* Set up signal handlers. We operate on databases much like a regular
* backend, so we use the same signal handling. See equivalent code in
* tcop/postgres.c.
*
* Currently, we don't pay attention to postgresql.conf changes that
* happen during a single daemon iteration, so we can ignore SIGHUP.
*/
pqsignal(SIGHUP, SIG_IGN);
/*
* SIGINT is used to signal cancelling the current table's vacuum; SIGTERM
* means abort and exit cleanly, and SIGQUIT means abandon ship.
*/
pqsignal(SIGINT, StatementCancelHandler);
pqsignal(SIGTERM, die);
pqsignal(SIGQUIT, quickdie);
pqsignal(SIGALRM, handle_sig_alarm);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, procsignal_sigusr1_handler);
/* We don't listen for async notifies */
pqsignal(SIGUSR2, SIG_IGN);
pqsignal(SIGFPE, FloatExceptionHandler);
pqsignal(SIGCHLD, SIG_DFL);
/* Early initialization */
BaseInit();
/*
* Create a per-backend PGPROC struct in shared memory, except in the
* EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
* this before we can use LWLocks (and in the EXEC_BACKEND case we already
* had to do some stuff with LWLocks).
*/
#ifndef EXEC_BACKEND
InitProcess();
#endif
/*
* If an exception is encountered, processing resumes here.
*
* See notes in postgres.c about the design of this coding.
*/
if (sigsetjmp(local_sigjmp_buf, 1) != 0)
{
/* Prevent interrupts while cleaning up */
HOLD_INTERRUPTS();
/* Report the error to the server log */
EmitErrorReport();
/*
* We can now go away. Note that because we called InitProcess, a
* callback was registered to do ProcKill, which will clean up
* necessary state.
*/
proc_exit(0);
//......... portion of code omitted here .........
Developer ID: huor, Project: gpdb, Lines of code: 101, Source file: autovacuum.c
Example 12: workfile_mgr_cleanup_set
/*
* Workfile-manager specific function to clean up before releasing a
* workfile set from the cache.
*
*/
static void
workfile_mgr_cleanup_set(const void *resource)
{
workfile_set *work_set = (workfile_set *) resource;
/*
 * This callback must return cleanly EVERY time; it must not throw an
 * exception, because it will never be called again for this set.
 * We therefore clean up as much as we can right here, which means
 * holding interrupts and catching and handling all exceptions.
*/
if (work_set->on_disk)
{
ereport(gp_workfile_caching_loglevel,
(errmsg("workfile mgr cleanup deleting set: key=0x%0xd, size=" INT64_FORMAT
" in_progress_size=" INT64_FORMAT " path=%s",
work_set->key,
work_set->size,
work_set->in_progress_size,
work_set->path),
errprintstack(true)));
Assert(NULL == work_set->set_plan);
PG_TRY();
{
#ifdef FAULT_INJECTOR
FaultInjector_InjectFaultIfSet(
WorkfileCleanupSet,
DDLNotSpecified,
"", /* databaseName */
"" /* tableName */
);
#endif
/* Prevent interrupts while cleaning up */
HOLD_INTERRUPTS();
workfile_mgr_delete_set_directory(work_set->path);
/* Now we can allow interrupts again */
RESUME_INTERRUPTS();
}
PG_CATCH();
{
elog(LOG, "Cleaning up workfile set directory path=%s failed. Proceeding",
work_set->path);
/* We're not re-throwing the error. Otherwise we'll end up having
* to clean up again, probably failing again.
*/
}
PG_END_TRY();
/*
* The most accurate size of a workset is recorded in work_set->in_progress_size.
* work_set->size is only updated when we close a file, so it lags behind
*/
Assert(work_set->in_progress_size >= work_set->size);
int64 size_to_delete = work_set->in_progress_size;
elog(gp_workfile_caching_loglevel, "Subtracting " INT64_FORMAT " from workfile diskspace", size_to_delete);
/*
* When subtracting the size of this workset from our accounting,
* only update the per-query counter if we created the workset.
* In that case, the state is ACQUIRED, otherwise is CACHED or DELETED
*/
CacheEntry *cacheEntry = CACHE_ENTRY_HEADER(resource);
bool update_query_space = (cacheEntry->state == CACHE_ENTRY_ACQUIRED);
WorkfileDiskspace_Commit(0, size_to_delete, update_query_space);
}
else
{
/* Non-physical workfile set, we need to free up the plan memory */
if (NULL != work_set->set_plan)
{
/* check set_plan itself before dereferencing it for serialized_plan */
if (NULL != work_set->set_plan->serialized_plan)
{
pfree(work_set->set_plan->serialized_plan);
}
pfree(work_set->set_plan);
}
}
}
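The WorkfileDiskspace_Commit(0, size_to_delete, ...) call above is the teardown half of a reserve-then-commit accounting protocol. A hedged sketch of the write-side half, assuming GPDB's WorkfileDiskspace_Reserve API behaves as its name suggests; write_block is a hypothetical I/O helper:

int64 written;

/* reserve quota before writing; fail the query if the limit is hit */
if (!WorkfileDiskspace_Reserve(bytes_requested))
	ereport(ERROR,
			(errmsg("workfile diskspace limit exceeded")));

written = write_block(work_set, buf, bytes_requested);  /* hypothetical */

/* commit actual usage against the reservation */
WorkfileDiskspace_Commit(written, bytes_requested, true /* update_query_space */);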
/*
* LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
*
* If the lock is not available, return FALSE with no side-effects.
*
* If successful, cancel/die interrupts are held off until lock release.
*/
bool
LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
{
volatile LWLock *lock = &(LWLockArray[lockid].lock);
#if LWLOCK_LOCK_PARTS > 1
volatile LWLockPart *part = LWLOCK_PART(lock, lockid, MyBackendId);
#endif
bool mustwait;
PRINT_LWDEBUG("LWLockConditionalAcquire", lockid, lock);
/* Ensure we will have room to remember the lock */
if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
elog(ERROR, "too many LWLocks taken");
/*
* Lock out cancel/die interrupts until we exit the code section protected
* by the LWLock. This ensures that interrupts will not interfere with
* manipulations of data structures in shared memory.
*/
HOLD_INTERRUPTS();
if (mode == LW_SHARED)
{
#ifdef LWLOCK_PART_SHARED_OPS_ATOMIC
/* Increment shared counter partition. If there's no contention,
* this is sufficient to take the lock
*/
LWLOCK_PART_SHARED_POSTINC_ATOMIC(lock, lockid, part, MyBackendId);
LWLOCK_PART_SHARED_FENCE();
/* A concurrent exclusive locking attempt does the following
* three steps
* 1) Acquire mutex
* 2) Check shared counter partitions for readers.
* 3a) If found add proc to wait queue, block, restart at (1)
* 3b) If not found, set exclusive flag, continue with (4)
* 4) Enter protected section
* The fence after the atomic add above ensures that no further
* such attempt can proceed to (3b) or beyond. There may be
* pre-existing exclusive locking attempts at step (3b) or beyond,
* but we can recognize those by either the mutex being taken, or
* the exclusive flag being set. Conversely, if we see neither, we
* may proceed and enter the protected section.
*
* FIXME: This doesn't work if slock_t is a struct or doesn't
* use 0 for state "unlocked".
*/
if ((lock->mutex == 0) && (lock->exclusive == 0))
goto lock_acquired;
/* At this point, we don't know if the concurrent exclusive locker
* has proceeded to (3b) or blocked. We must take the mutex and
* re-check
*/
#endif /* LWLOCK_PART_SHARED_OPS_ATOMIC */
/* Acquire mutex. Time spent holding mutex should be short! */
SpinLockAcquire(&lock->mutex);
if (lock->exclusive == 0)
{
#ifdef LWLOCK_PART_SHARED_OPS_ATOMIC
/* Already incremented the shared counter partition above */
#else
lock->shared++;
#endif
mustwait = false;
}
else
{
#ifdef LWLOCK_PART_SHARED_OPS_ATOMIC
/* Must undo shared counter partition increment. Note that
* we *need* to do that while holding the mutex. Otherwise,
* the exclusive lock could be released and attempted to be
* re-acquired before we undo the increment. That attempt
* would then block, even though there'd be no lock holder
* left
*/
LWLOCK_PART_SHARED_POSTDEC_ATOMIC(lock, lockid, part, MyBackendId);
#endif
mustwait = true;
}
}
else
{
/* Step (1). Acquire mutex. Time spent holding mutex should be
* short!
*/
SpinLockAcquire(&lock->mutex);
if (lock->exclusive == 0)
//......... portion of code omitted here .........
Developer ID: fgp, Project: lockbench, Lines of code: 101, Source file: lwlock.c
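The fast-path protocol described in the comments above (bump a shared counter partition, fence, then check for an exclusive holder) is independent of the PostgreSQL scaffolding. A standalone illustration of the same idiom in C11 atomics; all names here are stand-ins, not the lockbench API:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int shared_count;       /* stand-in for the counter partitions */
static atomic_int mutex_word;         /* stand-in for lock->mutex */
static atomic_int exclusive_holder;   /* stand-in for lock->exclusive */

static bool
try_shared_fastpath(void)
{
	atomic_fetch_add(&shared_count, 1);          /* publish read intent */
	atomic_thread_fence(memory_order_seq_cst);   /* cf. LWLOCK_PART_SHARED_FENCE */

	/* safe only if no exclusive locker has reached its step (3b) */
	if (atomic_load(&mutex_word) == 0 && atomic_load(&exclusive_holder) == 0)
		return true;                             /* acquired on the fast path */

	/* Caller must fall back to the slow path: take the mutex, re-check,
	 * and undo the increment under the mutex if the lock is truly busy. */
	return false;
}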
Example 14: BackgroundWriterMain
/*
* Main entry point for bgwriter process
*
* This is invoked from BootstrapMain, which has already created the basic
* execution environment, but not enabled signals yet.
*/
void
BackgroundWriterMain(void)
{
sigjmp_buf local_sigjmp_buf;
MemoryContext bgwriter_context;
BgWriterShmem->bgwriter_pid = MyProcPid;
am_bg_writer = true;
/*
* If possible, make this process a group leader, so that the postmaster
* can signal any child processes too. (bgwriter probably never has any
* child processes, but for consistency we make all postmaster child
* processes do this.)
*/
#ifdef HAVE_SETSID
if (setsid() < 0)
elog(FATAL, "setsid() failed: %m");
#endif
/*
* Properly accept or ignore signals the postmaster might send us
*
* Note: we deliberately ignore SIGTERM, because during a standard Unix
* system shutdown cycle, init will SIGTERM all processes at once. We
* want to wait for the backends to exit, whereupon the postmaster will
* tell us it's okay to shut down (via SIGUSR2).
*
* SIGUSR1 is presently unused; keep it spare in case someday we want this
* process to participate in sinval messaging.
*/
pqsignal(SIGHUP, BgSigHupHandler); /* set flag to read config file */
pqsignal(SIGINT, ReqCheckpointHandler); /* request checkpoint */
pqsignal(SIGTERM, SIG_IGN); /* ignore SIGTERM */
pqsignal(SIGQUIT, bg_quickdie); /* hard crash time */
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, SIG_IGN); /* reserve for sinval */
pqsignal(SIGUSR2, ReqShutdownHandler); /* request shutdown */
/*
* Reset some signals that are accepted by postmaster but not here
*/
pqsignal(SIGCHLD, SIG_DFL);
pqsignal(SIGTTIN, SIG_DFL);
pqsignal(SIGTTOU, SIG_DFL);
pqsignal(SIGCONT, SIG_DFL);
pqsignal(SIGWINCH, SIG_DFL);
/* We allow SIGQUIT (quickdie) at all times */
#ifdef HAVE_SIGPROCMASK
sigdelset(&BlockSig, SIGQUIT);
#else
BlockSig &= ~(sigmask(SIGQUIT));
#endif
/*
* Initialize so that first time-driven event happens at the correct time.
*/
last_checkpoint_time = last_xlog_switch_time = time(NULL);
/*
* Create a resource owner to keep track of our resources (currently only
* buffer pins).
*/
CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer");
/*
* Create a memory context that we will do all our work in. We do this so
* that we can reset the context during error recovery and thereby avoid
* possible memory leaks. Formerly this code just ran in
* TopMemoryContext, but resetting that would be a really bad idea.
*/
bgwriter_context = AllocSetContextCreate(TopMemoryContext,
"Background Writer",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
MemoryContextSwitchTo(bgwriter_context);
/*
* If an exception is encountered, processing resumes here.
*
* See notes in postgres.c about the design of this coding.
*/
if (sigsetjmp(local_sigjmp_buf, 1) != 0)
{
/* Since not using PG_TRY, must reset error stack by hand */
error_context_stack = NULL;
/* Prevent interrupts while cleaning up */
HOLD_INTERRUPTS();
/* Report the error to the server log */
//......... portion of code omitted here .........
/*
* Main entry point for walwriter process
*
* This is invoked from BootstrapMain, which has already created the basic
* execution environment, but not enabled signals yet.
*/
void
WalWriterMain(void)
{
sigjmp_buf local_sigjmp_buf;
MemoryContext walwriter_context;
/*
* If possible, make this process a group leader, so that the postmaster
* can signal any child processes too. (walwriter probably never has any
* child processes, but for consistency we make all postmaster child
* processes do this.)
*/
#ifdef HAVE_SETSID
if (setsid() < 0)
elog(FATAL, "setsid() failed: %m");
#endif
/*
* Properly accept or ignore signals the postmaster might send us
*
* We have no particular use for SIGINT at the moment, but seems
* reasonable to treat like SIGTERM.
*/
pqsignal(SIGHUP, WalSigHupHandler); /* set flag to read config file */
pqsignal(SIGINT, WalShutdownHandler); /* request shutdown */
pqsignal(SIGTERM, WalShutdownHandler); /* request shutdown */
pqsignal(SIGQUIT, wal_quickdie); /* hard crash time */
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */
pqsignal(SIGUSR2, SIG_IGN); /* not used */
/*
* Reset some signals that are accepted by postmaster but not here
*/
pqsignal(SIGCHLD, SIG_DFL);
pqsignal(SIGTTIN, SIG_DFL);
pqsignal(SIGTTOU, SIG_DFL);
pqsignal(SIGCONT, SIG_DFL);
pqsignal(SIGWINCH, SIG_DFL);
/* We allow SIGQUIT (quickdie) at all times */
sigdelset(&BlockSig, SIGQUIT);
/*
* Create a resource owner to keep track of our resources (not clear that
* we need this, but may as well have one).
*/
CurrentResourceOwner = ResourceOwnerCreate(NULL, "Wal Writer");
/*
* Create a memory context that we will do all our work in. We do this so
* that we can reset the context during error recovery and thereby avoid
* possible memory leaks. Formerly this code just ran in
* TopMemoryContext, but resetting that would be a really bad idea.
*/
walwriter_context = AllocSetContextCreate(TopMemoryContext,
"Wal Writer",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
MemoryContextSwitchTo(walwriter_context);
/*
* If an exception is encountered, processing resumes here.
*
* This code is heavily based on bgwriter.c, q.v.
*/
if (sigsetjmp(local_sigjmp_buf, 1) != 0)
{
/* Since not using PG_TRY, must reset error stack by hand */
error_context_stack = NULL;
/* Prevent interrupts while cleaning up */
HOLD_INTERRUPTS();
/* Report the error to the server log */
EmitErrorReport();
/*
* These operations are really just a minimal subset of
* AbortTransaction(). We don't have very many resources to worry
* about in walwriter, but we do have LWLocks, and perhaps buffers?
*/
LWLockReleaseAll();
AbortBufferIO();
UnlockBuffers();
/* buffer pins are released here: */
ResourceOwnerRelease(CurrentResourceOwner,
RESOURCE_RELEASE_BEFORE_LOCKS,
false, true);
/* we needn't bother with the other ResourceOwnerRelease phases */
AtEOXact_Buffers(false);
AtEOXact_Files();
//......... portion of code omitted here .........
Developer ID: GisKook, Project: Gis, Lines of code: 101, Source file: walwriter.c
Example 16: lazy_vacuum_rel
//......... portion of code omitted here .........
/* Set threshold for interesting free space = average request size */
/* XXX should we scale it up or down? Adjust vacuum.c too, if so */
vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node);
vacrelstats->num_index_scans = 0;
/* Open all indexes of the relation */
vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
vacrelstats->hasindex = (nindexes > 0);
/* Do the vacuuming */
lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, updated_stats);
/* Done with indexes */
vac_close_indexes(nindexes, Irel, NoLock);
/*
* Optionally truncate the relation.
*
* Don't even think about it unless we have a shot at releasing a goodly
* number of pages. Otherwise, the time taken isn't worth it.
*
* Note that after we've truncated the heap, it's too late to abort the
* transaction; doing so would lose the sinval messages needed to tell
* the other backends about the table being shrunk. We prevent interrupts
* in that case; caller is responsible for re-enabling them after
* committing the transaction.
*/
possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
if (possibly_freeable > 0 &&
(possibly_freeable >= REL_TRUNCATE_MINIMUM ||
possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION))
{
HOLD_INTERRUPTS();
heldoff = true;
lazy_truncate_heap(onerel, vacrelstats);
}
/* Update shared free space map with final free space info */
lazy_update_fsm(onerel, vacrelstats);
if (vacrelstats->tot_free_pages > MaxFSMPages)
ereport(WARNING,
(errmsg("relation \"%s.%s\" contains more than \"max_fsm_pages\" pages with useful free space",
get_namespace_name(RelationGetNamespace(onerel)),
RelationGetRelationName(onerel)),
/* Only suggest VACUUM FULL if > 20% free */
(vacrelstats->tot_free_pages > vacrelstats->rel_pages * 0.20) ?
errhint("Consider using VACUUM FULL on this relation or increasing the configuration parameter \"max_fsm_pages\".") :
errhint("Consider increasing the configuration parameter \"max_fsm_pages\".")));
/* Update statistics in pg_class */
vac_update_relstats_from_list(onerel,
vacrelstats->rel_pages,
vacrelstats->rel_tuples,
vacrelstats->hasindex,
FreezeLimit,
updated_stats);
/* report results to the stats collector, too */
pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared,
true /*vacrelstats->scanned_all*/,
vacstmt->analyze, vacrelstats->rel_tuples);
if (gp_indexcheck_vacuum == INDEX_CHECK_ALL ||
(gp_indexcheck_vacuum == INDEX_CHECK_SYSTEM &&
PG_CATALOG_NAMESPACE == RelationGetNamespace(onerel)))
{
int i;
for (i = 0; i < nindexes; i++)
{
if (Irel[i]->rd_rel->relam == BTREE_AM_OID)
_bt_validate_vacuum(Irel[i], onerel, OldestXmin);
}
}
/* and log the action if appropriate */
if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
{
if (Log_autovacuum_min_duration == 0 ||
TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
Log_autovacuum_min_duration))
ereport(LOG,
(errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"
"pages: %d removed, %d remain\n"
"tuples: %.0f removed, %.0f remain\n"
"system usage: %s",
get_database_name(MyDatabaseId),
get_namespace_name(RelationGetNamespace(onerel)),
RelationGetRelationName(onerel),
vacrelstats->num_index_scans,
vacrelstats->pages_removed, vacrelstats->rel_pages,
vacrelstats->tuples_deleted, vacrelstats->rel_tuples,
pg_rusage_show(&ru0))));
}
return heldoff;
}
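Note the asymmetry: when heldoff is returned true, this function deliberately leaves interrupts held, per the comment above about truncating the heap. A sketch of the caller contract (argument list abbreviated; the real counterpart lives in vacuum.c):

/* Sketch: commit first, then release the interrupt holdoff */
heldoff = lazy_vacuum_rel(onerel, vacstmt, updated_stats);

/* ... close the relation, then commit the transaction ... */
CommitTransactionCommand();

if (heldoff)
	RESUME_INTERRUPTS();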
/*
* master_copy_shard_placement implements a user-facing UDF to copy data from
* a healthy (source) node to an inactive (target) node. To accomplish this it
* entirely recreates the table structure before copying all data. During this
* time all modifications are paused to the shard. After successful repair, the
* inactive placement is marked healthy and modifications may continue. If the
* repair fails at any point, this function throws an error, leaving the node
* in an unhealthy state.
*/
Datum
master_copy_shard_placement(PG_FUNCTION_ARGS)
{
int64 shardId = PG_GETARG_INT64(0);
text *sourceNodeName = PG_GETARG_TEXT_P(1);
int32 sourceNodePort = PG_GETARG_INT32(2);
text *targetNodeName = PG_GETARG_TEXT_P(3);
int32 targetNodePort = PG_GETARG_INT32(4);
ShardInterval *shardInterval = LoadShardInterval(shardId);
Oid distributedTableId = shardInterval->relationId;
List *shardPlacementList = NIL;
ShardPlacement *sourcePlacement = NULL;
ShardPlacement *targetPlacement = NULL;
List *ddlCommandList = NIL;
bool recreated = false;
bool dataCopied = false;
/*
* By taking an exclusive lock on the shard, we both stop all modifications
* (INSERT, UPDATE, or DELETE) and prevent concurrent repair operations from
* being able to operate on this shard.
*/
LockShard(shardId, ExclusiveLock);
shardPlacementList = LoadShardPlacementList(shardId);
sourcePlacement = SearchShardPlacementInList(shardPlacementList, sourceNodeName,
sourceNodePort);
if (sourcePlacement->shardState != STATE_FINALIZED)
{
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("source placement must be in finalized state")));
}
targetPlacement = SearchShardPlacementInList(shardPlacementList, targetNodeName,
targetNodePort);
if (targetPlacement->shardState != STATE_INACTIVE)
{
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("target placement must be in inactive state")));
}
/* retrieve the DDL commands for the table and run them */
ddlCommandList = RecreateTableDDLCommandList(distributedTableId, shardId);
recreated = ExecuteRemoteCommandList(targetPlacement->nodeName,
targetPlacement->nodePort,
ddlCommandList);
if (!recreated)
{
ereport(ERROR, (errmsg("could not recreate shard table"),
errhint("Consult recent messages in the server logs for "
"details.")));
}
HOLD_INTERRUPTS();
dataCopied = CopyDataFromFinalizedPlacement(distributedTableId, shardId,
sourcePlacement, targetPlacement);
if (!dataCopied)
{
ereport(ERROR, (errmsg("could not copy shard data"),
errhint("Consult recent messages in the server logs for "
"details.")));
}
/* the placement is repaired, so return to finalized state */
DeleteShardPlacementRow(targetPlacement->id);
InsertShardPlacementRow(targetPlacement->id, targetPlacement->shardId,
STATE_FINALIZED, targetPlacement->nodeName,
targetPlacement->nodePort);
RESUME_INTERRUPTS();
PG_RETURN_VOID();
}