PostgreSQL中AutoVacLauncherMain函数的实现逻辑是什么

2025-03-12 技术教程

本篇内容介绍了“PostgreSQL中AutoVacLauncherMain函数的实现逻辑是什么”的有关知识，在实际案例的操作过程中，不少人都会遇到这样的困境，接下来就让小编带领大家学习一下如何处理这些情况吧！希望大家仔细阅读，能够学有所成！

一、数据结构

宏定义

#define GetProcessingMode() Mode

#define SetProcessingMode(mode) \
    do { \
        AssertArg((mode) == BootstrapProcessing || \
                  (mode) == InitProcessing || \
                  (mode) == NormalProcessing); \
        Mode = (mode); \
    } while(0)

二、源码解读

AutoVacLauncherMain函数是autovacuum launcher进程的主循环。

/**Mainloopfortheautovacuumlauncherprocess.*autovacuum进程主循环*/NON_EXEC_STATICvoidAutoVacLauncherMain(intargc,char*argv[]){sigjmp_buflocal_sigjmp_buf;am_autovacuum_launcher=true;/*Identifymyselfviaps*///进程IDinit_ps_display(pgstat_get_backend_desc(B_AUTOVAC_LAUNCHER),"","","");ereport(DEBUG1,(errmsg("autovacuumlauncherstarted")));if(PostAuthDelay)pg_usleep(PostAuthDelay*1000000L);//设置进程模式SetProcessingMode(InitProcessing);/**Setupsignalhandlers.Weoperateondatabasesmuchlikearegular*backend,soweusethesamesignalhandling.Seeequivalentcodein*tcop/postgres.c.*设置信号控制器.*autovacuum的执行类似于普通的后台进程,因此使用相同的信号控制机制.*参考tcop/postgres.c中的代码.*/pqsignal(SIGHUP,av_sighup_handler);pqsignal(SIGINT,StatementCancelHandler);pqsignal(SIGTERM,avl_sigterm_handler);pqsignal(SIGQUIT,quickdie);//建立SIGALRM控制器InitializeTimeouts();/*establishesSIGALRMhandler*/pqsignal(SIGPIPE,SIG_IGN);//忽略SIGPIPEpqsignal(SIGUSR1,procsignal_sigusr1_handler);pqsignal(SIGUSR2,avl_sigusr2_handler);pqsignal(SIGFPE,FloatExceptionHandler);pqsignal(SIGCHLD,SIG_DFL);/*Earlyinitialization*///基础初始化BaseInit();/**Createaper-backendPGPROCstructinsharedmemory,exceptinthe*EXEC_BACKENDcasewherethiswasdoneinSubPostmasterMain.Wemustdo*thisbeforewecanuseLWLocks(andintheEXEC_BACKENDcasewealready*hadtodosomestuffwithLWLocks).*在共享内存中创建每个后台进程的PGPROC结构体，*但除了exEXEC_BACKEND这种情况，这是在SubPostmasterMain中完成的。*/#ifndefEXEC_BACKENDInitProcess();#endif//初始化InitPostgres(NULL,InvalidOid,NULL,InvalidOid,NULL,false);//设置进程模式SetProcessingMode(NormalProcessing);/**Createamemorycontextthatwewilldoallourworkin.Wedothisso*thatwecanresetthecontextduringerrorrecoveryandtherebyavoid*possiblememoryleaks.*创建内存上下文.*之所以这样做是因为我们可以在错误恢复中重置上下文,并且可以避免内存泄漏.*/AutovacMemCxt=AllocSetContextCreate(TopMemoryContext,"AutovacuumLauncher",ALLOCSET_DEFAULT_SIZES);MemoryContextSwitchTo(AutovacMemCxt);/**Ifanexceptionisencountered,processingresumeshere.*如果出现异常,在这里重新恢复.**ThiscodeisastrippeddownversionofPostgresMainerrorrecovery.*这段代码是PostgresMain错误恢复的精简版。*/if(sigsetjmp(local_sigjmp_buf,1)!
=0){/*sincenotusingPG_TRY,mustreseterrorstackbyhand*///由于没有使用PG_TRY,这里必须手工重置错误.error_context_stack=NULL;/*Preventsinterruptswhilecleaningup*///在清理期间禁用中断HOLD_INTERRUPTS();/*ForgetanypendingQueryCancelortimeoutrequest*///忽略所有QueryCancel或者超时请求disable_all_timeouts(false);QueryCancelPending=false;/*secondtoavoidracecondition*//*Reporttheerrortotheserverlog*///在服务器日志中记录日志.EmitErrorReport();/*Abortthecurrenttransactioninordertorecover*///废弃当前事务,以准备恢复AbortCurrentTransaction();/**Releaseanyotherresources,forthecasewherewewerenotina*transaction.*释放任何其他资源，以防我们不在事务中。*/LWLockReleaseAll();pgstat_report_wait_end();AbortBufferIO();UnlockBuffers();/*thisisprobablydeadcode,butlet'sbesafe:*///这可能是deadcode,但可以保证安全if(AuxProcessResourceOwner)ReleaseAuxProcessResources(false);AtEOXact_Buffers(false);AtEOXact_SMgr();AtEOXact_Files(false);AtEOXact_HashTables(false);/**Nowreturntonormaltop-levelcontextandclearErrorContextfor*nexttime.*现在切换回正常的顶层上下文中,并为下一次的启动清理错误上下文*/MemoryContextSwitchTo(AutovacMemCxt);FlushErrorState();/*Flushanyleakeddatainthetop-levelcontext*///在top-level上下文刷新所有泄漏的数据MemoryContextResetAndDeleteChildren(AutovacMemCxt);/*don'tleavedanglingpointerstofreedmemory*///不要留下悬空指针来释放内存DatabaseListCxt=NULL;dlist_init(&DatabaseList);/**Makesurepgstatalsoconsidersourstatdataasgone.Note:we*mustn'tuseautovac_refresh_statshere.*确保pgstat也认为我们的统计数据已经丢弃。*注意:这里不能使用autovac_refresh_stats。*/pgstat_clear_snapshot();/*Nowwecanallowinterruptsagain*///可以允许中断了RESUME_INTERRUPTS();/*ifinshutdownmode,noneedforanythingfurther;justgoaway*///如处于shutdown模式,不需要继续后续的工作了,跳转到shutdownif(got_SIGTERM)gotoshutdown;/**Sleepatleast1secondafteranyerror.Wedon'twanttobe*fillingtheerrorlogsasfastaswecan.*/pg_usleep(1000000L);}/*Wecannowhandleereport(ERROR)*///现在可以处理ereport(ERROR)了PG_exception_stack=&local_sigjmp_buf;/*mustunblocksignalsbeforecallingrebuild_database_list*///在调用rebuild_database_list前不能阻塞信号PG_SETMASK(&UnBlockSig);/**Setalways-securesearchpath.Launcherdoesn'tconnecttoadatabase,*sothishasnoeffect.*设置安全的搜索路径.*L
auncher不能连接数据库,因此并没有什么影响.*/SetConfigOption("search_path","",PGC_SUSET,PGC_S_OVERRIDE);/**Forcezero_damaged_pagesOFFintheautovacprocess,evenifitisset*inpostgresql.conf.Wedon'treallywantsuchadangerousoptionbeing*appliednon-interactively.*在autovacuum进程中,强制关闭zero_damaged_pages,即时该参数在配置文件设置为ON.*我们真的不希望这样一个危险的选项在无需交互的情况进行应用.*/SetConfigOption("zero_damaged_pages","false",PGC_SUSET,PGC_S_OVERRIDE);/**Forcesettabletimeoutsofftoavoidlettingthesesettingsprevent*regularmaintenancefrombeingexecuted.*强制关闭可设置的超时，以避免这些设置妨碍常规维护的执行。*/SetConfigOption("statement_timeout","0",PGC_SUSET,PGC_S_OVERRIDE);SetConfigOption("lock_timeout","0",PGC_SUSET,PGC_S_OVERRIDE);SetConfigOption("idle_in_transaction_session_timeout","0",PGC_SUSET,PGC_S_OVERRIDE);/**Forcedefault_transaction_isolationtoREADCOMMITTED.Wedon'twant*topaytheoverheadofserializablemode,noraddanyriskofcausing*deadlocksordelayingothertransactions.*强制default_transaction_isolation为READCOMMITTED.*我们不希望在serializable模式下增加负担,也不想增加导致死锁或者其他事务延迟的风险.*/SetConfigOption("default_transaction_isolation","readcommitted",PGC_SUSET,PGC_S_OVERRIDE);/**Inemergencymode,juststartaworker(unlessshutdownwasrequested)*andgoaway.*在紧急模式,启动一个worker(除非已请求shutdown)*/if(!AutoVacuumingActive()){if(!got_SIGTERM)do_start_worker();proc_exit(0);/*done*/}AutoVacuumShmem->av_launcherpid=MyProcPid;/**Createtheinitialdatabaselist.Theinvariantwewantthislistto*keepisthatit'sorderedbydecreasingnext_time.Assoonasanentry*isupdatedtoahighertime,itwillbemovedtothefront(whichis*correctbecausetheonlyoperationistoaddautovacuum_naptimetothe*entry,andtimealwaysincreases).*创建初始化数据库链表.*我们希望这个链表保持不变的是它是通过减少next_time来进行排序.*一旦条目更新到更高的时间，它就会被移动到前面*(这样处理没有问题，因为惟一的操作是向条目添加autovacuum_naptime，而时间总是会增加)。*/rebuild_database_list(InvalidOid);/*loopuntilshutdownrequest*///循环,直至请求shutdownwhile(!got_SIGTERM){structtimevalnap;TimestampTzcurrent_time=0;boolcan_launch;/**ThisloopisabitdifferentfromthenormaluseofWaitLatch,*becausewe'dliketosleepbeforethefirstlaunchofachild*process.Soit'sWaitLatch,thenRese
tLatch,thencheckfor*wakeningconditions.*该循环与常规的使用WaitLatch不同,因为我们希望在第一个子进程启动前处于休眠状态.*因此首先是WaitLatch,然后是ResetLatch,然后检查并等待唤醒条件.*/launcher_determine_sleep(!dlist_is_empty(&AutoVacuumShmem->av_freeWorkers),false,&nap);/**Waituntilnaptimeexpiresorwegetsometypeofsignal(allthe*signalhandlerswillwakeusbycallingSetLatch).*等待,直至naptime超时或者我们接收到某些类型的信号.*(所有的信号控制器会通过调用SetLatch唤醒进程)*/(void)WaitLatch(MyLatch,WL_LATCH_SET|WL_TIMEOUT|WL_EXIT_ON_PM_DEATH,(nap.tv_sec*1000L)+(nap.tv_usec/1000L),WAIT_EVENT_AUTOVACUUM_MAIN);ResetLatch(MyLatch);/*Processsinvalcatchupinterruptsthathappenedwhilesleeping*///在休眠过程中,进程会捕获相关的中断.ProcessCatchupInterrupt();/*thenormalshutdowncase*///shutdonw信号if(got_SIGTERM)break;if(got_SIGHUP){//SIGHUP信号got_SIGHUP=false;ProcessConfigFile(PGC_SIGHUP);/*shutdownrequestedinconfigfile?*///在配置文件中已请求shutdown?if(!AutoVacuumingActive())break;/*rebalanceincasethedefaultcostparameterschanged*///如默认的成本参数变化,则自动平衡.LWLockAcquire(AutovacuumLock,LW_EXCLUSIVE);autovac_balance_cost();LWLockRelease(AutovacuumLock);/*rebuildthelistincasethenaptimechanged*///如naptime出现变化,重建链表rebuild_database_list(InvalidOid);}/**aworkerfinished,orpostmastersignalledfailuretostarta*worker*某个worker已完成,或者postmaster信号出现异常无法启动worker*/if(got_SIGUSR2){//SIGUSR2信号got_SIGUSR2=false;/*rebalancecostlimits,ifneeded*///如需要,重平衡成本限制if(AutoVacuumShmem->av_signal[AutoVacRebalance]){LWLockAcquire(AutovacuumLock,LW_EXCLUSIVE);AutoVacuumShmem->av_signal[AutoVacRebalance]=false;autovac_balance_cost();LWLockRelease(AutovacuumLock);}if(AutoVacuumShmem->av_signal[AutoVacForkFailed]){/**Ifthepostmasterfailedtostartanewworker,wesleep*foralittlewhileandresendthesignal.Thenewworker's*stateisstillinmemory,sothisissufficient.After*that,werestartthemainloop.*如果postmaster无法启动新的worker,休眠一段时间,重新发送信号.*新的worker的状态仍然在内存中,因此这样处理是OK的.*再次之后,重新启动主循环.**XXXshouldweputalimittothenumberoftimesweretry?*Idon'tthinkitmakesmuchsense,becauseafuturestart*ofaworkerwillcontinuetofailinthesameway.*是否增加重试次数的限制?XXX*我们不想太过敏感,因为某个worker在未来的启动会以同样的方式持续失败.*
/AutoVacuumShmem->av_signal[AutoVacForkFailed]=false;pg_usleep(1000000L);/*1s*/SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER);continue;}}/**Therearesomeconditionsthatweneedtocheckbeforetryingto*startaworker.First,weneedtomakesurethatthereisaworker*slotavailable.Second,weneedtomakesurethatnootherworker*failedwhilestartingup.*在尝试启动worker前,有一些条件需要检查.*首先,需要确保有可用的workerslot;其次,需要确保worker在启动时没有出现异常.*/current_time=GetCurrentTimestamp();LWLockAcquire(AutovacuumLock,LW_SHARED);can_launch=!dlist_is_empty(&AutoVacuumShmem->av_freeWorkers);if(AutoVacuumShmem->av_startingWorker!=NULL){intwaittime;WorkerInfoworker=AutoVacuumShmem->av_startingWorker;/**Wecan'tlaunchanotherworkerwhenanotheroneisstill*startingup(orfailedwhiledoingso),sojustsleepforabit*more;thatworkerwillwakeusupagainassoonasit'sready.*Wewillonlywaitautovacuum_naptimeseconds(uptoamaximum*of60seconds)forthistohappenhowever.Notethatfailure*toconnecttoaparticulardatabaseisnotaproblemhere,*becausetheworkerremovesitselffromthestartingWorker*pointerbeforetryingtoconnect.Problemsdetectedbythe*postmaster(likefork()failure)arealsoreportedandhandled*differently.Theonlyproblemsthatmaycausethiscodeto*fireareerrorsintheearliersectionsofAutoVacWorkerMain,*beforetheworkerremovestheWorkerInfofromthe*startingWorkerpointer.*在某个worker仍然在启动时,不能启动新的worker,因此休眠一段时间;*另外一个worker在ready后会第一时间唤醒我们.*只需要等待autovacuum_naptime参数设置的时间(单位秒)(最大为60s).*注意,在这里不能够连接一个特定的数据库不存在任何问题,因为worker在*尝试连接时,通过startingWorker指针销毁自己.*通过postmaster检测到问题(如fork()失败)会报告并且进行不同的处理,*这里唯一的问题是可能导致这里的处理逻辑在AutoVacWorkerMain的早起触发错误,*而且实在worker通过startingWorker指针清除WorkerInfo前.*/waittime=Min(autovacuum_naptime,60)*1000;if(TimestampDifferenceExceeds(worker->wi_launchtime,current_time,waittime)){LWLockRelease(AutovacuumLock);LWLockAcquire(AutovacuumLock,LW_EXCLUSIVE);/**Nootherprocesscanputaworkerinstartingmode,soif*startingWorkerisstillINVALIDafterexchangingourlock,*weassumeit'sthesameonewesawabove(sowedon't*recheckthelaunchtime).*/if(AutoVacuumShmem->av_startingWorker!=NULL){wor
ker=AutoVacuumShmem->av_startingWorker;worker->wi_dboid=InvalidOid;worker->wi_tableoid=InvalidOid;worker->wi_sharedrel=false;worker->wi_proc=NULL;worker->wi_launchtime=0;dlist_push_head(&AutoVacuumShmem->av_freeWorkers,&worker->wi_links);AutoVacuumShmem->av_startingWorker=NULL;elog(WARNING,"workertooktoolongtostart;canceled");}}elsecan_launch=false;}//释放锁LWLockRelease(AutovacuumLock);/*eithersharedorexclusive*//*ifwecan'tdoanything,justgobacktosleep*///什么都做不了,继续休眠if(!can_launch)continue;/*We'reOKtostartanewworker*///现在可以启动新的workerif(dlist_is_empty(&DatabaseList)){/**Specialcasewhenthelistisempty:startaworkerrightaway.*Thiscoverstheinitialcase,whennodatabaseisinpgstats*(thusthelistisempty).Notethattheconstraintsin*launcher_determine_sleepkeepusfromstartingworkerstoo*quickly(atmostonceeveryautovacuum_naptimewhenthelistis*empty).*在链表为空时的特殊情况:正确的启动一个worker.*这涵盖了刚初始的情况，即pgstats中没有数据库(因此链表为空)。*请注意，launcher_determine_sleep中的约束使我们不能过快地启动worker*(当链表为空时，最多一次autovacuum_naptime)。*/launch_worker(current_time);}else{/**becauserebuild_database_listconstructsalistwithmost*distantadl_next_workerfirst,weobtainourdatabasefromthe*tailofthelist.*因为rebuild_database_list首先用最远的adl_next_worker构造了链表，*所以我们从链表的尾部获取数据库。*/avl_dbase*avdb;avdb=dlist_tail_element(avl_dbase,adl_node,&DatabaseList);/**launchaworkerifnext_workerisrightnoworitisinthe*past*启动worker,如果next_worker正当其时或者已成为过去时.*/if(TimestampDifferenceExceeds(avdb->adl_next_worker,current_time,0))launch_worker(current_time);}}/*Normalexitfromtheautovaclauncherishere*///常规的退出.shutdown:ereport(DEBUG1,(errmsg("autovacuumlaunchershuttingdown")));AutoVacuumShmem->av_launcherpid=0;proc_exit(0);/*done*/}

“PostgreSQL中AutoVacLauncherMain函数的实现逻辑是什么”的内容就介绍到这里了，感谢大家的阅读。如果想了解更多行业相关的知识可以关注亿速云网站，小编将为大家输出更多高质量的实用文章！