Monitorování lwlocku pomocí systemtapu

Z PostgreSQL
Skočit na navigaci Skočit na vyhledávání

Trochu jsem zápasil - jednak se svojí neznalostí systemtapu, druhak s tím, že se DTRACE interface v PostgreSQL 9.4 změnil - přičemž stap skripty pro Postgres, které jsou na internetu jsou samozřejmě pro starší verze (jedu na developerské 9.4). Ve starších verzích má popis zámku 2 parametry, v 9.4 tři - přičemž nový je první parametr. Navíc přibyly nové zámky - a změnili se rozsahy dynamické a LockMgr zámky.

stap -L 'process("postgres").mark("*")'

global query_time, query_summary

probe process("postgres").mark("query__start") {
  query_time[tid(), $arg1] = gettimeofday_us();
}

probe process("postgres").mark("query__done") {
  p = tid()
  t = query_time[p, $arg1]; delete query_time[p, $arg1]
  if (t) {
    query_summary[p] <<< (gettimeofday_us() - t);
  }
}

probe end {
  printf("\ntid count min(us) avg(us) max(us)\n");
  foreach (p in query_summary) {
    printf("%d %d %d %d %d\n", p, @count(query_summary[p]),
     @min(query_summary[p]), @avg(query_summary[p]), @max(query_summary[p]));
  }
}

probe process ("postgres").mark("query__done") {
	now = gettimeofday_us()
	t = tid()
	if ([t, $arg1] in livequeries) {
		delta = now - livequeries[t, $arg1]
		query = user_string($arg1)
		completequeries[query] <<< delta
		delete livequeries[t, $arg1]
	}
}

probe end {
  foreach (p in jit_summary) {
    printf("tid = %d\n", p)
    print(@hist_log(jit_summary[p]))
  }
}
#!/usr/bin/stap

# pglockwaits systemtap version 
# based on pglockwaits_84.d by Jignesh Shah
# modified for 9.4

## Global variables
#
global lckmode, lckid, lwlock, tlwlock, waiting

probe begin
{
	lckmode[0] = "Exclusive"
	lckmode[1] = "Shared"
	
	lckid[0] = "BufFreelistLock"
	lckid[1] = "ShmemIndexLock"
	lckid[2] = "OidGenLock"
	lckid[3] = "XidGenLock"
	lckid[4] = "ProcArrayLock"
	lckid[5] = "SInvalReadLock"
	lckid[6] = "SInvalWriteLock"
	lckid[7] = "WALInsertLock"
	lckid[8] = "WALWriteLock"
	lckid[9] = "ControlFileLock"
	lckid[10] = "CheckpointLock"
	lckid[11] = "CLogControlLock"
	lckid[12] = "SubtransControlLock"
	lckid[13] = "MultiXactGenLock"
	lckid[14] = "MultiXactOffsetControlLock"
	lckid[15] = "MultiXactMemberControlLock"
	lckid[16] = "RelCacheInitLock"
	lckid[17] = "BgWriterCommLock"
	lckid[18] = "TwoPhaseStateLock"
	lckid[19] = "TablespaceCreateLock"
	lckid[20] = "BtreeVacuumLock"
	lckid[21] = "AddinShemInitLock"
	lckid[22] = "AutovacuumLock"
	lckid[23] = "AutovacuumScheduleLock"
	lckid[24] = "SyncScanLock"
	lckid[25] = "RelationMappingLocks"
	lckid[26] = "AsyncCtlLock"
	lckid[27] = "AsyncQueueLock"
	lckid[28] = "SerializableXactHashLock"
	lckid[29] = "SerializableFinishedListLock"
	lckid[30] = "SerializablePredicateLockListLock"
	lckid[31] = "OldSerXidLock"
	lckid[32] = "SyncRepLock"
	lckid[33] = "BackgroundWorkerLock"
	lckid[34] = "DynamicSharedMemoryControlLock"
	lckid[35] = "AutoFileLock"
	lckid[36] = "ReplicationSlotAllocationLock"
	lckid[37] = "ReplicationSlotControlLock"
	lckid[38] = "IndividualLock "
	lckid[54] = "LockMgrLocks"
	lckid[70] = "DynamicLocks"
}

probe process("/usr/local/pgsql/bin/postgres").mark("lwlock__wait__start")
{
	lwlock[$arg1, $arg2, tid()] = gettimeofday_us()

	if ($arg2 < 38)
		ln = lckid[$arg2]
	else if ($arg2 >= 38 && $arg2 < 54)
		ln = lckid[38]
	else if ($arg2 >= 54 && $arg2 < 70)
		ln = lckid[54]
	else
		ln = lckid[70]

	waiting[ln, $arg3]++
}

probe process("/usr/local/pgsql/bin/postgres").mark("lwlock__wait__done")
{
	p = tid(); t = gettimeofday_us()

	if ( [$arg1, $arg2, p] in lwlock) {
		if ($arg2 < 38)
			ln = lckid[$arg2]
		else if ($arg2 >= 38 && $arg2 < 54)
			ln = lckid[38]
		else if ($arg2 >= 54 && $arg2 < 70)
			ln = lckid[54]
		else
			ln = lckid[70]

		# pridani zaznamu do statistiky
		tlwlock[ln, $arg3] <<< t - lwlock[$arg1, $arg2, p]
		waiting[ln, $arg3]--

		delete lwlock[$arg1, $arg2, p]
   }
}

# spusti se kazde 2 sec
probe timer.s(2)
{
	printf("\n\nlight weight locks\n")
	printf("%20s %10s %10s %10s\n",
				"lockname", "mode", "count", "avg")
	foreach ([l,m] in tlwlock-)
		printf("%20s %10s %10d %10d\n", l, lckmode[m],
					@count(tlwlock[l,m]),  @avg(tlwlock[l,m]))
	# delete tlwlock

	printf("\n\still waiting\n")
	foreach ([l,m] in waiting)
		printf("%20s %10s %10d\n", 
					l, lckmode[m], waiting[l,m])
}

a pak po spuštění

[root@localhost data]# stap /home/pavel/pglockwaits.stp 

light weight locks
            lockname       mode      count        avg
     IndividualLock      Shared         11          7
     IndividualLock   Exclusive         11          7
        WALWriteLock  Exclusive          4       5410
       ProcArrayLock     Shared          1          9
    BgWriterCommLock  Exclusive          1          7
        DynamicLocks     Shared          1         10

still waiting
     IndividualLock      Shared          0
     IndividualLock   Exclusive          0
        WALWriteLock  Exclusive          0
        DynamicLocks     Shared          0
       ProcArrayLock     Shared          0
    BgWriterCommLock  Exclusive          0


light weight locks
            lockname       mode      count        avg
     IndividualLock      Shared         11          7
     IndividualLock   Exclusive         11          7
        WALWriteLock  Exclusive          4       5410
       ProcArrayLock     Shared          1          9
    BgWriterCommLock  Exclusive          1          7
        DynamicLocks     Shared          1         10

still waiting
     IndividualLock      Shared          0
     IndividualLock   Exclusive          0
        WALWriteLock  Exclusive          0
        DynamicLocks     Shared          0
       ProcArrayLock     Shared          0
    BgWriterCommLock  Exclusive          0