Monitorování lwlocku pomocí systemtapu
Skočit na navigaci
Skočit na vyhledávání
Trochu jsem zápasil - jednak se svojí neznalostí systemtapu, druhak s tím, že se DTRACE interface v PostgreSQL 9.4 změnil - přičemž stap skripty pro Postgres, které jsou na internetu jsou samozřejmě pro starší verze (jedu na developerské 9.4). Ve starších verzích má popis zámku 2 parametry, v 9.4 tři - přičemž nový je první parametr. Navíc přibyly nové zámky - a změnili se rozsahy dynamické a LockMgr zámky.
- http://www.postgresql.org/docs/9.3/static/dynamic-trace.html
- http://www.postgresql.org/docs/devel/static/dynamic-trace.html
stap -L 'process("postgres").mark("*")'
global query_time, query_summary
probe process("postgres").mark("query__start") {
query_time[tid(), $arg1] = gettimeofday_us();
}
probe process("postgres").mark("query__done") {
p = tid()
t = query_time[p, $arg1]; delete query_time[p, $arg1]
if (t) {
query_summary[p] <<< (gettimeofday_us() - t);
}
}
probe end {
printf("\ntid count min(us) avg(us) max(us)\n");
foreach (p in query_summary) {
printf("%d %d %d %d %d\n", p, @count(query_summary[p]),
@min(query_summary[p]), @avg(query_summary[p]), @max(query_summary[p]));
}
}
probe process ("postgres").mark("query__done") {
now = gettimeofday_us()
t = tid()
if ([t, $arg1] in livequeries) {
delta = now - livequeries[t, $arg1]
query = user_string($arg1)
completequeries[query] <<< delta
delete livequeries[t, $arg1]
}
}
probe end {
foreach (p in jit_summary) {
printf("tid = %d\n", p)
print(@hist_log(jit_summary[p]))
}
}
#!/usr/bin/stap
# pglockwaits systemtap version
# based on pglockwaits_84.d by Jignesh Shah
# modified for 9.4
## Global variables
#
global lckmode, lckid, lwlock, tlwlock, waiting
probe begin
{
lckmode[0] = "Exclusive"
lckmode[1] = "Shared"
lckid[0] = "BufFreelistLock"
lckid[1] = "ShmemIndexLock"
lckid[2] = "OidGenLock"
lckid[3] = "XidGenLock"
lckid[4] = "ProcArrayLock"
lckid[5] = "SInvalReadLock"
lckid[6] = "SInvalWriteLock"
lckid[7] = "WALInsertLock"
lckid[8] = "WALWriteLock"
lckid[9] = "ControlFileLock"
lckid[10] = "CheckpointLock"
lckid[11] = "CLogControlLock"
lckid[12] = "SubtransControlLock"
lckid[13] = "MultiXactGenLock"
lckid[14] = "MultiXactOffsetControlLock"
lckid[15] = "MultiXactMemberControlLock"
lckid[16] = "RelCacheInitLock"
lckid[17] = "BgWriterCommLock"
lckid[18] = "TwoPhaseStateLock"
lckid[19] = "TablespaceCreateLock"
lckid[20] = "BtreeVacuumLock"
lckid[21] = "AddinShemInitLock"
lckid[22] = "AutovacuumLock"
lckid[23] = "AutovacuumScheduleLock"
lckid[24] = "SyncScanLock"
lckid[25] = "RelationMappingLocks"
lckid[26] = "AsyncCtlLock"
lckid[27] = "AsyncQueueLock"
lckid[28] = "SerializableXactHashLock"
lckid[29] = "SerializableFinishedListLock"
lckid[30] = "SerializablePredicateLockListLock"
lckid[31] = "OldSerXidLock"
lckid[32] = "SyncRepLock"
lckid[33] = "BackgroundWorkerLock"
lckid[34] = "DynamicSharedMemoryControlLock"
lckid[35] = "AutoFileLock"
lckid[36] = "ReplicationSlotAllocationLock"
lckid[37] = "ReplicationSlotControlLock"
lckid[38] = "IndividualLock "
lckid[54] = "LockMgrLocks"
lckid[70] = "DynamicLocks"
}
probe process("/usr/local/pgsql/bin/postgres").mark("lwlock__wait__start")
{
lwlock[$arg1, $arg2, tid()] = gettimeofday_us()
if ($arg2 < 38)
ln = lckid[$arg2]
else if ($arg2 >= 38 && $arg2 < 54)
ln = lckid[38]
else if ($arg2 >= 54 && $arg2 < 70)
ln = lckid[54]
else
ln = lckid[70]
waiting[ln, $arg3]++
}
probe process("/usr/local/pgsql/bin/postgres").mark("lwlock__wait__done")
{
p = tid(); t = gettimeofday_us()
if ( [$arg1, $arg2, p] in lwlock) {
if ($arg2 < 38)
ln = lckid[$arg2]
else if ($arg2 >= 38 && $arg2 < 54)
ln = lckid[38]
else if ($arg2 >= 54 && $arg2 < 70)
ln = lckid[54]
else
ln = lckid[70]
# pridani zaznamu do statistiky
tlwlock[ln, $arg3] <<< t - lwlock[$arg1, $arg2, p]
waiting[ln, $arg3]--
delete lwlock[$arg1, $arg2, p]
}
}
# spusti se kazde 2 sec
probe timer.s(2)
{
printf("\n\nlight weight locks\n")
printf("%20s %10s %10s %10s\n",
"lockname", "mode", "count", "avg")
foreach ([l,m] in tlwlock-)
printf("%20s %10s %10d %10d\n", l, lckmode[m],
@count(tlwlock[l,m]), @avg(tlwlock[l,m]))
# delete tlwlock
printf("\n\still waiting\n")
foreach ([l,m] in waiting)
printf("%20s %10s %10d\n",
l, lckmode[m], waiting[l,m])
}
a pak po spuštění
[root@localhost data]# stap /home/pavel/pglockwaits.stp
light weight locks
lockname mode count avg
IndividualLock Shared 11 7
IndividualLock Exclusive 11 7
WALWriteLock Exclusive 4 5410
ProcArrayLock Shared 1 9
BgWriterCommLock Exclusive 1 7
DynamicLocks Shared 1 10
still waiting
IndividualLock Shared 0
IndividualLock Exclusive 0
WALWriteLock Exclusive 0
DynamicLocks Shared 0
ProcArrayLock Shared 0
BgWriterCommLock Exclusive 0
light weight locks
lockname mode count avg
IndividualLock Shared 11 7
IndividualLock Exclusive 11 7
WALWriteLock Exclusive 4 5410
ProcArrayLock Shared 1 9
BgWriterCommLock Exclusive 1 7
DynamicLocks Shared 1 10
still waiting
IndividualLock Shared 0
IndividualLock Exclusive 0
WALWriteLock Exclusive 0
DynamicLocks Shared 0
ProcArrayLock Shared 0
BgWriterCommLock Exclusive 0