Monitorování lwlocku pomocí systemtapu
Skočit na navigaci
Skočit na vyhledávání
Trochu jsem zápasil - jednak se svojí neznalostí systemtapu, druhak s tím, že se DTRACE interface v PostgreSQL 9.4 změnil - přičemž stap skripty pro Postgres, které jsou na internetu jsou samozřejmě pro starší verze (jedu na developerské 9.4). Ve starších verzích má popis zámku 2 parametry, v 9.4 tři - přičemž nový je první parametr. Navíc přibyly nové zámky - a změnili se rozsahy dynamické a LockMgr zámky.
- http://www.postgresql.org/docs/9.3/static/dynamic-trace.html
- http://www.postgresql.org/docs/devel/static/dynamic-trace.html
stap -L 'process("postgres").mark("*")' global query_time, query_summary probe process("postgres").mark("query__start") { query_time[tid(), $arg1] = gettimeofday_us(); } probe process("postgres").mark("query__done") { p = tid() t = query_time[p, $arg1]; delete query_time[p, $arg1] if (t) { query_summary[p] <<< (gettimeofday_us() - t); } } probe end { printf("\ntid count min(us) avg(us) max(us)\n"); foreach (p in query_summary) { printf("%d %d %d %d %d\n", p, @count(query_summary[p]), @min(query_summary[p]), @avg(query_summary[p]), @max(query_summary[p])); } } probe process ("postgres").mark("query__done") { now = gettimeofday_us() t = tid() if ([t, $arg1] in livequeries) { delta = now - livequeries[t, $arg1] query = user_string($arg1) completequeries[query] <<< delta delete livequeries[t, $arg1] } } probe end { foreach (p in jit_summary) { printf("tid = %d\n", p) print(@hist_log(jit_summary[p])) } }
#!/usr/bin/stap # pglockwaits systemtap version # based on pglockwaits_84.d by Jignesh Shah # modified for 9.4 ## Global variables # global lckmode, lckid, lwlock, tlwlock, waiting probe begin { lckmode[0] = "Exclusive" lckmode[1] = "Shared" lckid[0] = "BufFreelistLock" lckid[1] = "ShmemIndexLock" lckid[2] = "OidGenLock" lckid[3] = "XidGenLock" lckid[4] = "ProcArrayLock" lckid[5] = "SInvalReadLock" lckid[6] = "SInvalWriteLock" lckid[7] = "WALInsertLock" lckid[8] = "WALWriteLock" lckid[9] = "ControlFileLock" lckid[10] = "CheckpointLock" lckid[11] = "CLogControlLock" lckid[12] = "SubtransControlLock" lckid[13] = "MultiXactGenLock" lckid[14] = "MultiXactOffsetControlLock" lckid[15] = "MultiXactMemberControlLock" lckid[16] = "RelCacheInitLock" lckid[17] = "BgWriterCommLock" lckid[18] = "TwoPhaseStateLock" lckid[19] = "TablespaceCreateLock" lckid[20] = "BtreeVacuumLock" lckid[21] = "AddinShemInitLock" lckid[22] = "AutovacuumLock" lckid[23] = "AutovacuumScheduleLock" lckid[24] = "SyncScanLock" lckid[25] = "RelationMappingLocks" lckid[26] = "AsyncCtlLock" lckid[27] = "AsyncQueueLock" lckid[28] = "SerializableXactHashLock" lckid[29] = "SerializableFinishedListLock" lckid[30] = "SerializablePredicateLockListLock" lckid[31] = "OldSerXidLock" lckid[32] = "SyncRepLock" lckid[33] = "BackgroundWorkerLock" lckid[34] = "DynamicSharedMemoryControlLock" lckid[35] = "AutoFileLock" lckid[36] = "ReplicationSlotAllocationLock" lckid[37] = "ReplicationSlotControlLock" lckid[38] = "IndividualLock " lckid[54] = "LockMgrLocks" lckid[70] = "DynamicLocks" } probe process("/usr/local/pgsql/bin/postgres").mark("lwlock__wait__start") { lwlock[$arg1, $arg2, tid()] = gettimeofday_us() if ($arg2 < 38) ln = lckid[$arg2] else if ($arg2 >= 38 && $arg2 < 54) ln = lckid[38] else if ($arg2 >= 54 && $arg2 < 70) ln = lckid[54] else ln = lckid[70] waiting[ln, $arg3]++ } probe process("/usr/local/pgsql/bin/postgres").mark("lwlock__wait__done") { p = tid(); t = gettimeofday_us() if ( [$arg1, $arg2, p] in lwlock) { if ($arg2 < 38) ln = lckid[$arg2] else if ($arg2 >= 38 && $arg2 < 54) ln = lckid[38] else if ($arg2 >= 54 && $arg2 < 70) ln = lckid[54] else ln = lckid[70] # pridani zaznamu do statistiky tlwlock[ln, $arg3] <<< t - lwlock[$arg1, $arg2, p] waiting[ln, $arg3]-- delete lwlock[$arg1, $arg2, p] } } # spusti se kazde 2 sec probe timer.s(2) { printf("\n\nlight weight locks\n") printf("%20s %10s %10s %10s\n", "lockname", "mode", "count", "avg") foreach ([l,m] in tlwlock-) printf("%20s %10s %10d %10d\n", l, lckmode[m], @count(tlwlock[l,m]), @avg(tlwlock[l,m])) # delete tlwlock printf("\n\still waiting\n") foreach ([l,m] in waiting) printf("%20s %10s %10d\n", l, lckmode[m], waiting[l,m]) }
a pak po spuštění
[root@localhost data]# stap /home/pavel/pglockwaits.stp light weight locks lockname mode count avg IndividualLock Shared 11 7 IndividualLock Exclusive 11 7 WALWriteLock Exclusive 4 5410 ProcArrayLock Shared 1 9 BgWriterCommLock Exclusive 1 7 DynamicLocks Shared 1 10 still waiting IndividualLock Shared 0 IndividualLock Exclusive 0 WALWriteLock Exclusive 0 DynamicLocks Shared 0 ProcArrayLock Shared 0 BgWriterCommLock Exclusive 0 light weight locks lockname mode count avg IndividualLock Shared 11 7 IndividualLock Exclusive 11 7 WALWriteLock Exclusive 4 5410 ProcArrayLock Shared 1 9 BgWriterCommLock Exclusive 1 7 DynamicLocks Shared 1 10 still waiting IndividualLock Shared 0 IndividualLock Exclusive 0 WALWriteLock Exclusive 0 DynamicLocks Shared 0 ProcArrayLock Shared 0 BgWriterCommLock Exclusive 0