Bug#22551677 SIGNAL 11 IN LF_PINBOX_PUT_PINS

marcalff · marcalff · commit 05e2386eccd3 · 2016-08-30T11:59:39.000+02:00
Before this fix, the following query:
    SET GLOBAL offline_mode = ON
  could cause the server to crash.

  The root cause is actually complex, as described below.

  1)

  A session A is connected, and performing network io,
  typically waiting for the next command to execute.
  The performance schema socket instrumentation is enabled,
  so that pfs_start_socket_wait() / pfs_end_socket_wait()
  are executed.

  2)

  A session B executes SET GLOBAL offline_mode = ON,
  which terminates session A.
  In particular, session B forcefully closes the socket used by session A.

  3)

  Session A and session B are different threads, but they both
  execute performance schema instrumented code against the same socket.

  4)

  Because a socket is "owned" by a thread,
  the instrumentation in pfs_start/end_socket_wait()
  uses the same PFS_thread (of thread A) in both thread A and B.
  This leads to race conditions when using member m_events_waits_current.

  5)

  Because PFS_thread::m_events_waits_current can be damaged with race conditions,
  the m_events_waits_current pointer can point outside of the waits array.
  Using this pointer to populate current waits can damage other members
  of the PFS_thread structure, most notably LF_HASH pins.

  6)

  Upon thread disconnect, using a corrupted LF_HASH pin when calling
  lf_hash_put_pins leads to a crash.

---

  The fix for this issue is to use the current thread, not the socket owner,
  in the performance schema socket instrumentation.

  Also, asserts have been added to detect similar failures.
  With the asserts, the original issue,
  which was spurious and only occurred rarely,
  is now detected systematically.
diff --git a/storage/perfschema/pfs.cc b/storage/perfschema/pfs.cc
@@ -3093,9 +3093,9 @@ pfs_start_table_io_wait_v1(PSI_table_locker_state *state,
   if (! pfs_table->m_io_enabled)
     return NULL;
 
-  PFS_thread *pfs_thread= pfs_table->m_thread_owner;
+  PFS_thread *pfs_thread= my_thread_get_THR_PFS();
 
-  DBUG_ASSERT(pfs_thread == my_thread_get_THR_PFS());
+  DBUG_ASSERT(pfs_thread == pfs_table->m_thread_owner);
 
   uint flags;
   ulonglong timer_start= 0;
@@ -3198,7 +3198,9 @@ pfs_start_table_lock_wait_v1(PSI_table_locker_state *state,
   if (! pfs_table->m_lock_enabled)
     return NULL;
 
-  PFS_thread *pfs_thread= pfs_table->m_thread_owner;
+  PFS_thread *pfs_thread= my_thread_get_THR_PFS();
+
+  DBUG_ASSERT(pfs_thread == pfs_table->m_thread_owner);
 
   PFS_TL_LOCK_TYPE lock_type;
 
@@ -3611,7 +3613,12 @@ pfs_start_socket_wait_v1(PSI_socket_locker_state *state,
 
   if (flag_thread_instrumentation)
   {
-    PFS_thread *pfs_thread= pfs_socket->m_thread_owner;
+    /*
+       Do not use pfs_socket->m_thread_owner here,
+       as different threads may use concurrently the same socket,
+       for example during a KILL.
+    */
+    PFS_thread *pfs_thread= my_thread_get_THR_PFS();
 
     if (unlikely(pfs_thread == NULL))
       return NULL;
@@ -3983,6 +3990,8 @@ void pfs_end_idle_wait_v1(PSI_idle_locker* locker)
       if (thread->m_flag_events_waits_history_long)
         insert_events_waits_history_long(wait);
       thread->m_events_waits_current--;
+
+      DBUG_ASSERT(wait == thread->m_events_waits_current);
     }
   }
 
@@ -4067,6 +4076,8 @@ void pfs_end_mutex_wait_v1(PSI_mutex_locker* locker, int rc)
       if (thread->m_flag_events_waits_history_long)
         insert_events_waits_history_long(wait);
       thread->m_events_waits_current--;
+
+      DBUG_ASSERT(wait == thread->m_events_waits_current);
     }
   }
 }
@@ -4146,6 +4157,8 @@ void pfs_end_rwlock_rdwait_v1(PSI_rwlock_locker* locker, int rc)
       if (thread->m_flag_events_waits_history_long)
         insert_events_waits_history_long(wait);
       thread->m_events_waits_current--;
+
+      DBUG_ASSERT(wait == thread->m_events_waits_current);
     }
   }
 }
@@ -4223,6 +4236,8 @@ void pfs_end_rwlock_wrwait_v1(PSI_rwlock_locker* locker, int rc)
       if (thread->m_flag_events_waits_history_long)
         insert_events_waits_history_long(wait);
       thread->m_events_waits_current--;
+
+      DBUG_ASSERT(wait == thread->m_events_waits_current);
     }
   }
 }
@@ -4287,6 +4302,8 @@ void pfs_end_cond_wait_v1(PSI_cond_locker* locker, int rc)
       if (thread->m_flag_events_waits_history_long)
         insert_events_waits_history_long(wait);
       thread->m_events_waits_current--;
+
+      DBUG_ASSERT(wait == thread->m_events_waits_current);
     }
   }
 }
@@ -4382,6 +4399,8 @@ void pfs_end_table_io_wait_v1(PSI_table_locker* locker, ulonglong numrows)
       if (thread->m_flag_events_waits_history_long)
         insert_events_waits_history_long(wait);
       thread->m_events_waits_current--;
+
+      DBUG_ASSERT(wait == thread->m_events_waits_current);
     }
   }
 
@@ -4451,6 +4470,8 @@ void pfs_end_table_lock_wait_v1(PSI_table_locker* locker)
       if (thread->m_flag_events_waits_history_long)
         insert_events_waits_history_long(wait);
       thread->m_events_waits_current--;
+
+      DBUG_ASSERT(wait == thread->m_events_waits_current);
     }
   }
 
@@ -4723,6 +4744,8 @@ void pfs_end_file_wait_v1(PSI_file_locker *locker,
       if (thread->m_flag_events_waits_history_long)
         insert_events_waits_history_long(wait);
       thread->m_events_waits_current--;
+
+      DBUG_ASSERT(wait == thread->m_events_waits_current);
     }
   }
 }
@@ -6307,6 +6330,8 @@ void pfs_end_socket_wait_v1(PSI_socket_locker *locker, size_t byte_count)
     if (thread->m_flag_events_waits_history_long)
       insert_events_waits_history_long(wait);
     thread->m_events_waits_current--;
+
+    DBUG_ASSERT(wait == thread->m_events_waits_current);
   }
 }
 
@@ -7039,6 +7064,8 @@ pfs_end_metadata_wait_v1(PSI_metadata_locker *locker,
       if (thread->m_flag_events_waits_history_long)
         insert_events_waits_history_long(wait);
       thread->m_events_waits_current--;
+
+      DBUG_ASSERT(wait == thread->m_events_waits_current);
     }
   }
   else

Original file line number	Diff line number	Diff line change
`@@ -3093,9 +3093,9 @@ pfs_start_table_io_wait_v1(PSI_table_locker_state *state,`
`3093`	`3093`	`if (! pfs_table->m_io_enabled)`
`3094`	`3094`	`return NULL;`
`3095`	`3095`
`3096`		`- PFS_thread *pfs_thread= pfs_table->m_thread_owner;`
	`3096`	`+ PFS_thread *pfs_thread= my_thread_get_THR_PFS();`
`3097`	`3097`
`3098`		`- DBUG_ASSERT(pfs_thread == my_thread_get_THR_PFS());`
	`3098`	`+ DBUG_ASSERT(pfs_thread == pfs_table->m_thread_owner);`
`3099`	`3099`
`3100`	`3100`	`uint flags;`
`3101`	`3101`	`ulonglong timer_start= 0;`
`@@ -3198,7 +3198,9 @@ pfs_start_table_lock_wait_v1(PSI_table_locker_state *state,`
`3198`	`3198`	`if (! pfs_table->m_lock_enabled)`
`3199`	`3199`	`return NULL;`
`3200`	`3200`
`3201`		`- PFS_thread *pfs_thread= pfs_table->m_thread_owner;`
	`3201`	`+ PFS_thread *pfs_thread= my_thread_get_THR_PFS();`
	`3202`	`+`
	`3203`	`+ DBUG_ASSERT(pfs_thread == pfs_table->m_thread_owner);`
`3202`	`3204`
`3203`	`3205`	`PFS_TL_LOCK_TYPE lock_type;`
`3204`	`3206`
`@@ -3611,7 +3613,12 @@ pfs_start_socket_wait_v1(PSI_socket_locker_state *state,`
`3611`	`3613`
`3612`	`3614`	`if (flag_thread_instrumentation)`
`3613`	`3615`	`{`
`3614`		`- PFS_thread *pfs_thread= pfs_socket->m_thread_owner;`
	`3616`	`+ /*`
	`3617`	`+ Do not use pfs_socket->m_thread_owner here,`
	`3618`	`+ as different threads may use concurrently the same socket,`
	`3619`	`+ for example during a KILL.`
	`3620`	`+ */`
	`3621`	`+ PFS_thread *pfs_thread= my_thread_get_THR_PFS();`
`3615`	`3622`
`3616`	`3623`	`if (unlikely(pfs_thread == NULL))`
`3617`	`3624`	`return NULL;`
`@@ -3983,6 +3990,8 @@ void pfs_end_idle_wait_v1(PSI_idle_locker* locker)`
`3983`	`3990`	`if (thread->m_flag_events_waits_history_long)`
`3984`	`3991`	`insert_events_waits_history_long(wait);`
`3985`	`3992`	`thread->m_events_waits_current--;`
	`3993`	`+`
	`3994`	`+ DBUG_ASSERT(wait == thread->m_events_waits_current);`
`3986`	`3995`	`}`
`3987`	`3996`	`}`
`3988`	`3997`
`@@ -4067,6 +4076,8 @@ void pfs_end_mutex_wait_v1(PSI_mutex_locker* locker, int rc)`
`4067`	`4076`	`if (thread->m_flag_events_waits_history_long)`
`4068`	`4077`	`insert_events_waits_history_long(wait);`
`4069`	`4078`	`thread->m_events_waits_current--;`
	`4079`	`+`
	`4080`	`+ DBUG_ASSERT(wait == thread->m_events_waits_current);`
`4070`	`4081`	`}`
`4071`	`4082`	`}`
`4072`	`4083`	`}`
`@@ -4146,6 +4157,8 @@ void pfs_end_rwlock_rdwait_v1(PSI_rwlock_locker* locker, int rc)`
`4146`	`4157`	`if (thread->m_flag_events_waits_history_long)`
`4147`	`4158`	`insert_events_waits_history_long(wait);`
`4148`	`4159`	`thread->m_events_waits_current--;`
	`4160`	`+`
	`4161`	`+ DBUG_ASSERT(wait == thread->m_events_waits_current);`
`4149`	`4162`	`}`
`4150`	`4163`	`}`
`4151`	`4164`	`}`
`@@ -4223,6 +4236,8 @@ void pfs_end_rwlock_wrwait_v1(PSI_rwlock_locker* locker, int rc)`
`4223`	`4236`	`if (thread->m_flag_events_waits_history_long)`
`4224`	`4237`	`insert_events_waits_history_long(wait);`
`4225`	`4238`	`thread->m_events_waits_current--;`
	`4239`	`+`
	`4240`	`+ DBUG_ASSERT(wait == thread->m_events_waits_current);`
`4226`	`4241`	`}`
`4227`	`4242`	`}`
`4228`	`4243`	`}`
`@@ -4287,6 +4302,8 @@ void pfs_end_cond_wait_v1(PSI_cond_locker* locker, int rc)`
`4287`	`4302`	`if (thread->m_flag_events_waits_history_long)`
`4288`	`4303`	`insert_events_waits_history_long(wait);`
`4289`	`4304`	`thread->m_events_waits_current--;`
	`4305`	`+`
	`4306`	`+ DBUG_ASSERT(wait == thread->m_events_waits_current);`
`4290`	`4307`	`}`
`4291`	`4308`	`}`
`4292`	`4309`	`}`
`@@ -4382,6 +4399,8 @@ void pfs_end_table_io_wait_v1(PSI_table_locker* locker, ulonglong numrows)`
`4382`	`4399`	`if (thread->m_flag_events_waits_history_long)`
`4383`	`4400`	`insert_events_waits_history_long(wait);`
`4384`	`4401`	`thread->m_events_waits_current--;`
	`4402`	`+`
	`4403`	`+ DBUG_ASSERT(wait == thread->m_events_waits_current);`
`4385`	`4404`	`}`
`4386`	`4405`	`}`
`4387`	`4406`
`@@ -4451,6 +4470,8 @@ void pfs_end_table_lock_wait_v1(PSI_table_locker* locker)`
`4451`	`4470`	`if (thread->m_flag_events_waits_history_long)`
`4452`	`4471`	`insert_events_waits_history_long(wait);`
`4453`	`4472`	`thread->m_events_waits_current--;`
	`4473`	`+`
	`4474`	`+ DBUG_ASSERT(wait == thread->m_events_waits_current);`
`4454`	`4475`	`}`
`4455`	`4476`	`}`
`4456`	`4477`
`@@ -4723,6 +4744,8 @@ void pfs_end_file_wait_v1(PSI_file_locker *locker,`
`4723`	`4744`	`if (thread->m_flag_events_waits_history_long)`
`4724`	`4745`	`insert_events_waits_history_long(wait);`
`4725`	`4746`	`thread->m_events_waits_current--;`
	`4747`	`+`
	`4748`	`+ DBUG_ASSERT(wait == thread->m_events_waits_current);`
`4726`	`4749`	`}`
`4727`	`4750`	`}`
`4728`	`4751`	`}`
`@@ -6307,6 +6330,8 @@ void pfs_end_socket_wait_v1(PSI_socket_locker *locker, size_t byte_count)`
`6307`	`6330`	`if (thread->m_flag_events_waits_history_long)`
`6308`	`6331`	`insert_events_waits_history_long(wait);`
`6309`	`6332`	`thread->m_events_waits_current--;`
	`6333`	`+`
	`6334`	`+ DBUG_ASSERT(wait == thread->m_events_waits_current);`
`6310`	`6335`	`}`
`6311`	`6336`	`}`
`6312`	`6337`
`@@ -7039,6 +7064,8 @@ pfs_end_metadata_wait_v1(PSI_metadata_locker *locker,`
`7039`	`7064`	`if (thread->m_flag_events_waits_history_long)`
`7040`	`7065`	`insert_events_waits_history_long(wait);`
`7041`	`7066`	`thread->m_events_waits_current--;`
	`7067`	`+`
	`7068`	`+ DBUG_ASSERT(wait == thread->m_events_waits_current);`
`7042`	`7069`	`}`
`7043`	`7070`	`}`
`7044`	`7071`	`else`