源码分析STOP REPLICA

发布时间 2023-04-04 17:27:38作者: 吃饭端住碗

STOP REPLICA的用户线程堆栈:

#0  0x00007f73d8878de2 in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
#1  0x00000000048d9f56 in native_cond_timedwait (cond=0xc4abe60, mutex=0xc4379a8, abstime=0x7f73c196bdd0) at /root/mysql-8.0.32/include/thr_cond.h:99
#2  0x00000000048da2c1 in safe_cond_timedwait (cond=0xc4abe60, mp=0xc437980, abstime=0x7f73c196bdd0, file=0x6529e48 "/root/mysql-8.0.32/sql/rpl_replica.cc", line=1953) at /root/mysql-8.0.32/mysys/thr_cond.cc:113
#3  0x00000000045949d4 in my_cond_timedwait (cond=0xc4abe60, mp=0xc4abd60, abstime=0x7f73c196bdd0, file=0x6529e48 "/root/mysql-8.0.32/sql/rpl_replica.cc", line=1953) at /root/mysql-8.0.32/include/thr_cond.h:146
#4  0x0000000004594baa in inline_mysql_cond_timedwait (that=0xc4abe60, mutex=0xc4abd60, abstime=0x7f73c196bdd0, src_file=0x6529e48 "/root/mysql-8.0.32/sql/rpl_replica.cc", src_line=1953) at /root/mysql-8.0.32/include/mysql/psi/mysql_cond.h:242
#5  0x000000000459a300 in terminate_slave_thread (thd=0x7f71bc1c51b0, term_lock=0xc4abd60, term_cond=0xc4abe60, slave_running=0xc4abf1c, stop_wait_timeout=0x7f73c196be68, need_lock_term=false, force=false) at /root/mysql-8.0.32/sql/rpl_replica.cc:1953
#6  0x00000000045999ff in terminate_slave_threads (mi=0xc2d7f50, thread_mask=3, stop_wait_timeout=31536000, need_lock_term=false) at /root/mysql-8.0.32/sql/rpl_replica.cc:1719
#7  0x00000000045b3395 in stop_slave (thd=0x7f71c40103a0, mi=0xc2d7f50, net_report=true, for_one_channel=false, push_temp_tables_warning=0x7f73c196c00f) at /root/mysql-8.0.32/sql/rpl_replica.cc:9023
#8  0x000000000459641d in stop_slave (thd=0x7f71c40103a0) at /root/mysql-8.0.32/sql/rpl_replica.cc:680
#9  0x0000000004596bd8 in stop_slave_cmd (thd=0x7f71c40103a0) at /root/mysql-8.0.32/sql/rpl_replica.cc:842
#10 0x00000000032a0ecf in mysql_execute_command (thd=0x7f71c40103a0, first_level=true) at /root/mysql-8.0.32/sql/sql_parse.cc:3595
#11 0x00000000032a67df in dispatch_sql_command (thd=0x7f71c40103a0, parser_state=0x7f73c196d910) at /root/mysql-8.0.32/sql/sql_parse.cc:5322
#12 0x000000000329c743 in dispatch_command (thd=0x7f71c40103a0, com_data=0x7f73c196ea00, command=COM_QUERY) at /root/mysql-8.0.32/sql/sql_parse.cc:2036
#13 0x000000000329a7d3 in do_command (thd=0x7f71c40103a0) at /root/mysql-8.0.32/sql/sql_parse.cc:1439
#14 0x00000000034b925f in handle_connection (arg=0xc48edd0) at /root/mysql-8.0.32/sql/conn_handler/connection_handler_per_thread.cc:302
#15 0x00000000051e835c in pfs_spawn_thread (arg=0xc530c80) at /root/mysql-8.0.32/storage/perfschema/pfs.cc:2986
#16 0x00007f73d8874ea5 in start_thread () from /lib64/libpthread.so.0
#17 0x00007f73d729cb0d in clone () from /lib64/libc.so.6
从命令入口函数开始说,mysql_execute_command在处理STOP REPLICA命令时,为了防止发生死锁,会先判断当前会话是否持有表锁、是否处于活跃的多语句事务中或是否持有全局读锁:
// 为了防止出现死锁,当前会话执行STOP命令之前不能存在表锁
case SQLCOM_SLAVE_STOP: {
  if (thd->locked_tables_mode || thd->in_active_multi_stmt_transaction() ||
      thd->global_read_lock.is_acquired()) {
    my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0));
    goto error;
  }
  res = stop_slave_cmd(thd);
  break;
}

随后开始进入stop_slave_cmd函数,stop_slave_cmd函数主要是用来判断用户是否指定了需要停止的channel name:

  // 先获取备份锁(lock instance for backup)
  MDL_lock_guard backup_sentry{thd};
  /* During provisioning we stop slave after acquiring backup lock. */
  if (!Clone_handler::is_provisioning() &&
      (!thd->lex->slave_thd_opt || (thd->lex->slave_thd_opt & SLAVE_SQL))) {
    if (backup_sentry.lock(MDL_key::BACKUP_LOCK, MDL_INTENTION_EXCLUSIVE)) {
      my_error(ER_RPL_CANT_STOP_SLAVE_WHILE_LOCKED_BACKUP, MYF(0));
      channel_map.unlock();
      return true;
    }
  }

// 如果没有指定channel name,则停止所有运行中的channel
  if (!lex->mi.for_channel)
    res = stop_slave(thd);
  else {
    // 否则获取并停止指定的channel
    mi = channel_map.get_mi(lex->mi.channel);
......
 // 对指定的channel执行STOP操作
    if (mi)
      res = stop_slave(thd, mi, true /*net report */, true /*for_one_channel*/,
                       &push_temp_table_warning);
    else if (strcmp(channel_map.get_default_channel(), lex->mi.channel))
      my_error(ER_SLAVE_CHANNEL_DOES_NOT_EXIST, MYF(0), lex->mi.channel);
  }

  channel_map.unlock();

stop_slave有两个重载,stop_slave(THD *thd, Master_info *mi, bool net_report, bool for_one_channel,bool *push_temp_tables_warning)用来停止某个单独的channel;stop_slave(THD *thd)主要是用来当没有指定channel name时,调用stop_slave(THD *thd, Master_info *mi, bool net_report, bool for_one_channel,bool *push_temp_tables_warning)来停止默认的channel或停止多源复制中所有的channel

stop_slave(THD *thd)代码如下:

  // 如果不是多源复制则停止默认channel
  if (channel_map.get_num_instances() == 1) {
    mi = channel_map.get_default_channel_mi();

    assert(!strcmp(mi->get_channel(), channel_map.get_default_channel()));

    error = stop_slave(thd, mi, true, false /*for_one_channel*/,
                       &push_temp_table_warning);
  } else {
    // 如果为多源复制则循环停止所有的channel
    for (mi_map::iterator it = channel_map.begin(); it != channel_map.end();
         it++) {
      mi = it->second;

      if (Master_info::is_configured(mi)) {
        if (stop_slave(thd, mi, true, false /*for_one_channel*/,
                       &push_temp_table_warning)) {
          LogErr(ERROR_LEVEL, ER_RPL_SLAVE_CANT_STOP_SLAVE_FOR_CHANNEL,
                 mi->get_channel());
          error = 1;
        }
      }
    }

停止单个channel的函数stop_slave(THD *thd, Master_info *mi, bool net_report, bool for_one_channel,bool *push_temp_tables_warning)的主要作用是获取运行中的复制线程,并调用terminate_slave_threads来停止所有运行中的复制线程或用户指定的复制线程

主要代码如下:

  /*
    因为需要更新'mysql.slave_master_info'和
    'mysql.slave_relay_log_info'表所以需要跳过只读限制.
  */
  thd->set_skip_readonly_check();

  // 检查用户是否拥有SUPER或REPLICATION_SLAVE_ADMIN权限
  Security_context *sctx = thd->security_context();
  if (!sctx->check_access(SUPER_ACL) &&
      !sctx->has_global_grant(STRING_WITH_LEN("REPLICATION_SLAVE_ADMIN"))
           .first) {
    my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0),
             "SUPER or REPLICATION_SLAVE_ADMIN");
    return 1;
  }

...

// 获取正在运行中的复制线程
  init_thread_mask(&thread_mask, mi, false /* not inverse*/);

  /*
    如果用户指定了需要停止的复制线程,则把其他的复制线程标记为0(不停止)
  */
  if (thd->lex->slave_thd_opt) {
    thread_mask &= thd->lex->slave_thd_opt;

    // 停止IO线程时还需要考虑IO监控线程
    if ((thread_mask & SLAVE_IO) && mi->is_source_connection_auto_failover()) {
      thread_mask |= SLAVE_MONITOR;
    }
  } 
  // 停止复制线程
  if (thread_mask) {
    slave_errno =
        terminate_slave_threads(mi, thread_mask, rpl_stop_replica_timeout,
                                false /*need_lock_term=false*/);
  }

terminate_slave_threads函数的主要作用为:停止SQL线程、刷盘Relay Log Info、停止IO线程、刷盘Master Info并刷盘Relay Log:

 /*
    STOP SLAVE的超时时间,从8.0.26开始新增了rpl_stop_replica_timeout
    参数,默认31536000s(365天)
  */
  ulong total_stop_wait_timeout = stop_wait_timeout;

  if (thread_mask & (SLAVE_SQL | SLAVE_FORCE_ALL)) {
    DBUG_PRINT("info", ("Terminating SQL thread"));
    
    // 标记需要终止SQL线程(加※)
    mi->rli->abort_slave = true;

    DEBUG_SYNC(current_thd, "terminate_slave_threads_after_set_abort_slave");

    // 停止SQL线程
    if ((error = terminate_slave_thread(
             mi->rli->info_thd, sql_lock, &mi->rli->stop_cond,
             &mi->rli->slave_running, &total_stop_wait_timeout,
             need_lock_term)) &&
        !force_all) {
      if (error == 1) {
        return ER_STOP_SLAVE_SQL_THREAD_TIMEOUT;
      }
      return error;
    }

   /*
      刷盘Relay Log Info
    */
    if (mi->rli->flush_info(Relay_log_info::RLI_FLUSH_IGNORE_SYNC_OPT)) {
      return ER_ERROR_DURING_FLUSH_LOGS;
    }
......
if (thread_mask & (SLAVE_IO | SLAVE_FORCE_ALL)) {
    DBUG_PRINT("info", ("Terminating IO thread"));
    // 标记需要终止IO线程(加※)
    mi->abort_slave = true;

 // 终止IO线程
    if ((error = terminate_slave_thread(
             mi->info_thd, io_lock, &mi->stop_cond, &mi->slave_running,
             &total_stop_wait_timeout, need_lock_term, force_io_stop)) &&
        !force_all) {
      if (error == 1) {
        return ER_STOP_SLAVE_IO_THREAD_TIMEOUT;
      }
      return error;


    // 刷盘 Master Info
    if (!mi->is_gtid_only_mode()) {
      mysql_mutex_lock(&mi->data_lock);
      if (mi->flush_info(true)) {
        mysql_mutex_unlock(&mi->data_lock);
        mysql_mutex_unlock(log_lock);
        return ER_ERROR_DURING_FLUSH_LOGS;
      }
      mysql_mutex_unlock(&mi->data_lock);
    }
    // 刷盘Relay Log
    if (mi->rli->relay_log.is_open() &&
        mi->rli->relay_log.flush_and_sync(true)) {
      mysql_mutex_unlock(log_lock);
      return ER_ERROR_DURING_FLUSH_LOGS;
    }

    mysql_mutex_unlock(log_lock);

terminate_slave_thread函数的主要作用是在循环中反复唤醒复制线程并等待复制线程的停止信号,直到复制线程停止或等待超时:

  while (*slave_running)  // Should always be true
  {
    DBUG_PRINT("loop", ("killing slave thread"));

    mysql_mutex_lock(&thd->LOCK_thd_data);
    /*
      Error codes from pthread_kill are:
      EINVAL: invalid signal number (can't happen)
      ESRCH: thread already killed (can happen, should be ignored)
    */
#ifndef _WIN32
    // 给复制线程发送SIGALRM信号,将其从可能阻塞的等待中唤醒(线程已退出时返回ESRCH,可忽略)
    int err [[maybe_unused]] = pthread_kill(thd->real_id, SIGALRM);
    assert(err != EINVAL);
#endif
    // 唤醒复制线程
    if (force)
      thd->awake(THD::KILL_CONNECTION);
    else
      thd->awake(THD::NOT_KILLED);
    mysql_mutex_unlock(&thd->LOCK_thd_data);

    DBUG_EXECUTE_IF("block_on_thread_stop_after_awake", {
      rpl_replica_debug_point(DBUG_RPL_R_WAIT_AFTER_AWAKE_ON_THREAD_STOP);
    });

    // 设置条件变量的等待超时时间为2s
    struct timespec abstime;
    set_timespec(&abstime, 2);
#ifndef NDEBUG
    int error =
#endif
        // 等待复制线程停止后发出的信号,每次最多等待2s
        mysql_cond_timedwait(term_cond, term_lock, &abstime);
    // 如果整个STOP操作超过了stop_wait_timeout,则操作超时
    if ((*stop_wait_timeout) >= 2)
      (*stop_wait_timeout) = (*stop_wait_timeout) - 2;
    else if (*slave_running) {
      if (need_lock_term) mysql_mutex_unlock(term_lock);
      return 1;
    }

Coordinator线程堆栈

#0  0x00007f73d8878a35 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
#1  0x00000000048d9f7b in native_cond_wait (cond=0x7f71bc021f30, mutex=0x7f71bc22e348) at /root/mysql-8.0.32/include/thr_cond.h:108
#2  0x00000000048da0e6 in safe_cond_wait (cond=0x7f71bc021f30, mp=0x7f71bc22e320, file=0x6529e48 "/root/mysql-8.0.32/sql/rpl_replica.cc", line=6693) at /root/mysql-8.0.32/mysys/thr_cond.cc:71
#3  0x0000000004594a07 in my_cond_wait (cond=0x7f71bc021f30, mp=0x7f71bc021f00, file=0x6529e48 "/root/mysql-8.0.32/sql/rpl_replica.cc", line=6693) at /root/mysql-8.0.32/include/thr_cond.h:159
#4  0x0000000004594acc in inline_mysql_cond_wait (that=0x7f71bc021f30, mutex=0x7f71bc021f00, src_file=0x6529e48 "/root/mysql-8.0.32/sql/rpl_replica.cc", src_line=6693) at /root/mysql-8.0.32/include/mysql/psi/mysql_cond.h:198
#5  0x00000000045aba5b in slave_stop_workers (rli=0xc4ab8a0, mts_inited=0x7f73c176a527) at /root/mysql-8.0.32/sql/rpl_replica.cc:6693
#6  0x00000000045adb8a in handle_slave_sql (arg=0xc2d7f50) at /root/mysql-8.0.32/sql/rpl_replica.cc:7213
#7  0x00000000051e835c in pfs_spawn_thread (arg=0x7f71c402c4f0) at /root/mysql-8.0.32/storage/perfschema/pfs.cc:2986
#8  0x00007f73d8874ea5 in start_thread () from /lib64/libpthread.so.0
#9  0x00007f73d729cb0d in clone () from /lib64/libc.so.6

 handle_slave_sql函数的主要作用也是在while循环中将event下发给worker并每次循环都判断是否需要终止SQL线程,这里就用上了之前设置的rli->abort_slave:

// 判断rli->abort_slave    
while (!main_loop_error && !sql_slave_killed(thd, rli)) {
  Log_event *ev = nullptr;
  THD_STAGE_INFO(thd, stage_reading_event_from_the_relay_log);
  assert(rli->info_thd == thd);
  THD_CHECK_SENTRY(thd);
  if (saved_skip && rli->slave_skip_counter == 0) {
    LogErr(INFORMATION_LEVEL, ER_RPL_SLAVE_SKIP_COUNTER_EXECUTED,
           (ulong)saved_skip, saved_log_name, (ulong)saved_log_pos,
           saved_master_log_name, (ulong)saved_master_log_pos,
           rli->get_group_relay_log_name(),
           (ulong)rli->get_group_relay_log_pos(),
           rli->get_group_master_log_name_info(),
           (ulong)rli->get_group_master_log_pos_info());
    saved_skip = 0;
  }
  // read next event
  mysql_mutex_lock(&rli->data_lock);
  ev = applier_reader.read_next_event();
  mysql_mutex_unlock(&rli->data_lock);
  // set additional context as needed by the scheduler before execution
  // takes place
  if (ev != nullptr && rli->is_parallel_exec() &&
      rli->current_mts_submode != nullptr)
    rli->current_mts_submode->set_multi_threaded_applier_context(*rli, *ev);
  // try to execute the event
  switch (exec_relay_log_event(thd, rli, &applier_reader, ev)) {
    case SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK:
      /** success, we read the next event. */
      /** fall through */
......
err:
  // report error
  if (main_loop_error == true && !sql_slave_killed(thd, rli))
    slave_errno = report_apply_event_error(thd, rli);
  /* At this point the SQL thread will not try to work anymore. */
  rli->atomic_is_stopping = true;
  (void)RUN_HOOK(
      binlog_relay_io, applier_stop,
      (thd, rli->mi, rli->is_error() || !rli->sql_thread_kill_accepted));
  // 停止worker线程
  slave_stop_workers(rli, &mts_inited);  // stopping worker pool
......
  // 广播SQL线程停止信号变量
  mysql_cond_broadcast(&rli->stop_cond);
  DBUG_EXECUTE_IF("simulate_replica_delay_at_terminate_bug38694", sleep(5););
  mysql_mutex_unlock(&rli->run_lock);  // tell the world we are done

当需要终止SQL线程时调用slave_stop_workers来终止worker线程。

slave_stop_workers函数的主要作用是设置worker线程停止前需要应用到的event截止位置(max_updated_index)、将worker线程状态设置为Slave_worker::STOP,并在最后做一次checkpoint:

 /*
    If request for stop slave is received notify worker
    to stop.
  */
  // Initialize worker exit count and max_updated_index to 0 during each stop.
  rli->exit_counter = 0;
  rli->max_updated_index = (rli->until_condition != Relay_log_info::UNTIL_NONE)
                               ? rli->mts_groups_assigned
                               : 0;
  if (!rli->workers.empty()) {
    for (int i = static_cast<int>(rli->workers.size()) - 1; i >= 0; i--) {
      Slave_worker *w = rli->workers[i];
      struct slave_job_item item = {nullptr, 0, 0};
      struct slave_job_item *job_item = &item;
      mysql_mutex_lock(&w->jobs_lock);

      if (w->running_status != Slave_worker::RUNNING) {
        mysql_mutex_unlock(&w->jobs_lock);
        continue;
      }

      w->running_status = Slave_worker::STOP;
      (void)set_max_updated_index_on_stop(w, job_item);
      mysql_cond_signal(&w->jobs_cond);

      mysql_mutex_unlock(&w->jobs_lock);

      DBUG_PRINT("info", ("Notifying worker %lu%s to exit, thd %p", w->id, 
                          w->get_for_channel_str(), w->info_thd));
    }
  }
......
if (thd->killed == THD::NOT_KILLED) (void)mta_checkpoint_routine(rli, false);

worker线程主要工作在handle_slave_worker函数中,会在while循环中调用slave_worker_exec_job_group:

  while (!error) {
    error = slave_worker_exec_job_group(w, rli);
  }
slave_worker_exec_job_group函数会调用worker->slave_worker_exec_event(ev)函数进行event的应用并在最后调用pop_jobs_item:
  while (true) {
      error = worker->slave_worker_exec_event(ev);
      ......
      job_item = pop_jobs_item(worker, job_item);

在pop_jobs_item函数中的set_max_updated_index_on_stop函数中会判断worker线程的运行状态是不是Slave_worker::STOP,如果是则调用handle_slave_worker_stop函数,handle_slave_worker_stop函数会判断是否需要停止worker线程,当队列为空或者应用的event位置超过了max_updated_index的位置则会停止worker线程(也就是说worker线程停止之前会先应用当前的event group),并发送停止信号

  /*
    Now let's decide about the deferred exit to consider
    the empty queue and the counter value reached
    replica_parallel_workers.
  */
  if (!job_item->data) {
    worker->running_status = Slave_worker::STOP_ACCEPTED;
    mysql_cond_signal(&worker->jobs_cond);
    mysql_mutex_unlock(&rli->exit_count_lock);
    return (true);
  } else if (rli->exit_counter == rli->replica_parallel_workers) {
    // over steppers should exit with accepting STOP
    if (group_index > rli->max_updated_index) {
      worker->running_status = Slave_worker::STOP_ACCEPTED;
      mysql_cond_signal(&worker->jobs_cond);
      mysql_mutex_unlock(&rli->exit_count_lock);
      return (true);
    }
  }

IO线程的终止

IO线程向主库请求Binlog、读取event并写入Relay Log的逻辑都在handle_slave_io函数中,相关逻辑是两层嵌套的while循环,每次循环都判断是否需要终止IO线程,这里就用上了之前设置的mi->abort_slave:

// 判断mi->abort_slave
while (!io_slave_killed(thd, mi)) {
      MYSQL_RPL rpl;

      THD_STAGE_INFO(thd, stage_requesting_binlog_dump);
      if (request_dump(thd, mysql, &rpl, mi, &suppress_warnings) ||
          DBUG_EVALUATE_IF("simulate_reconnect_after_failed_binlog_dump", 1,
                           0)) {
        LogErr(ERROR_LEVEL, ER_RPL_SLAVE_ERROR_REQUESTING_BINLOG_DUMP,
               mi->get_for_channel_str());
        if (check_io_slave_killed(thd, mi,
                                  "Slave I/O thread killed while "
                                  "requesting master dump") ||
            try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
                             reconnect_messages_after_failed_dump))
          goto err;
        goto connected;
      }
      const char *event_buf;

      assert(mi->last_error().number == 0);
      while (!io_slave_killed(thd, mi)) {
        ulong event_len;
        /*
           We say "waiting" because read_event() will wait if there's nothing to
           read. But if there's something to read, it will not wait. The
           important thing is to not confuse users by saying "reading" whereas
           we're in fact receiving nothing.
        */
        THD_STAGE_INFO(thd, stage_waiting_for_source_to_send_event);
        event_len = read_event(mysql, &rpl, mi, &suppress_warnings);
        if (check_io_slave_killed(thd, mi,
                                  "Slave I/O thread killed while "
                                  "reading event"))
          goto err;
          
          
 err:
    /*
      Note: the order of the two following calls (first broadcast, then unlock)
      is important. Otherwise a killer_thread can execute between the calls and
      delete the mi structure leading to a crash! (see BUG#25306 for details)
     */
  // IO线程停止之后广播停止信号变量
  mysql_cond_broadcast(&mi->stop_cond);  // tell the world we are done
  DBUG_EXECUTE_IF("simulate_replica_delay_at_terminate_bug38694", sleep(5););
  mysql_mutex_unlock(&mi->run_lock);
}