SDIO WiFi调试经验总结-锐单电子商城

最近和硬件同事调SDIO WiFi遇到一个奇怪的问题：只要启动wlan0网卡，CPU负载会很高，系统会严重卡住。top命令查看：

Mem: 9744K used, 16672K free, 0K shrd, 0K buff, 5248K cached
CPU: 0.0% usr 96.9% sys 0.0% nic 3.0% idle 0.0% io 0.0% irq 0.0% sirq
Load average: 1.27 0.32 0.11 2/35 119
PID PPID USER STAT VSZ %VSZ CPU %CPU COMMAND
68 2 root RW 0 0.0 0 96.9 [ksdioirqd/mmc1]
73 57 root D 4716 17.8 0 0.0 wpa_supplicant -Dnl80211 -iwlan0 -
57 1 root S 1840 6.9 0 0.0 -sh
1 0 root S 1824 6.9 0 0.0 {linuxrc} init
119 57 root R 1820 6.8 0 0.0 top
59 1 root S 1812 6.8 0 0.0 telnetd
45 2 root SWN 0 0.0 0 0.0 [jffs2_gcd_mtd2]
40 2 root DW 0 0.0 0 0.0 [wl_event_handle]
35 2 root SW 0 0.0 0 0.0 [kworker/u2:1]
15 2 root SW 0 0.0 0 0.0 [kworker/0:1]
7 2 root SW 0 0.0 0 0.0 [rcu_preempt]
11 2 root SW< 0 0.0 0 0.0 [khelper]
13 2 root SW< 0 0.0 0 0.0 [bioset]
10 2 root SW 0 0.0 0 0.0 [watchdog/0]
2 0 root SW 0 0.0 0 0.0 [kthreadd]
3 2 root SW 0 0.0 0 0.0 [ksoftirqd/0]
4 2 root SW 0 0.0 0 0.0 [kworker/0:0]
14 2 root SW< 0 0.0 0 0.0 [kblockd]
6 2 root SW 0 0.0 0 0.0 [kworker/u2:0]

发现ksdioirqd/mmc1 CPU占用率很高，所以查看核心代码发现：

drivers/mmc/core/sdio_irq.c中的sdio_card_irq_get函数创建了 ksdioirqd/mmc1线程。

/*
 * sdio_card_irq_get - register one more SDIO IRQ user on the card's host.
 * @card: card whose host should run the SDIO IRQ thread
 *
 * Bumps host->sdio_irqs.  When the count was zero before the call, the
 * abort flag is cleared and the "ksdioirqd/<hostname>" kernel thread is
 * started with sdio_irq_thread() as its entry point.
 *
 * Warns if the host is not claimed on entry.
 *
 * Returns 0 on success, or the error encoded in the pointer returned by
 * kthread_run() if the thread could not be started (the count is rolled
 * back in that case).
 */
static int sdio_card_irq_get(struct mmc_card *card)
{
	struct mmc_host *host = card->host;

	WARN_ON(!host->claimed);

	/*
	 * Post-increment: the test sees the old count (zero means we are the
	 * first user and must start the thread), while the count itself always
	 * accounts for this caller.  Without the increment the rollback below
	 * would underflow the counter and every call would spawn a new thread.
	 */
	if (!host->sdio_irqs++) {
		atomic_set(&host->sdio_irq_thread_abort, 0);
		host->sdio_irq_thread =
			kthread_run(sdio_irq_thread, host, "ksdioirqd/%s",
				mmc_hostname(host));
		if (IS_ERR(host->sdio_irq_thread)) {
			int err = PTR_ERR(host->sdio_irq_thread);

			/* Undo the increment taken above. */
			host->sdio_irqs--;
			return err;
		}
	}

	return 0;
}

然后跟踪代码：

static int sdio_irq_thread(void *_host) {  struct mmc_host *host = _host;  struct sched_param param = { .sched_priority = 1 };  unsigned long period, idle_period;  int ret;   sched_setscheduler(current, SCHED_FIFO, &param);   /*   * We want to allow for SDIO cards to work even on non SDIO   * aware hosts.  One thing that non SDIO host cannot do is   * asynchronous notification of pending SDIO card interrupts   * hence we poll for them in that case.   */  idle_period = msecs_to_jiffies(10);  period = (host->caps & MMC_CAP_SDIO_IRQ) ?   MAX_SCHEDULE_TIMEOUT : idle_period;   pr_debug("%s: IRQ thread started (poll period = %lu jiffies)\n",    mmc_hostname(host), period);   do {   /*    * We claim the host here on drivers behalf for a couple    * reasons:    *    * 1) it is already needed to retrieve the CCCR_INTx;    * 2) we want the driver(s) to clear the IRQ condition ASAP;    * 3) we need to control the abort condition locally.    *    * Just like traditional hard IRQ handlers, we expect SDIO    * IRQ handlers to be quick and to the point, so that the    * holding of the host lock does not cover too much work    * that doesn't require that lock to be held.    */   ret = __mmc_claim_host(host, &host->sdio_irq_thread_abort);   if (ret)    break;   ret = process_sdio_pending_irqs(host);///检查是否有中断   host->sdio_irq_pending = false;   mmc_release_host(host);    /*    * Give other threads a chance to run in the presence of    * erors.
		 */
		if (ret < 0) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (!kthread_should_stop())
				schedule_timeout(HZ);
			set_current_state(TASK_RUNNING);
		}

		/*
		 * Adaptive polling frequency based on the assumption
		 * that an interrupt will be closely followed by more.
		 * This has a substantial benefit for network devices.
		 */
		if (!(host->caps & MMC_CAP_SDIO_IRQ)) {
			if (ret > 0)
				period /= 2;//如果有中断产生则将唤醒的时间间隔减半，否则的话不断的加大唤醒间隔直到最大。
			else {
				period++; //在有中断产生的时候产生polling，一旦polling检查到没有中断就改成中断模式
				if (period > idle_period)
					period = idle_period;
			}
		}

		set_current_state(TASK_INTERRUPTIBLE);
		if (host->caps & MMC_CAP_SDIO_IRQ) {
			mmc_host_clk_hold(host);
			host->ops->enable_sdio_irq(host, 1);
			mmc_host_clk_release(host);
		}
		if (!kthread_should_stop())
			schedule_timeout(period);
		set_current_state(TASK_RUNNING);
	} while (!kthread_should_stop());

	if (host->caps & MMC_CAP_SDIO_IRQ) {
		mmc_host_clk_hold(host);
		host->ops->enable_sdio_irq(host, 0);
		mmc_host_clk_release(host);
	}

	pr_debug("%s: IRQ thread exiting with code %d\n",
		 mmc_hostname(host), ret);

	return ret;
}

对于不支持SDIO中断的host，kernel会采用polling的方式来实现伪中断：不断地唤醒ksdioirqd来检查SDIO的CCCR的中断标志位。对于支持SDIO中断的host，则直接在SDIO中断产生的时候由host的驱动来负责通知mmc子系统唤醒ksdioirqd来检查中断。

顺着process_sdio_pending_irqs函数往下跟会发现：

/*
 * Issue one SD_IO_RW_DIRECT command on @host and optionally hand back the
 * byte found in the response.
 *
 * @host:  host to send the command on (must not be NULL)
 * @write: non-zero sets bit 31 of the command argument
 * @fn:    function number, at most 7, placed at bit 28
 * @addr:  register address, must fit in 17 bits, placed at bit 9
 * @in:    byte OR-ed into the low bits of the argument
 * @out:   if non-NULL, receives one byte extracted from resp[0]; together
 *         with @write it also sets bit 27 of the argument
 *
 * Returns 0 on success, -EINVAL for an out-of-range @addr, the error from
 * mmc_wait_for_cmd(), or -EIO / -EINVAL / -ERANGE depending on which R5
 * status bit is set in the response (non-SPI hosts only).
 */
static int mmc_io_rw_direct_host(struct mmc_host *host, int write, unsigned fn,
	unsigned addr, u8 in, u8 *out)
{
	struct mmc_command cmd = {0};
	int rc;

	BUG_ON(!host);
	BUG_ON(fn > 7);

	/* Reject any address with bits set above bit 16. */
	if (addr & ~0x1FFFF)
		return -EINVAL;

	cmd.opcode = SD_IO_RW_DIRECT;

	/* Assemble the 32-bit argument field by field. */
	cmd.arg = 0x00000000;
	if (write)
		cmd.arg = 0x80000000;
	cmd.arg |= fn << 28;
	if (write && out)
		cmd.arg |= 0x08000000;
	cmd.arg |= addr << 9;
	cmd.arg |= in;

	cmd.flags = MMC_RSP_SPI_R5 | MMC_RSP_R5 | MMC_CMD_AC;

	rc = mmc_wait_for_cmd(host, &cmd, 0);
	if (rc)
		return rc;

	/* On SPI hosts the host driver has already reported errors. */
	if (!mmc_host_is_spi(host)) {
		if (cmd.resp[0] & R5_ERROR)
			return -EIO;
		if (cmd.resp[0] & R5_FUNCTION_NUMBER)
			return -EINVAL;
		if (cmd.resp[0] & R5_OUT_OF_RANGE)
			return -ERANGE;
	}

	if (!out)
		return 0;

	/* The returned byte sits one byte higher in the SPI response. */
	if (mmc_host_is_spi(host))
		*out = (cmd.resp[0] >> 8) & 0xFF;
	else
		*out = cmd.resp[0] & 0xFF;

	return 0;
}

其中mmc_wait_for_cmd函数引起了我的注意，因为，听硬件同事提起过SDIO cmd线的事。

接着看mmc_wait_for_cmd函数：

/**
 *	mmc_wait_for_cmd - start a command and wait for completion
 *	@host: MMC host to start command
 *	@cmd: MMC command to start
 *	@retries: maximum number of retries
 *
 *	Start a new MMC command for a host, and wait for the command
 *	to complete.  Return any error that occurred while the command
 *	was executing.  Do not attempt to parse the response.
 */
int mmc_wait_for_cmd(struct mmc_host *host, struct mmc_command *cmd, int retries)
{
	struct mmc_request mrq = {NULL};

	WARN_ON(!host->claimed);

	memset(cmd->resp, 0, sizeof(cmd->resp));
	cmd->retries = retries;

	mrq.cmd = cmd;
	cmd->data = NULL;

	mmc_wait_for_req(host, &mrq);

	return cmd->error;
}

先看注释：

* Start a new MMC command for a host, and wait for the command * to complete. Return any error that occurred while the command * was executing. Do not attempt to parse the response.

翻译：为host开启一个新的MMC命令，并等待命令执行完成，返回命令执行时发生的任何错误。不要试图解释响应。

再顺着跟一跟代码：

static void mmc_wait_for_req_done(struct mmc_host *host,
				  struct mmc_request *mrq)
{
	struct mmc_command *cmd;

	while (1) {
		wait_for_completion(&mrq->completion);

		cmd = mrq->cmd;
		if (!cmd->error || !cmd->retries ||
		    mmc_card_removed(host->card))
			break;

		pr_debug("%s: req failed (CMD%u): %d, retrying...\n",
			 mmc_hostname(host), cmd->opcode, cmd->error);
		cmd->retries--;
		cmd->error = 0;
		host->ops->request(host, mrq);
	}
}

其中host->ops->request(host, mrq);回调函数会调到：

/*
 * Request entry point of the JZ MMC host driver.
 *
 * Fails the request with -ENOMEDIUM when no card is flagged present.
 * Otherwise records the request, its data and its command in the driver
 * state, prepares the data phase (if any), arms the request timer and
 * starts the command, followed by the data transfer when there is one.
 */
static void jzmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
{
	struct jzmmc_host *host = mmc_priv(mmc);

	/* Without a card the request is completed immediately with an error. */
	if (!test_bit(JZMMC_CARD_PRESENT, &host->flags)) {
		dev_vdbg(host->dev, "No card present\n");
		mrq->cmd->error = -ENOMEDIUM;
		mmc_request_done(mmc, mrq);
		return;
	}

	/*
	 * A data request arriving while the host is not idle is not expected
	 * by design; emit a warning with a backtrace but carry on.
	 */
	if ((mrq->data != NULL) && (host->state != STATE_IDLE)) {
		dev_warn(host->dev, "operate in non-idle state\n");
		WARN_ON(1);
	}

	/* Remember what is currently being processed. */
	host->mrq = mrq;
	host->data = mrq->data;
	host->cmd = mrq->cmd;

	if (!host->data) {
		dev_dbg(host->dev, "op:%d\n", host->cmd->opcode);
	} else {
		dev_dbg(host->dev, "op:%d arg:0x%08X sz:%uk\n",
			 host->cmd->opcode, host->cmd->arg,
			 host->data->blocks >> 1);
	}

	/* Every request starts from the default command attributes. */
	host->cmdat = host->cmdat_def;

	if (host->data) {
		/* A single scatterlist entry below PIO_THRESHOLD uses PIO mode. */
		if ((host->data->sg_len == 1)
		    && (sg_dma_len(host->data->sg)) < PIO_THRESHOLD) {
			enable_pio_mode(host);
		}

		jzmmc_data_pre(host, host->data);
	}

	/*
	 * Normally mmc_request_done is reached eventually.  The timer armed
	 * here covers the case where some severe fault (e.g. VDD bouncing)
	 * leaves the request with nothing to complete it.
	 */
	host->timeout_cnt = 0;
	mod_timer(&host->request_timer, jiffies +
		  msecs_to_jiffies(TIMEOUT_PERIOD));

	jzmmc_command_start(host, host->cmd);

	/* host->data is read again on purpose, after the command was started. */
	if (host->data) {
		jzmmc_data_start(host, host->data);
	}

	/* Drop CMDAT_INIT from the defaults once the need-init flag was set. */
	if (unlikely(test_and_clear_bit(JZMMC_CARD_NEED_INIT, &host->flags)))
		host->cmdat_def &= ~CMDAT_INIT;
}

看注释：

/* * We would get mmc_request_done at last, unless some terrible error * occurs such as intensity rebounding of VDD, that maybe result in * no action to complete the request. */

翻译：我们将最终调用mmc_request_done函数完成请求，除非出现一些严重的错误，例如VDD的强烈反弹，这种错误可能导致请求不能完成。

看来，jzmmc_request这个函数在执行过程中出现的某些错误，会导致请求不能正常完成。这样mmc_wait_for_cmd函数就等不到命令正常执行完成，只能返回错误。由于sdio_irq_thread（即ksdioirqd线程的线程函数）采用polling的方式来实现伪中断：线程不断地被唤醒来检查SDIO的CCCR的中断标志位。所以，mmc_wait_for_cmd如果一直等不到命令正常执行完成，ksdioirqd线程就会被反复唤醒，持续占用CPU。

那么，具体是什么原因导致请求不能完成呢？

我们接着分析jzmmc_request函数，其中jzmmc_command_start函数真正为SDIO host开启了一个mmc 命令，我们看一下它的实现：

/*
 * Program the controller registers for @cmd and start the operation.
 *
 * Builds the command attribute bits from the command flags, the request
 * (auto CMD12 when request_need_stop() says so) and the response type,
 * merges them into host->cmdat, then writes CMD, ARG, CMDAT and finally
 * CTRL_START_OP to CTRL.
 *
 * When not in PIO mode, the IMASK_TIME_OUT_RES and IMASK_END_CMD_RES
 * interrupts are enabled and the state becomes STATE_WAITING_RESP before
 * the registers are written; nothing is completed in this function on
 * that path.
 *
 * In PIO mode the response is waited for here: a negative result from
 * wait_cmd_response() completes the request with -ETIMEDOUT; otherwise
 * jzmmc_command_done() is called and, if the request has no data, the
 * request is completed.
 */
static void jzmmc_command_start(struct jzmmc_host *host, struct mmc_command *cmd)
{
	unsigned long cmdat = 0;
	unsigned long imsk;

	if (cmd->flags & MMC_RSP_BUSY)
		cmdat |= CMDAT_BUSY;
	if (request_need_stop(host->mrq))
		cmdat |= CMDAT_AUTO_CMD12;


	/* Map the response type onto a CMDAT_RESPONSE_* value; others add nothing. */
	switch (mmc_resp_type(cmd)) {
#define _CASE(S,D) case MMC_RSP_##S: cmdat |= CMDAT_RESPONSE_##D; break
		_CASE(R1, R1); 	/* r1 = r5,r6,r7 */
		_CASE(R1B, R1);
		_CASE(R2, R2);
		_CASE(R3, R3); 	/* r3 = r4 */
	default:
		break;
#undef _CASE
	}
	host->cmdat |= cmdat;
	if (!is_pio_mode(host)) {
		/* Interrupts are enabled before the operation is started below. */
		imsk = IMASK_TIME_OUT_RES | IMASK_END_CMD_RES;
		enable_msc_irq(host, imsk);
		host->state = STATE_WAITING_RESP;
	}
	/* Register writes; CTRL_START_OP is written last. */
	msc_writel(host, CMD, cmd->opcode);
	msc_writel(host, ARG, cmd->arg);
	msc_writel(host, CMDAT, host->cmdat);
	msc_writel(host, CTRL, CTRL_START_OP);
	if (is_pio_mode(host)) {
		if (wait_cmd_response(host) < 0) {
			/* No response: fail the request and stop the request timer. */
			cmd->error = -ETIMEDOUT;
			del_timer_sync(&host->request_timer);
			mmc_request_done(host->mmc, host->mrq);
			return;
		}
		jzmmc_command_done(host, host->cmd);
		if (!host->data) {
			/* Command-only request: it is finished at this point. */
			del_timer_sync(&host->request_timer);
			mmc_request_done(host->mmc, host->mrq);
		}
	}
}

我们看到了msc_writel函数，这个函数开始操作寄存器写命令了。正常情况下msc_writel不会执行失败，除非硬件有问题。再次想起硬件同事关于SDIO cmd线的话。跟硬件同事确认发现，SDIO cmd线没有接上拉电阻。分析可能是因为SDIO cmd线没有接上拉电阻导致msc_writel执行失败，进而导致上述jzmmc_request函数执行失败，请求不能完成。

于是，硬件同事接上上拉电阻，一切正常了。

资讯详情

SDIO WiFi调试经验总结

动力学技术KTU1121 USB Type-C 端口保护器的介绍、特性、及应用

SDIO WiFi调试经验总结

动力学技术KTU1121 USB Type-C 端口保护器的介绍、特性、及应用

最近热搜

历史搜索 清除历史记录

历史搜索清除历史记录