Commit c78f837a authored by Mikko Perttunen, committed by Thierry Reding
Browse files

gpu: host1x: Add no-recovery mode



Add a new property for jobs to enable or disable recovery, i.e.
CPU increments of syncpoints to max value on job timeout. This
allows for a more solid model for hung jobs, where userspace
doesn't need to guess whether a syncpoint increment happened because
the job completed or because the job timeout was triggered.

On job timeout, we stop the channel, NOP all future jobs on the
channel using the same syncpoint, mark the syncpoint as locked
and resume the channel from the next job, if any.

The future jobs are NOPed because, since we don't do the CPU
increments, the value of the syncpoint is no longer synchronized,
and any waiters would become confused if a future job incremented
the syncpoint. The syncpoint is marked locked to ensure that any
future jobs cannot increment the syncpoint either, until the
application has recognized the situation and reallocated the
syncpoint.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
parent 687db220
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -201,6 +201,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
	job->client = client;
	job->class = client->class;
	job->serialize = true;
	job->syncpt_recovery = true;

	/*
	 * Track referenced BOs so that they can be unreferenced after the
+52 −6
Original line number Diff line number Diff line
@@ -312,10 +312,6 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
	bool signal = false;
	struct host1x_job *job, *n;

	/* If CDMA is stopped, queue is cleared and we can return */
	if (!cdma->running)
		return;

	/*
	 * Walk the sync queue, reading the sync point registers as necessary,
	 * to consume as many sync queue entries as possible without blocking
@@ -324,7 +320,8 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
		struct host1x_syncpt *sp = job->syncpt;

		/* Check whether this syncpt has completed, and bail if not */
		if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) {
		if (!host1x_syncpt_is_expired(sp, job->syncpt_end) &&
		    !job->cancelled) {
			/* Start timer on next pending syncpt */
			if (job->timeout)
				cdma_start_timer_locked(cdma, job);
@@ -413,8 +410,11 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
	else
		restart_addr = cdma->last_pos;

	if (!job)
		goto resume;

	/* do CPU increments for the remaining syncpts */
	if (job) {
	if (job->syncpt_recovery) {
		dev_dbg(dev, "%s: perform CPU incr on pending buffers\n",
			__func__);

@@ -433,8 +433,44 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,

		dev_dbg(dev, "%s: finished sync_queue modification\n",
			__func__);
	} else {
		struct host1x_job *failed_job = job;

		host1x_job_dump(dev, job);

		host1x_syncpt_set_locked(job->syncpt);
		failed_job->cancelled = true;

		list_for_each_entry_continue(job, &cdma->sync_queue, list) {
			unsigned int i;

			if (job->syncpt != failed_job->syncpt)
				continue;

			for (i = 0; i < job->num_slots; i++) {
				unsigned int slot = (job->first_get/8 + i) %
						    HOST1X_PUSHBUFFER_SLOTS;
				u32 *mapped = cdma->push_buffer.mapped;

				/*
				 * Overwrite opcodes with 0 word writes
				 * to offset 0xbad. This does nothing but
				 * has an easily detected signature in debug
				 * traces.
				 */
				mapped[2*slot+0] = 0x1bad0000;
				mapped[2*slot+1] = 0x1bad0000;
			}

			job->cancelled = true;
		}

		wmb();

		update_cdma_locked(cdma);
	}

resume:
	/* roll back DMAGET and start up channel again */
	host1x_hw_cdma_resume(host1x, cdma, restart_addr);
}
@@ -490,6 +526,16 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job)

	mutex_lock(&cdma->lock);

	/*
	 * Check if syncpoint was locked due to previous job timeout.
	 * This needs to be done within the cdma lock to avoid a race
	 * with the timeout handler.
	 */
	if (job->syncpt->locked) {
		mutex_unlock(&cdma->lock);
		return -EPERM;
	}

	if (job->timeout) {
		/* init state on first submit with timeout value */
		if (!cdma->timeout.initialized) {
+1 −1
Original line number Diff line number Diff line
@@ -191,7 +191,7 @@ static int channel_submit(struct host1x_job *job)
	/* schedule a submit complete interrupt */
	err = host1x_intr_add_action(host, sp, syncval,
				     HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch,
				     completed_waiter, NULL);
				     completed_waiter, &job->waiter);
	completed_waiter = NULL;
	WARN(err, "Failed to set submit complete interrupt");

+4 −0
Original line number Diff line number Diff line
@@ -79,6 +79,10 @@ static void job_free(struct kref *ref)
{
	struct host1x_job *job = container_of(ref, struct host1x_job, ref);

	if (job->waiter)
		host1x_intr_put_ref(job->syncpt->host, job->syncpt->id,
				    job->waiter, false);

	if (job->syncpt)
		host1x_syncpt_put(job->syncpt);

+2 −0
Original line number Diff line number Diff line
@@ -407,6 +407,8 @@ static void syncpt_release(struct kref *ref)

	atomic_set(&sp->max_val, host1x_syncpt_read(sp));

	sp->locked = false;

	mutex_lock(&sp->host->syncpt_mutex);

	host1x_syncpt_base_free(sp->base);
Loading