diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 5ec2cbda2ba9323952f815feeed22366568428a8..46acdfb4a03a221742a99ebda37968d378d3a3ff 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -25,6 +25,8 @@ #include "i915_drv.h" #include "intel_uc.h" +#include + /** * DOC: GuC-based command submission * @@ -522,8 +524,6 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) if (i915_vma_is_map_and_fenceable(rq->ring->vma)) POSTING_READ_FW(GUC_STATUS); - trace_i915_gem_request_in(rq, 0); - spin_lock_irqsave(&client->wq_lock, flags); guc_wq_item_append(client, rq); @@ -542,10 +542,99 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) static void i915_guc_submit(struct drm_i915_gem_request *rq) { - i915_gem_request_submit(rq); + __i915_gem_request_submit(rq); __i915_guc_submit(rq); } +static void nested_enable_signaling(struct drm_i915_gem_request *rq) +{ + /* If we use dma_fence_enable_sw_signaling() directly, lockdep + * detects an ordering issue between the fence lockclass and the + * global_timeline. This circular dependency can only occur via 2 + * different fences (but same fence lockclass), so we use the nesting + * annotation here to prevent the warn, equivalent to the nesting + * inside i915_gem_request_submit() for when we also enable the + * signaler. + */ + + if (test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &rq->fence.flags)) + return; + + GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)); + trace_dma_fence_enable_signal(&rq->fence); + + spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING); + intel_engine_enable_signaling(rq); + spin_unlock(&rq->lock); +} + +static bool i915_guc_dequeue(struct intel_engine_cs *engine) +{ + struct execlist_port *port = engine->execlist_port; + struct drm_i915_gem_request *last = port[0].request; + unsigned long flags; + struct rb_node *rb; + bool submit = false; + + spin_lock_irqsave(&engine->timeline->lock, flags); + rb = engine->execlist_first; + while (rb) { + struct drm_i915_gem_request *rq = + rb_entry(rb, typeof(*rq), priotree.node); + + if (last && rq->ctx != last->ctx) { + if (port != engine->execlist_port) + break; + + i915_gem_request_assign(&port->request, last); + nested_enable_signaling(last); + port++; + } + + rb = rb_next(rb); + rb_erase(&rq->priotree.node, &engine->execlist_queue); + RB_CLEAR_NODE(&rq->priotree.node); + rq->priotree.priority = INT_MAX; + + trace_i915_gem_request_in(rq, port - engine->execlist_port); + i915_guc_submit(rq); + last = rq; + submit = true; + } + if (submit) { + i915_gem_request_assign(&port->request, last); + nested_enable_signaling(last); + engine->execlist_first = rb; + } + spin_unlock_irqrestore(&engine->timeline->lock, flags); + + return submit; +} + +static void i915_guc_irq_handler(unsigned long data) +{ + struct intel_engine_cs *engine = (struct intel_engine_cs *)data; + struct execlist_port *port = engine->execlist_port; + struct drm_i915_gem_request *rq; + bool submit; + + do { + rq = port[0].request; + while (rq && i915_gem_request_completed(rq)) { + trace_i915_gem_request_out(rq); + i915_gem_request_put(rq); + port[0].request = port[1].request; + port[1].request = NULL; + rq = port[0].request; + } + + submit = false; + if (!port[1].request) + submit = i915_guc_dequeue(engine); + } while (submit); +} + /* * Everything below here is concerned with setup & teardown, and is * therefore not part of the somewhat time-critical batch-submission @@ -987,18 +1076,21 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) guc_init_doorbell_hw(guc); /* Take over from manual control of ELSP (execlists) */ - for_each_engine(engine, dev_priv, id) { - engine->submit_request = i915_guc_submit; - engine->schedule = NULL; - } - guc_interrupts_capture(dev_priv); - /* Replay the current set of previously submitted requests */ for_each_engine(engine, dev_priv, id) { const int wqi_size = sizeof(struct guc_wq_item); struct drm_i915_gem_request *rq; + /* The tasklet was initialised by execlists, and may be in + * a state of flux (across a reset) and so we just want to + * take over the callback without changing any other state + * in the tasklet. + */ + engine->irq_tasklet.func = i915_guc_irq_handler; + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + + /* Replay the current set of previously submitted requests */ spin_lock_irq(&engine->timeline->lock); list_for_each_entry(rq, &engine->timeline->requests, link) { guc_client_update_wq_rsvd(client, wqi_size); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index ea92af2291334ea987bd728cee07b7dd281c9e04..6e716399ea44e02bbc2e1fa8c40a967848a9c33e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1355,13 +1355,20 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, static __always_inline void gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) { - if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) - notify_ring(engine); + bool tasklet = false; if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - tasklet_hi_schedule(&engine->irq_tasklet); + tasklet = true; } + + if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) { + notify_ring(engine); + tasklet |= i915.enable_guc_submission; + } + + if (tasklet) + tasklet_hi_schedule(&engine->irq_tasklet); } static irqreturn_t gen8_gt_irq_ack(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 89f38e7def9f7fc2fd9a4a28883601ff25a097b7..e664dbf6bd11edb02bd55843859ab4fb0c8404c0 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1159,7 +1159,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) /* After a GPU reset, we may have requests to replay */ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - if (!execlists_elsp_idle(engine)) { + if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) { DRM_DEBUG_DRIVER("Restarting %s from requests [0x%x, 0x%x]\n", engine->name, port_seqno(&engine->execlist_port[0]), @@ -1244,9 +1244,6 @@ static void reset_common_ring(struct intel_engine_cs *engine, request->ring->last_retired_head = -1; intel_ring_update_space(request->ring); - if (i915.enable_guc_submission) - return; - /* Catch up with any missed context-switch interrupts */ if (request->ctx != port[0].request->ctx) { i915_gem_request_put(port[0].request);