Skip to content

Commit

Permalink
Merge branch 'bits/210-gpu' into asahi-wip
Browse files Browse the repository at this point in the history
  • Loading branch information
jannau committed Sep 23, 2024
2 parents 4c2c5cc + c881c40 commit a28ecd7
Show file tree
Hide file tree
Showing 11 changed files with 253 additions and 43 deletions.
30 changes: 27 additions & 3 deletions drivers/gpu/drm/asahi/channel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -353,10 +353,10 @@ impl EventChannel::ver {
},
EventMsg::Timeout {
counter,
unk_8,
event_slot,
..
} => match self.gpu.as_ref() {
Some(gpu) => gpu.handle_timeout(counter, event_slot),
Some(gpu) => gpu.handle_timeout(counter, event_slot, unk_8),
None => {
dev_crit!(self.dev, "EventChannel: No GPU manager available!\n")
}
Expand All @@ -374,7 +374,6 @@ impl EventChannel::ver {
vm_slot,
buffer_slot,
counter,
..
} => match self.gpu.as_ref() {
Some(gpu) => {
self.buf_mgr.grow(buffer_slot);
Expand All @@ -384,6 +383,31 @@ impl EventChannel::ver {
dev_crit!(self.dev, "EventChannel: No GPU manager available!\n")
}
},
EventMsg::ChannelError {
error_type,
pipe_type,
event_slot,
event_value,
} => match self.gpu.as_ref() {
Some(gpu) => {
let error_type = match error_type {
0 => ChannelErrorType::MemoryError,
1 => ChannelErrorType::DMKill,
2 => ChannelErrorType::Aborted,
3 => ChannelErrorType::Unk3,
a => ChannelErrorType::Unknown(a),
};
gpu.handle_channel_error(
error_type,
pipe_type,
event_slot,
event_value,
);
}
None => {
dev_crit!(self.dev, "EventChannel: No GPU manager available!\n")
}
},
msg => {
dev_crit!(self.dev, "Unknown event message: {:?}\n", msg);
}
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/asahi/debug.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ pub(crate) enum DebugFlags {
Debug6 = 54,
Debug7 = 55,

VerboseFaults = 61,
AllowUnknownOverrides = 62,
OopsOnGpuCrash = 63,
}
Expand Down
10 changes: 10 additions & 0 deletions drivers/gpu/drm/asahi/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,16 @@ impl EventManager {
}
}

/// Returns a reference to the workqueue owning an event.
///
/// Looks up `slot` in the allocator's `owners` table and returns a new
/// reference to the work queue registered there.
///
/// Returns `None` when the slot currently has no owner, or when `slot` lies
/// outside the table. The slot number can be taken from an event message, so
/// an out-of-range value must not be able to trigger an indexing panic.
pub(crate) fn get_owner(
    &self,
    slot: u32,
) -> Option<Arc<dyn workqueue::WorkQueue + Send + Sync>> {
    self.alloc.with_inner(|inner| {
        // Checked lookup instead of `owners[slot]`; a single clone of the
        // `Option<Arc<..>>` is enough to hand out a new reference.
        inner
            .owners
            .get(slot as usize)
            .and_then(|owner| owner.clone())
    })
}

/// Fail all commands, used when the GPU crashes.
pub(crate) fn fail_all(&self, error: workqueue::WorkError) {
let mut owners: Vec<Arc<dyn workqueue::WorkQueue + Send + Sync>> = Vec::new();
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/asahi/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ impl File {
ualloc,
ualloc_priv,
vm,
kernel_range,
kernel_range,
_dummy_mapping: dummy_mapping,
},
GFP_KERNEL,
Expand Down
41 changes: 34 additions & 7 deletions drivers/gpu/drm/asahi/fw/channels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,25 +173,34 @@ pub(crate) enum DeviceControlMsg {
Unk0a(Array<DEVICECONTROL_SZ::ver, u8>),
Unk0b(Array<DEVICECONTROL_SZ::ver, u8>),
Unk0c(Array<DEVICECONTROL_SZ::ver, u8>),
#[ver(V >= V13_3)]
Unk0d(Array<DEVICECONTROL_SZ::ver, u8>),
GrowTVBAck {
unk_4: u32,
buffer_slot: u32,
vm_slot: u32,
counter: u32,
subpipe: u32,
__pad: Pad<{ DEVICECONTROL_SZ::ver - 0x14 }>,
halt_count: U64,
__pad: Pad<{ DEVICECONTROL_SZ::ver - 0x1c }>,
},
RecoverChannel {
pipe_type: u32,
work_queue: GpuWeakPointer<super::workqueue::QueueInfo::ver>,
event_value: u32,
__pad: Pad<{ DEVICECONTROL_SZ::ver - 0x10 }>,
},
IdlePowerOff {
val: u32,
__pad: Pad<{ DEVICECONTROL_SZ::ver - 0x4 }>,
},
Unk0e(Array<DEVICECONTROL_SZ::ver, u8>),
Unk0f(Array<DEVICECONTROL_SZ::ver, u8>),
Unk10(Array<DEVICECONTROL_SZ::ver, u8>),
Unk11(Array<DEVICECONTROL_SZ::ver, u8>),
Unk12(Array<DEVICECONTROL_SZ::ver, u8>),
Unk13(Array<DEVICECONTROL_SZ::ver, u8>),
Unk14(Array<DEVICECONTROL_SZ::ver, u8>), // Init?
Unk15(Array<DEVICECONTROL_SZ::ver, u8>), // Enable something
Unk16(Array<DEVICECONTROL_SZ::ver, u8>), // Disable something
#[ver(V >= V13_3)]
Unk17(Array<DEVICECONTROL_SZ::ver, u8>),
DestroyContext {
unk_4: u32,
ctx_23: u8,
Expand Down Expand Up @@ -235,6 +244,17 @@ pub(crate) struct FwCtlMsg {

pub(crate) const EVENT_SZ: usize = 0x34;

/// Decoded error type carried by a `ChannelError` event.
///
/// The event channel handler translates the raw `error_type` word of the
/// event into this enum: raw values 0 through 3 become the named variants
/// below (in declaration order) and every other value is preserved in
/// `Unknown`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C, u32)]
#[allow(dead_code)]
pub(crate) enum ChannelErrorType {
/// Raw error type 0.
MemoryError,
/// Raw error type 1.
DMKill,
/// Raw error type 2.
Aborted,
/// Raw error type 3. NOTE(review): meaning presumably not yet known, judging by the name.
Unk3,
/// Any other raw error type; the payload is the raw value as received.
Unknown(u32),
}

#[derive(Debug, Copy, Clone)]
#[repr(C, u32)]
#[allow(dead_code)]
Expand All @@ -257,12 +277,19 @@ pub(crate) enum EventMsg {
vm_slot: u32,
buffer_slot: u32,
counter: u32,
}, // Max discriminant: 0x7
},
ChannelError {
error_type: u32,
pipe_type: u32,
event_slot: u32,
event_value: u32,
},
// Max discriminant: 0x8
}

static_assert!(core::mem::size_of::<EventMsg>() == 4 + EVENT_SZ);

pub(crate) const EVENT_MAX: u32 = 0x7;
pub(crate) const EVENT_MAX: u32 = 0x8;

#[derive(Copy, Clone)]
#[repr(C)]
Expand Down
14 changes: 7 additions & 7 deletions drivers/gpu/drm/asahi/fw/initdata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ pub(crate) mod raw {
#[derive(Debug, Default)]
#[repr(C)]
pub(crate) struct FwStatusFlags {
pub(crate) halt_count: AtomicU32,
__pad0: Pad<0xc>,
pub(crate) halt_count: AtomicU64,
__pad0: Pad<0x8>,
pub(crate) halted: AtomicU32,
__pad1: Pad<0xc>,
pub(crate) resume: AtomicU32,
Expand Down Expand Up @@ -1159,9 +1159,9 @@ pub(crate) mod raw {
pub(crate) unk_10e88: Array<0x188, u8>,
pub(crate) idle_ts: U64,
pub(crate) idle_unk: U64,
pub(crate) unk_11020: u32,
pub(crate) unk_11024: u32,
pub(crate) unk_11028: u32,
pub(crate) progress_check_interval_3d: u32,
pub(crate) progress_check_interval_ta: u32,
pub(crate) progress_check_interval_cl: u32,

#[ver(V >= V13_0B4)]
pub(crate) unk_1102c_0: u32,
Expand Down Expand Up @@ -1202,10 +1202,10 @@ pub(crate) mod raw {
#[ver(V >= V13_3)]
pub(crate) unk_118e0_9c_x: Array<0x8, u8>,

pub(crate) unk_118e0: u32,
pub(crate) cl_context_switch_timeout_ms: u32,

#[ver(V >= V13_0B4)]
pub(crate) unk_118e4_0: u32,
pub(crate) cl_kill_timeout_ms: u32,

pub(crate) cdm_context_store_latency_threshold: u32,
pub(crate) unk_118e8: u32,
Expand Down
6 changes: 3 additions & 3 deletions drivers/gpu/drm/asahi/fw/workqueue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,15 +132,15 @@ pub(crate) mod raw {
pub(crate) unk_58: U64,
pub(crate) busy: AtomicU32,
pub(crate) __pad: Pad<0x20>,
#[ver(V >= V13_2 && G < G14X)]
pub(crate) unk_84_0: u32,
pub(crate) unk_84_state: AtomicU32,
pub(crate) unk_88: u32,
pub(crate) error_count: AtomicU32,
pub(crate) unk_8c: u32,
pub(crate) unk_90: u32,
pub(crate) unk_94: u32,
pub(crate) pending: AtomicU32,
pub(crate) unk_9c: u32,
#[ver(V >= V13_2 && G < G14X)]
pub(crate) unk_a0_0: u32,
pub(crate) gpu_context: GpuPointer<'a, super::GpuContextData>,
pub(crate) unk_a8: U64,
#[ver(V >= V13_2 && G < G14X)]
Expand Down
97 changes: 94 additions & 3 deletions drivers/gpu/drm/asahi/gpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use kernel::{
use crate::alloc::Allocator;
use crate::debug::*;
use crate::driver::{AsahiDevRef, AsahiDevice};
use crate::fw::channels::PipeType;
use crate::fw::channels::{ChannelErrorType, PipeType};
use crate::fw::types::{U32, U64};
use crate::{
alloc, buffer, channel, event, fw, gem, hw, initdata, mem, mmu, queue, regs, workqueue,
Expand Down Expand Up @@ -253,9 +253,17 @@ pub(crate) trait GpuManager: Send + Sync {
/// TODO: Does this actually work?
fn flush_fw_cache(&self) -> Result;
/// Handle a GPU work timeout event.
fn handle_timeout(&self, counter: u32, event_slot: i32);
fn handle_timeout(&self, counter: u32, event_slot: i32, unk: u32);
/// Handle a GPU fault event.
fn handle_fault(&self);
/// Handle a channel error event.
fn handle_channel_error(
&self,
error_type: ChannelErrorType,
pipe_type: u32,
event_slot: u32,
event_value: u32,
);
/// Acknowledge a Buffer grow op.
fn ack_grow(&self, buffer_slot: u32, vm_slot: u32, counter: u32);
/// Wait for the GPU to become idle and power off.
Expand Down Expand Up @@ -1294,7 +1302,7 @@ impl GpuManager for GpuManager::ver {
&self.ids
}

fn handle_timeout(&self, counter: u32, event_slot: i32) {
fn handle_timeout(&self, counter: u32, event_slot: i32, unk: u32) {
dev_err!(self.dev, " (\\________/) \n");
dev_err!(self.dev, " | | \n");
dev_err!(self.dev, "'.| \\ , / |.'\n");
Expand All @@ -1304,6 +1312,7 @@ impl GpuManager for GpuManager::ver {
dev_err!(self.dev, "** GPU timeout nya~!!!!! **\n");
dev_err!(self.dev, " Event slot: {}\n", event_slot);
dev_err!(self.dev, " Timeout count: {}\n", counter);
dev_err!(self.dev, " Unk: {}\n", unk);

// If we have fault info, consider it a fault.
let error = match self.get_fault_info() {
Expand All @@ -1330,13 +1339,95 @@ impl GpuManager for GpuManager::ver {
self.recover();
}

/// Handle a channel error event reported for `event_slot`.
///
/// Logs the error details, marks the event as failed with
/// `WorkError::ChannelError(error_type)`, then looks up the work queue that
/// owns the slot and sends a `RecoverChannel` device control message for it,
/// waiting until that message has been consumed. Bails out early (after
/// logging) when no matching work queue can be found.
fn handle_channel_error(
    &self,
    error_type: ChannelErrorType,
    pipe_type: u32,
    event_slot: u32,
    event_value: u32,
) {
    dev_err!(self.dev, " (\\________/) \n");
    dev_err!(self.dev, " | | \n");
    dev_err!(self.dev, "'.| \\ , / |.'\n");
    dev_err!(self.dev, "--| / (( \\ |--\n");
    dev_err!(self.dev, ".'| _-_- |'.\n");
    dev_err!(self.dev, " |________| \n");
    dev_err!(self.dev, "GPU channel error nya~!!!!!\n");
    dev_err!(self.dev, " Error type: {:?}\n", error_type);
    dev_err!(self.dev, " Pipe type: {}\n", pipe_type);
    dev_err!(self.dev, " Event slot: {}\n", event_slot);
    dev_err!(self.dev, " Event value: {:#x?}\n", event_value);

    // Fail the work attached to this event before attempting recovery.
    let failure = workqueue::WorkError::ChannelError(error_type);
    self.event_manager
        .mark_error(event_slot, event_value, failure);

    // Recovery is addressed to the queue owning the slot; without an owner
    // there is nothing to recover.
    let owner = if let Some(owner) = self.event_manager.get_owner(event_slot) {
        owner
    } else {
        dev_err!(self.dev, "Workqueue not found for this event slot!\n");
        return;
    };

    // The trait object must be the work queue flavour matching this manager.
    let queue = if let Some(queue) = owner
        .as_any()
        .downcast_ref::<workqueue::WorkQueue::ver>()
    {
        queue
    } else {
        dev_crit!(self.dev, "GpuManager mismatched with WorkQueue!\n");
        return;
    };

    if debug_enabled(DebugFlags::VerboseFaults) {
        queue.dump_info();
    }

    let recover_msg = fw::channels::DeviceControlMsg::ver::RecoverChannel {
        pipe_type,
        work_queue: queue.info_pointer(),
        event_value,
        __pad: Default::default(),
    };

    mod_dev_dbg!(self.dev, "Recover Channel command: {:?}\n", &recover_msg);
    let mut tx = self.tx_channels.lock();

    let token = tx.device_control.send(&recover_msg);
    {
        // Ring the device control doorbell; the rtkit lock is released at the
        // end of this scope, before waiting on the token.
        let mut rtkit = self.rtkit.lock();
        let doorbell = rtkit
            .as_mut()
            .unwrap()
            .send_message(EP_DOORBELL, MSG_TX_DOORBELL | DOORBELL_DEVCTRL);
        if doorbell.is_err() {
            dev_err!(self.dev, "Failed to send Recover Channel command\n");
        }
    }

    if tx.device_control.wait_for(token).is_err() {
        dev_err!(self.dev, "Timed out waiting for Recover Channel command\n");
    }

    // Dump the queue state again so before/after can be compared.
    if debug_enabled(DebugFlags::VerboseFaults) {
        queue.dump_info();
    }
}

fn ack_grow(&self, buffer_slot: u32, vm_slot: u32, counter: u32) {
let halt_count = self
.initdata
.fw_status
.with(|raw, _inner| raw.flags.halt_count.load(Ordering::Relaxed));

let dc = fw::channels::DeviceControlMsg::ver::GrowTVBAck {
unk_4: 1,
buffer_slot,
vm_slot,
counter,
subpipe: 0, // TODO
halt_count: U64(halt_count),
__pad: Default::default(),
};

Expand Down
10 changes: 5 additions & 5 deletions drivers/gpu/drm/asahi/initdata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -704,9 +704,9 @@ impl<'a> InitDataBuilder::ver<'a> {
unk_903c: 0,
fault_control: *crate::fault_control.read(),
do_init: 1,
unk_11020: 40,
unk_11024: 10,
unk_11028: 250,
progress_check_interval_3d: 40,
progress_check_interval_ta: 10,
progress_check_interval_cl: 250,
#[ver(V >= V13_0B4)]
unk_1102c_0: 1,
#[ver(V >= V13_0B4)]
Expand All @@ -718,9 +718,9 @@ impl<'a> InitDataBuilder::ver<'a> {
idle_off_delay_ms: AtomicU32::new(pwr.idle_off_delay_ms),
fender_idle_off_delay_ms: pwr.fender_idle_off_delay_ms,
fw_early_wake_timeout_ms: pwr.fw_early_wake_timeout_ms,
unk_118e0: 40,
cl_context_switch_timeout_ms: 40,
#[ver(V >= V13_0B4)]
unk_118e4_0: 50,
cl_kill_timeout_ms: 50,
#[ver(V >= V13_0B4)]
unk_11edc: 0,
#[ver(V >= V13_0B4)]
Expand Down
Loading

0 comments on commit a28ecd7

Please sign in to comment.