Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 146 additions & 33 deletions cmd/host/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,16 +100,15 @@
//! # etc...
//! ```

use anyhow::{Result, anyhow};
use anyhow::{Result, bail};
use chrono::DateTime;
use clap::Parser;

use humility::{
core::Core,
hubris::HubrisArchive,
log::{Logger, info, warn},
reflect,
reflect::Load,
reflect::{self, Load, Value},
};
use humility_cli::{ExecutionContext, humility_cmd};
use humility_doppel as doppel;
Expand Down Expand Up @@ -154,9 +153,24 @@ static SEPARATE_HOST_BOOT_FAIL_NAME: &str = "LAST_HOST_BOOT_FAIL";

static SEPARATE_LAST_HOST_PANIC_NAME: &str = "LAST_HOST_PANIC";

static HOST_STATE_BUF_NAME: &str =
// Pre-2544
static HOST_STATE_BUF_NAME_1: &str =
"task_host_sp_comms::ServerImpl::claim_static_resources::BUFS";

// Post-2544
static HOST_STATE_BUF_NAME_2: &str =
"<task_host_sp_comms::ServerImpl>::claim_static_resources::BUFS";

static PACKRAT_BUF_NAME: &str = "task_packrat::main::BUFS";

// packrat field names
//
// Each path is handed to `host_bootinfo_packrat`, which resolves it as a field
// of the value read from the `PACKRAT_BUF_NAME` variable.
const PACKRAT_LAST_PANIC_PAYLOAD: &str =
"cell.value.host_info.host_panic_payload";
const PACKRAT_LAST_PANIC_STATE: &str = "cell.value.host_info.host_panic_state";
// NOTE(review): the two boot-fail paths below are character-for-character the
// same as the last-panic paths above, so a boot-fail lookup in packrat reads
// the *panic* fields. This looks like a copy/paste slip -- confirm the
// intended packrat field names for boot-fail data and update these literals.
const PACKRAT_BOOT_FAIL_PAYLOAD: &str =
"cell.value.host_info.host_panic_payload";
const PACKRAT_BOOT_FAIL_STATE: &str = "cell.value.host_info.host_panic_state";

/// Mirror type of the internal buf struct in `host_sp_comms`. Must be kept in
/// (partial) sync with that structure (fields that are present need to match,
/// other fields can be ignored).
Expand Down Expand Up @@ -229,48 +243,147 @@ fn print_escaped_ascii(mut bytes: &[u8]) {
println!("{buf}");
}

/// Attempt to read a host "last panic" / "boot fail" record out of packrat,
/// which is where that data lives as of
/// https://github.com/oxidecomputer/hubris/pull/2518.
///
/// `payload` and `state` are field paths inside the packrat buffer variable
/// (`PACKRAT_BUF_NAME`): the first names the raw byte payload, the second the
/// optional state record describing it.
///
/// Returns:
/// - `Ok(None)` if the packrat buffer variable cannot be looked up, or if
///   either field cannot be extracted from it -- the caller should fall back
///   to another source.
/// - `Ok(Some(vec![]))` if both fields exist but the state is `None`.
/// - `Ok(Some(bytes))` with the payload truncated to the state's
///   `total_length` otherwise.
///
/// # Errors
///
/// Fails if reading the variable from the target fails, or if a present state
/// has no readable `total_length` field.
fn host_bootinfo_packrat(
    hubris: &HubrisArchive,
    core: &mut dyn Core,
    payload: &str,
    state: &str,
) -> Result<Option<Vec<u8>>> {
    // A missing variable most likely means a very old Hubris image. That is
    // not an error from our point of view: the host-sp-comms variables still
    // deserve a look, so report "nothing here" instead.
    let packrat_var = match hubris.lookup_qualified_variable(PACKRAT_BUF_NAME)
    {
        Ok(var) => var,
        Err(_) => return Ok(None),
    };

    // A failure to *read* a variable that does exist points at a transport
    // problem, so this one is propagated.
    let packrat: Value =
        humility::reflect::read_variable(hubris, core, packrat_var)?;

    // The buffer can exist without these fields (image older than #2518, or
    // a hostless SP); treat any extraction failure as "no data".
    let payload_field: Result<Vec<u8>> = packrat.field(payload);
    let state_field: Result<Option<Value>> = packrat.field(state);

    let (mut bytes, maybe_state) = match (payload_field, state_field) {
        (Ok(bytes), Ok(maybe_state)) => (bytes, maybe_state),
        _ => return Ok(None),
    };

    match maybe_state {
        // Fields are present but nothing has been recorded: hand back an
        // empty payload rather than `None`.
        None => Ok(Some(vec![])),
        // Something was recorded; keep only the bytes the state says exist.
        Some(recorded) => {
            let total: u32 = recorded.field("total_length")?;
            bytes.truncate(total as usize);
            Ok(Some(bytes))
        }
    }
}

/// In host-sp-comms, with the legacy-ish name: reads the standalone
/// `SEPARATE_HOST_BOOT_FAIL_NAME` variable via `read_uqvar`.
///
/// NOTE(review): `host_boot_fail` treats an `Ok(None)` from this function as
/// "nothing under this name" and moves on to the next source -- confirm that
/// matches what `read_uqvar` returns for a missing variable.
fn host_boot_fail_spcomms_old(
hubris: &HubrisArchive,
core: &mut dyn Core,
) -> Result<Option<Vec<u8>>> {
read_uqvar(hubris, core, SEPARATE_HOST_BOOT_FAIL_NAME)
}

/// In host-sp-comms, with the modern-ish name: pulls the `last_boot_fail`
/// bytes out of the state buffer registered under `base_buf`.
///
/// Yields `Ok(None)` when `read_qualified_state_buf` returns `None` for that
/// name; any error from the read is propagated.
fn host_boot_fail_spcomms_new(
    hubris: &HubrisArchive,
    core: &mut dyn Core,
    base_buf: &str,
) -> Result<Option<Vec<u8>>> {
    Ok(read_qualified_state_buf(hubris, core, base_buf)?
        .map(|state| state.last_boot_fail))
}

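/// Print the host's most recent boot-fail message, trying each known storage
/// location in turn: packrat (where it moved to in
/// https://github.com/oxidecomputer/hubris/pull/2518), then host-sp-comms
/// under its older and newer variable names.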
fn host_boot_fail(hubris: &HubrisArchive, core: &mut dyn Core) -> Result<()> {
// Try old name:
let d = read_uqvar(hubris, core, SEPARATE_HOST_BOOT_FAIL_NAME)?;
if let Some(d) = d {
print_escaped_ascii(&d);
return Ok(());
}
// Try new name
let buf = read_qualified_state_buf(hubris, core, HOST_STATE_BUF_NAME)?
.ok_or_else(|| {
anyhow!(
"Could not find host boot variables under any known name; \
is this a Gimlet image?"
// Work through the different places "boot fail" info could be hiding
let sources: [fn(&HubrisArchive, &mut dyn Core) -> _; _] = [
|h, c| {
host_bootinfo_packrat(
h,
c,
PACKRAT_BOOT_FAIL_PAYLOAD,
PACKRAT_BOOT_FAIL_STATE,
)
})?;
},
host_boot_fail_spcomms_old,
|h, c| host_boot_fail_spcomms_new(h, c, HOST_STATE_BUF_NAME_1),
|h, c| host_boot_fail_spcomms_new(h, c, HOST_STATE_BUF_NAME_2),
];

for source in sources {
if let Some(bootfail) = source(hubris, core)? {
print_escaped_ascii(&bootfail);
return Ok(());
}
}

print_escaped_ascii(&buf.last_boot_fail[..]);
bail!(
"Could not find host boot variables under any known name; is this a \
Gimlet image?"
)
}

Ok(())
/// In host-sp-comms, with the legacy-ish name: reads the standalone
/// `SEPARATE_LAST_HOST_PANIC_NAME` variable via `read_uqvar`.
///
/// NOTE(review): `host_last_panic` treats an `Ok(None)` from this function as
/// "nothing under this name" and moves on to the next source -- confirm that
/// matches what `read_uqvar` returns for a missing variable.
fn host_last_panic_spcomms_old(
hubris: &HubrisArchive,
core: &mut dyn Core,
) -> Result<Option<Vec<u8>>> {
read_uqvar(hubris, core, SEPARATE_LAST_HOST_PANIC_NAME)
}

/// In host-sp-comms, with the modern-ish name: pulls the `last_panic` bytes
/// out of the state buffer registered under `base_buf`.
///
/// Yields `Ok(None)` when `read_qualified_state_buf` returns `None` for that
/// name; any error from the read is propagated.
fn host_last_panic_spcomms_new(
    hubris: &HubrisArchive,
    core: &mut dyn Core,
    base_buf: &str,
) -> Result<Option<Vec<u8>>> {
    Ok(read_qualified_state_buf(hubris, core, base_buf)?
        .map(|state| state.last_panic))
}

fn host_last_panic(
hubris: &HubrisArchive,
core: &mut dyn Core,
log: &Logger,
) -> Result<()> {
// Try original name:
let d = read_uqvar(hubris, core, SEPARATE_LAST_HOST_PANIC_NAME)?;
if let Some(d) = d {
return print_panic(d, log);
}

// Try new name:
let buf = read_qualified_state_buf(hubris, core, HOST_STATE_BUF_NAME)?
.ok_or_else(|| {
anyhow!(
"Could not find host boot variables under any known name; \
is this a Gimlet image?"
// Work through the different places "last panic" info could be hiding
let sources: [fn(&HubrisArchive, &mut dyn Core) -> _; _] = [
|h, c| {
host_bootinfo_packrat(
h,
c,
PACKRAT_LAST_PANIC_PAYLOAD,
PACKRAT_LAST_PANIC_STATE,
)
})?;
},
host_last_panic_spcomms_old,
|h, c| host_last_panic_spcomms_new(h, c, HOST_STATE_BUF_NAME_1),
|h, c| host_last_panic_spcomms_new(h, c, HOST_STATE_BUF_NAME_2),
];
Comment on lines +362 to +375

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this in order of newest to oldest?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The old code did basically:

  • host_last_panic_spcomms_old
  • host_last_panic_spcomms_new(h, c, HOST_STATE_BUF_NAME_1)

I made the executive decision to try packrat first, then the new-new spcomms last. Open to re-order this, it shouldn't be order dependent.


for source in sources {
if let Some(panic) = source(hubris, core)? {
return print_panic(panic, log);
}
}

print_panic(buf.last_panic, log)
bail!(
"Could not find host boot variables under any known name; is this a \
Gimlet image?"
)
}

fn print_panic(d: Vec<u8>, log: &Logger) -> Result<()> {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
humility: attached to dump
humility host failed: read of 24964 bytes failed: could not find addr 0x24021ff4 in memory
humility host failed: read of 8264 bytes failed: could not find addr 0x24004380 in memory
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
humility: attached to dump
humility host failed: read of 24964 bytes failed: could not find addr 0x24021c1c in memory
humility host failed: read of 8264 bytes failed: could not find addr 0x24004380 in memory
Loading