Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/ros2_medkit_fault_manager/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ if(BUILD_TESTING)
find_package(ament_lint_auto REQUIRED)
find_package(ament_cmake_gtest REQUIRED)
find_package(launch_testing_ament_cmake REQUIRED)
find_package(std_msgs REQUIRED)

set(ament_cmake_clang_format_CONFIG_FILE "${CMAKE_CURRENT_SOURCE_DIR}/../../.clang-format")
list(APPEND AMENT_LINT_AUTO_EXCLUDE ament_cmake_uncrustify ament_cmake_cpplint ament_cmake_clang_tidy)
Expand Down Expand Up @@ -154,7 +155,7 @@ if(BUILD_TESTING)
# Snapshot capture tests
ament_add_gtest(test_snapshot_capture test/test_snapshot_capture.cpp)
target_link_libraries(test_snapshot_capture fault_manager_lib)
medkit_target_dependencies(test_snapshot_capture rclcpp ros2_medkit_msgs)
medkit_target_dependencies(test_snapshot_capture rclcpp ros2_medkit_msgs std_msgs)
medkit_set_test_domain(test_snapshot_capture)

# Rosbag capture tests
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,16 @@ struct SnapshotData {
int64_t captured_at_ns{0};
};

/// Compact freeze-frame captured when a fault confirms: a single JSON object mapping
/// each captured topic to its latest value at confirmation time. Unlike per-topic
/// snapshots, a freeze-frame is keyed by fault_code (one row per code) and is RETAINED
/// across clear_fault, so the confirmed-state record persists after acknowledgement.
struct FreezeFrameData {
std::string fault_code;
std::string data; ///< Compact JSON object: {"<topic>": <value>, ...}
int64_t captured_at_ns{0};
};

/// Rosbag file metadata for time-window recording
struct RosbagFileInfo {
std::string fault_code;
Expand Down Expand Up @@ -189,6 +199,17 @@ class FaultStorage {
virtual std::vector<SnapshotData> get_snapshots(const std::string & fault_code,
const std::string & topic_filter = "") const = 0;

/// Store the compact freeze-frame captured for a fault (JSON dict of topic values).
/// Keyed by fault_code: a later capture for the same code replaces the frame. The frame
/// is retained across clear_fault so the confirmed-state record survives acknowledgement.
/// @param frame The freeze-frame to store
virtual void store_freeze_frame(const FreezeFrameData & frame) = 0;

/// Get the freeze-frame captured for a fault, if any.
/// @param fault_code The fault code to look up
/// @return The freeze-frame if one was captured, nullopt otherwise
virtual std::optional<FreezeFrameData> get_freeze_frame(const std::string & fault_code) const = 0;
Comment thread
mfaferek93 marked this conversation as resolved.

/// Store rosbag file metadata for a fault
/// @param info The rosbag file info to store (replaces any existing entry for fault_code)
virtual void store_rosbag_file(const RosbagFileInfo & info) = 0;
Expand Down Expand Up @@ -267,6 +288,9 @@ class InMemoryFaultStorage : public FaultStorage {
std::vector<SnapshotData> get_snapshots(const std::string & fault_code,
const std::string & topic_filter = "") const override;

void store_freeze_frame(const FreezeFrameData & frame) override;
std::optional<FreezeFrameData> get_freeze_frame(const std::string & fault_code) const override;

void store_rosbag_file(const RosbagFileInfo & info) override;
std::optional<RosbagFileInfo> get_rosbag_file(const std::string & fault_code) const override;
bool delete_rosbag_file(const std::string & fault_code) override;
Expand All @@ -283,7 +307,8 @@ class InMemoryFaultStorage : public FaultStorage {
mutable std::mutex mutex_;
std::map<std::string, FaultState> faults_;
std::vector<SnapshotData> snapshots_;
std::map<std::string, RosbagFileInfo> rosbag_files_; ///< fault_code -> rosbag info
std::map<std::string, FreezeFrameData> freeze_frames_; ///< fault_code -> freeze-frame (retained across clear)
std::map<std::string, RosbagFileInfo> rosbag_files_; ///< fault_code -> rosbag info
DebounceConfig config_;
size_t max_snapshots_per_fault_{0}; ///< 0 = unlimited
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#include <string>
#include <vector>

#include <nlohmann/json.hpp>

#include "rclcpp/rclcpp.hpp"
#include "ros2_medkit_fault_manager/fault_storage.hpp"

Expand Down Expand Up @@ -166,12 +168,16 @@ class SnapshotCapture {
std::vector<std::string> resolve_topics(const std::string & fault_code) const;

/// Capture a single topic on-demand (creates temporary subscription)
/// On success also records the captured value into @p freeze_frame under the topic key.
/// @return true if capture was successful
bool capture_topic_on_demand(const std::string & fault_code, const std::string & topic);
bool capture_topic_on_demand(const std::string & fault_code, const std::string & topic,
nlohmann::json & freeze_frame);

/// Capture a topic from background cache
/// On success also records the cached value into @p freeze_frame under the topic key.
/// @return true if data was available in cache
bool capture_topic_from_cache(const std::string & fault_code, const std::string & topic);
bool capture_topic_from_cache(const std::string & fault_code, const std::string & topic,
nlohmann::json & freeze_frame);

/// Initialize background subscriptions for all configured topics
void init_background_subscriptions();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ class SqliteFaultStorage : public FaultStorage {
std::vector<SnapshotData> get_snapshots(const std::string & fault_code,
const std::string & topic_filter = "") const override;

void store_freeze_frame(const FreezeFrameData & frame) override;
std::optional<FreezeFrameData> get_freeze_frame(const std::string & fault_code) const override;

void store_rosbag_file(const RosbagFileInfo & info) override;
std::optional<RosbagFileInfo> get_rosbag_file(const std::string & fault_code) const override;
bool delete_rosbag_file(const std::string & fault_code) override;
Expand Down
14 changes: 14 additions & 0 deletions src/ros2_medkit_fault_manager/src/fault_storage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,20 @@ std::vector<SnapshotData> InMemoryFaultStorage::get_snapshots(const std::string
return result;
}

void InMemoryFaultStorage::store_freeze_frame(const FreezeFrameData & frame) {
std::lock_guard<std::mutex> lock(mutex_);
freeze_frames_[frame.fault_code] = frame;
}

std::optional<FreezeFrameData> InMemoryFaultStorage::get_freeze_frame(const std::string & fault_code) const {
std::lock_guard<std::mutex> lock(mutex_);
auto it = freeze_frames_.find(fault_code);
if (it == freeze_frames_.end()) {
return std::nullopt;
}
return it->second;
}

void InMemoryFaultStorage::store_rosbag_file(const RosbagFileInfo & info) {
std::lock_guard<std::mutex> lock(mutex_);

Expand Down
35 changes: 31 additions & 4 deletions src/ros2_medkit_fault_manager/src/snapshot_capture.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,20 +124,24 @@ void SnapshotCapture::capture(const std::string & fault_code) {

auto topics = resolve_topics(fault_code);
if (topics.empty()) {
// Unconfigured fault code: no capture set resolved, so no freeze-frame is written and
// no subscriptions are created (no overhead). get_freeze_frame() stays empty for it.
RCLCPP_DEBUG(node_->get_logger(), "No topics configured for fault '%s'", fault_code.c_str());
return;
Comment thread
mfaferek93 marked this conversation as resolved.
}

RCLCPP_INFO(node_->get_logger(), "Capturing snapshots for fault '%s' (%zu topics)", fault_code.c_str(),
topics.size());

// Accumulate a compact dict of captured topic values alongside the per-topic snapshots.
nlohmann::json freeze_frame = nlohmann::json::object();
size_t captured_count = 0;
for (const auto & topic : topics) {
bool success = false;
if (config_.background_capture) {
success = capture_topic_from_cache(fault_code, topic);
success = capture_topic_from_cache(fault_code, topic, freeze_frame);
} else {
success = capture_topic_on_demand(fault_code, topic);
success = capture_topic_on_demand(fault_code, topic, freeze_frame);
}

if (success) {
Expand All @@ -147,6 +151,15 @@ void SnapshotCapture::capture(const std::string & fault_code) {

RCLCPP_INFO(node_->get_logger(), "Captured %zu/%zu snapshots for fault '%s'", captured_count, topics.size(),
fault_code.c_str());

// Persist the compact freeze-frame keyed by fault_code (retained across clear_fault).
// Written whenever the fault has a configured capture set, even if nothing was publishing
// at confirmation time (an empty object then records that capture ran).
FreezeFrameData frame;
frame.fault_code = fault_code;
frame.data = freeze_frame.dump();
frame.captured_at_ns = get_wall_clock_ns();
storage_->store_freeze_frame(frame);
Comment thread
mfaferek93 marked this conversation as resolved.
}

std::vector<std::string> SnapshotCapture::resolve_topics(const std::string & fault_code) const {
Expand Down Expand Up @@ -174,7 +187,8 @@ std::vector<std::string> SnapshotCapture::resolve_topics(const std::string & fau
return {};
}

bool SnapshotCapture::capture_topic_on_demand(const std::string & fault_code, const std::string & topic) {
bool SnapshotCapture::capture_topic_on_demand(const std::string & fault_code, const std::string & topic,
nlohmann::json & freeze_frame) {
// Get topic type
std::string msg_type = get_topic_type(topic);
if (msg_type.empty()) {
Expand Down Expand Up @@ -286,6 +300,9 @@ bool SnapshotCapture::capture_topic_on_demand(const std::string & fault_code, co

storage_->store_snapshot(snapshot);

// Record the value into the compact freeze-frame dict under the topic key.
freeze_frame[topic] = std::move(json_data);

RCLCPP_DEBUG(node_->get_logger(), "Captured snapshot from '%s' for fault '%s'", topic.c_str(), fault_code.c_str());
return true;

Expand All @@ -300,7 +317,8 @@ bool SnapshotCapture::capture_topic_on_demand(const std::string & fault_code, co
return false;
}

bool SnapshotCapture::capture_topic_from_cache(const std::string & fault_code, const std::string & topic) {
bool SnapshotCapture::capture_topic_from_cache(const std::string & fault_code, const std::string & topic,
nlohmann::json & freeze_frame) {
std::lock_guard<std::mutex> lock(cache_mutex_);

auto it = message_cache_.find(topic);
Expand All @@ -321,6 +339,15 @@ bool SnapshotCapture::capture_topic_from_cache(const std::string & fault_code, c

storage_->store_snapshot(snapshot);

// Record the cached value into the compact freeze-frame dict. The cache holds a serialized
// JSON string; parse it back so the frame nests structured values (fall back to the raw
// string only if it somehow fails to parse).
try {
freeze_frame[topic] = nlohmann::json::parse(cached.data);
} catch (const nlohmann::json::parse_error &) {
freeze_frame[topic] = cached.data;
}

RCLCPP_DEBUG(node_->get_logger(), "Captured snapshot from cache for '%s' (fault '%s')", topic.c_str(),
fault_code.c_str());
return true;
Expand Down
50 changes: 50 additions & 0 deletions src/ros2_medkit_fault_manager/src/sqlite_fault_storage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,23 @@ void SqliteFaultStorage::initialize_schema() {
throw std::runtime_error("Failed to create snapshots table: " + error);
}

// Create freeze_frames table: one compact JSON dict of captured topic values per fault
// code. Unlike snapshots, freeze frames are keyed by fault_code and are NOT removed on
// clear_fault, so the confirmed-state record is retained after acknowledgement.
const char * create_freeze_frames_table_sql = R"(
CREATE TABLE IF NOT EXISTS freeze_frames (
fault_code TEXT PRIMARY KEY,
data TEXT NOT NULL,
captured_at_ns INTEGER NOT NULL
);
)";

if (sqlite3_exec(db_, create_freeze_frames_table_sql, nullptr, nullptr, &err_msg) != SQLITE_OK) {
std::string error = err_msg ? err_msg : "Unknown error";
sqlite3_free(err_msg);
throw std::runtime_error("Failed to create freeze_frames table: " + error);
}

// Create rosbag_files table for storing time-window bag file metadata
const char * create_rosbag_files_table_sql = R"(
CREATE TABLE IF NOT EXISTS rosbag_files (
Expand Down Expand Up @@ -795,6 +812,39 @@ std::vector<SnapshotData> SqliteFaultStorage::get_snapshots(const std::string &
return result;
}

void SqliteFaultStorage::store_freeze_frame(const FreezeFrameData & frame) {
std::lock_guard<std::mutex> lock(mutex_);

// Keyed by fault_code (PRIMARY KEY): a re-confirm replaces the previous frame.
SqliteStatement stmt(db_,
"INSERT OR REPLACE INTO freeze_frames (fault_code, data, captured_at_ns) "
"VALUES (?, ?, ?)");
stmt.bind_text(1, frame.fault_code);
stmt.bind_text(2, frame.data);
stmt.bind_int64(3, frame.captured_at_ns);

if (stmt.step() != SQLITE_DONE) {
throw std::runtime_error(std::string("Failed to store freeze frame: ") + sqlite3_errmsg(db_));
}
}

std::optional<FreezeFrameData> SqliteFaultStorage::get_freeze_frame(const std::string & fault_code) const {
std::lock_guard<std::mutex> lock(mutex_);

SqliteStatement stmt(db_, "SELECT fault_code, data, captured_at_ns FROM freeze_frames WHERE fault_code = ?");
stmt.bind_text(1, fault_code);

if (stmt.step() != SQLITE_ROW) {
return std::nullopt;
}

FreezeFrameData frame;
frame.fault_code = stmt.column_text(0);
frame.data = stmt.column_text(1);
frame.captured_at_ns = stmt.column_int64(2);
return frame;
}

void SqliteFaultStorage::store_rosbag_file(const RosbagFileInfo & info) {
std::lock_guard<std::mutex> lock(mutex_);

Expand Down
70 changes: 70 additions & 0 deletions src/ros2_medkit_fault_manager/test/test_fault_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1397,6 +1397,76 @@ TEST(InMemorySnapshotLimitTest, UnlimitedWhenZero) {
EXPECT_EQ(result.size(), 20u);
}

// --- InMemoryFaultStorage freeze-frame tests ---

TEST(InMemoryFreezeFrameTest, StoreAndRetrieve) {
InMemoryFaultStorage storage;

ros2_medkit_fault_manager::FreezeFrameData frame;
frame.fault_code = "PLC_PRESSURE_HIGH";
frame.data = R"({"/plc/pressure":{"data":8.4}})";
frame.captured_at_ns = 1000;
storage.store_freeze_frame(frame);

auto retrieved = storage.get_freeze_frame("PLC_PRESSURE_HIGH");
ASSERT_TRUE(retrieved.has_value());
EXPECT_EQ(retrieved->data, frame.data);
EXPECT_EQ(retrieved->captured_at_ns, 1000);
}

TEST(InMemoryFreezeFrameTest, AbsentForUnknownFault) {
InMemoryFaultStorage storage;
EXPECT_FALSE(storage.get_freeze_frame("NEVER_CAPTURED").has_value());
}

TEST(InMemoryFreezeFrameTest, ReplacedOnRecapture) {
InMemoryFaultStorage storage;

ros2_medkit_fault_manager::FreezeFrameData frame;
frame.fault_code = "PLC_PRESSURE_HIGH";
frame.data = R"({"/plc/pressure":{"data":8.4}})";
frame.captured_at_ns = 1000;
storage.store_freeze_frame(frame);

frame.data = R"({"/plc/pressure":{"data":9.9}})";
frame.captured_at_ns = 2000;
storage.store_freeze_frame(frame);

auto retrieved = storage.get_freeze_frame("PLC_PRESSURE_HIGH");
ASSERT_TRUE(retrieved.has_value());
EXPECT_EQ(retrieved->data, R"({"/plc/pressure":{"data":9.9}})");
EXPECT_EQ(retrieved->captured_at_ns, 2000);
}

TEST(InMemoryFreezeFrameTest, SurvivesClearFault) {
InMemoryFaultStorage storage;
rclcpp::Clock clock;

storage.report_fault_event("PLC_PRESSURE_HIGH", ReportFault::Request::EVENT_FAILED, Fault::SEVERITY_ERROR,
"Pressure high", "/plc_node", clock.now(), DebounceConfig{});

ros2_medkit_fault_manager::SnapshotData snap;
snap.fault_code = "PLC_PRESSURE_HIGH";
snap.topic = "/plc/pressure";
snap.message_type = "std_msgs/msg/Float64";
snap.data = R"({"data":8.4})";
snap.captured_at_ns = 1000;
storage.store_snapshot(snap);

ros2_medkit_fault_manager::FreezeFrameData frame;
frame.fault_code = "PLC_PRESSURE_HIGH";
frame.data = R"({"/plc/pressure":{"data":8.4}})";
frame.captured_at_ns = 1000;
storage.store_freeze_frame(frame);

ASSERT_TRUE(storage.clear_fault("PLC_PRESSURE_HIGH"));

EXPECT_TRUE(storage.get_snapshots("PLC_PRESSURE_HIGH").empty());
auto retrieved = storage.get_freeze_frame("PLC_PRESSURE_HIGH");
ASSERT_TRUE(retrieved.has_value());
EXPECT_EQ(retrieved->data, frame.data);
}

// =============================================================================
// Snapshot recapture cooldown test
// =============================================================================
Expand Down
Loading
Loading