|
3 | 3 |
|
4 | 4 | #include "fw_reset.h"
|
5 | 5 |
|
/* Bit numbers for mlx5_fw_reset.reset_flags. */
enum {
	MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
};

/*
 * Per-device firmware sync-reset state. Allocated in mlx5_fw_reset_init(),
 * hung off dev->priv.fw_reset, freed in mlx5_fw_reset_cleanup().
 */
struct mlx5_fw_reset {
	struct mlx5_core_dev *dev;	/* back-pointer to the owning device */
	struct mlx5_nb nb;		/* GENERAL_EVENT notifier (fw_reset_event_notifier) */
	struct workqueue_struct *wq;	/* single-threaded queue for the two works below */
	struct work_struct reset_request_work;	/* acks a PCI sync FW update reset request */
	struct work_struct reset_reload_work;	/* unloads/reloads the device after reset */
	unsigned long reset_flags;	/* MLX5_FW_RESET_FLAGS_* bits */
	struct timer_list timer;	/* polls fatal sensors while a reset is requested */
};
| 19 | + |
6 | 20 | static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level,
|
7 | 21 | u8 reset_type_sel, u8 sync_resp, bool sync_start)
|
8 | 22 | {
|
@@ -49,3 +63,167 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev)
|
49 | 63 | {
|
50 | 64 | return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false);
|
51 | 65 | }
|
| 66 | + |
| 67 | +static void mlx5_sync_reset_reload_work(struct work_struct *work) |
| 68 | +{ |
| 69 | + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, |
| 70 | + reset_reload_work); |
| 71 | + struct mlx5_core_dev *dev = fw_reset->dev; |
| 72 | + |
| 73 | + mlx5_enter_error_state(dev, true); |
| 74 | + mlx5_unload_one(dev, false); |
| 75 | + if (mlx5_health_wait_pci_up(dev)) { |
| 76 | + mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); |
| 77 | + return; |
| 78 | + } |
| 79 | + mlx5_load_one(dev, false); |
| 80 | +} |
| 81 | + |
/*
 * Stop the sync-reset poll timer.
 *
 * del_timer() (not del_timer_sync()) is used: this path is reached from the
 * timer handler itself — poll_sync_reset() ->
 * mlx5_sync_reset_clear_reset_requested() — where a synchronous delete would
 * self-deadlock. NOTE(review): from process context this means the handler
 * may still be running when this returns; confirm callers tolerate one last
 * poll_sync_reset() invocation.
 */
static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
{
	struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;

	del_timer(&fw_reset->timer);
}
| 88 | + |
/*
 * Exit the "reset requested" state.
 *
 * Ordering matters: the sync-reset poll timer is stopped before the flag is
 * cleared (poll_sync_reset() bails out early once the bit is gone), then the
 * regular health poller — stopped by mlx5_sync_reset_set_reset_requested() —
 * is optionally resumed.
 *
 * @poll_health: restart mlx5_start_health_poll(); false when invoked from
 *               the poll timer itself on a fatal-sensor hit, where
 *               reset_reload_work takes over recovery.
 */
static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
{
	struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;

	mlx5_stop_sync_reset_poll(dev);
	clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
	if (poll_health)
		mlx5_start_health_poll(dev);
}
| 98 | + |
#define MLX5_RESET_POLL_INTERVAL (HZ / 10)
/*
 * Timer callback: while a sync reset is pending, sample the fatal sensors
 * roughly every 100ms (MLX5_RESET_POLL_INTERVAL).
 *
 * A fatal reading indicates the device reset has happened ("Got Device
 * Reset"): the pending state is torn down with poll_health=false and
 * recovery is handed off to reset_reload_work. Otherwise the timer
 * re-arms itself.
 *
 * Returns silently if the REQUESTED bit was already cleared, e.g. by a
 * concurrent mlx5_sync_reset_clear_reset_requested().
 */
static void poll_sync_reset(struct timer_list *t)
{
	struct mlx5_fw_reset *fw_reset = from_timer(fw_reset, t, timer);
	struct mlx5_core_dev *dev = fw_reset->dev;
	u32 fatal_error;

	if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
		return;

	fatal_error = mlx5_health_check_fatal_sensors(dev);

	if (fatal_error) {
		mlx5_core_warn(dev, "Got Device Reset\n");
		mlx5_sync_reset_clear_reset_requested(dev, false);
		queue_work(fw_reset->wq, &fw_reset->reset_reload_work);
		return;
	}

	mod_timer(&fw_reset->timer, round_jiffies(jiffies + MLX5_RESET_POLL_INTERVAL));
}
| 120 | + |
| 121 | +static void mlx5_start_sync_reset_poll(struct mlx5_core_dev *dev) |
| 122 | +{ |
| 123 | + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; |
| 124 | + |
| 125 | + timer_setup(&fw_reset->timer, poll_sync_reset, 0); |
| 126 | + fw_reset->timer.expires = round_jiffies(jiffies + MLX5_RESET_POLL_INTERVAL); |
| 127 | + add_timer(&fw_reset->timer); |
| 128 | +} |
| 129 | + |
/*
 * Ack a sync reset request towards firmware through the MFRL register:
 * reset level 3, reset_type_sel = 0, sync_resp = 1, without triggering
 * sync_start. Returns 0 on success or an error code from the register write.
 */
static int mlx5_fw_reset_set_reset_sync_ack(struct mlx5_core_dev *dev)
{
	return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 1, false);
}
| 134 | + |
/*
 * Enter the "reset requested" state.
 *
 * The regular health poller is stopped first (presumably so it does not
 * react to the fatal state the expected reset will cause — TODO confirm),
 * the REQUESTED bit is set, and the lightweight sync-reset poll timer takes
 * over sampling the fatal sensors.
 *
 * Undone by mlx5_sync_reset_clear_reset_requested().
 */
static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
{
	struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;

	mlx5_stop_health_poll(dev, true);
	set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
	mlx5_start_sync_reset_poll(dev);
}
| 143 | + |
| 144 | +static void mlx5_sync_reset_request_event(struct work_struct *work) |
| 145 | +{ |
| 146 | + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, |
| 147 | + reset_request_work); |
| 148 | + struct mlx5_core_dev *dev = fw_reset->dev; |
| 149 | + int err; |
| 150 | + |
| 151 | + mlx5_sync_reset_set_reset_requested(dev); |
| 152 | + err = mlx5_fw_reset_set_reset_sync_ack(dev); |
| 153 | + if (err) |
| 154 | + mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err); |
| 155 | + else |
| 156 | + mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n"); |
| 157 | +} |
| 158 | + |
| 159 | +static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct mlx5_eqe *eqe) |
| 160 | +{ |
| 161 | + struct mlx5_eqe_sync_fw_update *sync_fw_update_eqe; |
| 162 | + u8 sync_event_rst_type; |
| 163 | + |
| 164 | + sync_fw_update_eqe = &eqe->data.sync_fw_update; |
| 165 | + sync_event_rst_type = sync_fw_update_eqe->sync_rst_state & SYNC_RST_STATE_MASK; |
| 166 | + switch (sync_event_rst_type) { |
| 167 | + case MLX5_SYNC_RST_STATE_RESET_REQUEST: |
| 168 | + queue_work(fw_reset->wq, &fw_reset->reset_request_work); |
| 169 | + break; |
| 170 | + } |
| 171 | +} |
| 172 | + |
| 173 | +static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long action, void *data) |
| 174 | +{ |
| 175 | + struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb); |
| 176 | + struct mlx5_eqe *eqe = data; |
| 177 | + |
| 178 | + switch (eqe->sub_type) { |
| 179 | + case MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT: |
| 180 | + mlx5_sync_reset_events_handle(fw_reset, eqe); |
| 181 | + break; |
| 182 | + default: |
| 183 | + return NOTIFY_DONE; |
| 184 | + } |
| 185 | + |
| 186 | + return NOTIFY_OK; |
| 187 | +} |
| 188 | + |
| 189 | +void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev) |
| 190 | +{ |
| 191 | + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; |
| 192 | + |
| 193 | + MLX5_NB_INIT(&fw_reset->nb, fw_reset_event_notifier, GENERAL_EVENT); |
| 194 | + mlx5_eq_notifier_register(dev, &fw_reset->nb); |
| 195 | +} |
| 196 | + |
| 197 | +void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev) |
| 198 | +{ |
| 199 | + mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb); |
| 200 | +} |
| 201 | + |
| 202 | +int mlx5_fw_reset_init(struct mlx5_core_dev *dev) |
| 203 | +{ |
| 204 | + struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL); |
| 205 | + |
| 206 | + if (!fw_reset) |
| 207 | + return -ENOMEM; |
| 208 | + fw_reset->wq = create_singlethread_workqueue("mlx5_fw_reset_events"); |
| 209 | + if (!fw_reset->wq) { |
| 210 | + kfree(fw_reset); |
| 211 | + return -ENOMEM; |
| 212 | + } |
| 213 | + |
| 214 | + fw_reset->dev = dev; |
| 215 | + dev->priv.fw_reset = fw_reset; |
| 216 | + |
| 217 | + INIT_WORK(&fw_reset->reset_request_work, mlx5_sync_reset_request_event); |
| 218 | + INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work); |
| 219 | + |
| 220 | + return 0; |
| 221 | +} |
| 222 | + |
| 223 | +void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev) |
| 224 | +{ |
| 225 | + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; |
| 226 | + |
| 227 | + destroy_workqueue(fw_reset->wq); |
| 228 | + kfree(dev->priv.fw_reset); |
| 229 | +} |
0 commit comments