在监控设备的时候,有时会在server端的日志中见到类似“another network error, wait for 15 seconds”的异常。今天我们来看一下这个问题的出现原因和解决方案:
问题定位到poller.c,先看下面两段代码:
这是get_values的部分代码:
/*
 * Excerpt from get_values(): walk the per-item result codes and switch the
 * host availability state. Each activate/deactivate call is skipped when
 * last_available already records that state, so consecutive items with the
 * same outcome do not repeat the call.
 */
for (i = 0; i < num; i++)
{
	switch (errcodes[i])
	{
		case SUCCEED:
		case NOTSUPPORTED:
		case AGENT_ERROR:
			/* these result codes mark the host as available */
			if (HOST_AVAILABLE_TRUE != last_available)
			{
				zbx_activate_item_host(&items[i], &timespec);
				last_available = HOST_AVAILABLE_TRUE;
			}
			break;
		case NETWORK_ERROR:
		case GATEWAY_ERROR:
		case TIMEOUT_ERROR:
			/* these result codes go through zbx_deactivate_item_host(), */
			/* which is where the "network error" warnings are logged */
			if (HOST_AVAILABLE_FALSE != last_available)
			{
				zbx_deactivate_item_host(&items[i], &timespec, results[i].msg);
				last_available = HOST_AVAILABLE_FALSE;
			}
			break;
		case CONFIG_ERROR:
			/* nothing to do */
			break;
		default:
			zbx_error("unknown response code returned: %d", errcodes[i]);
			THIS_SHOULD_NEVER_HAPPEN;
	}
	/* the quoted excerpt ends here; the closing brace of the for loop is not part of it */
这里是zbx_deactivate_item_host的代码:
void zbx_deactivate_item_host(DC_ITEM *item, zbx_timespec_t *ts, const char *error) // #0 { const char *__function_name = "zbx_deactivate_item_host"; zbx_host_availability_t in, out; // #1 unsigned char agent_type; // #2 zabbix_log(LOG_LEVEL_DEBUG, "In %s() hostid:" ZBX_FS_UI64 " itemid:" ZBX_FS_UI64 " type:%d", // #3 __function_name, item->host.hostid, item->itemid, (int)item->type); zbx_host_availability_init(&in, item->host.hostid); // #4 zbx_host_availability_init(&out,item->host.hostid); // #5 if (ZBX_AGENT_UNKNOWN == (agent_type = host_availability_agent_by_item_type(item->type))) // #6 goto out; if (FAIL == host_get_availability(&item->host, agent_type, &in)) // #7 goto out; if (FAIL == DChost_deactivate(item->host.hostid, agent_type, ts, &in.agents[agent_type], // #8 &out.agents[agent_type], error)) { goto out; } if (FAIL == db_host_update_availability(&out)) // #9 goto out; host_set_availability(&item->host, agent_type, &out); // #10 if (0 == in.agents[agent_type].errors_from) // #11 { zabbix_log(LOG_LEVEL_WARNING, "%s item \"%s\" on host \"%s\" failed:" // #12 " first network error, wait for %d seconds", zbx_agent_type_string(item->type), item->key_orig, item->host.host, out.agents[agent_type].disable_until - ts->sec); } else { if (HOST_AVAILABLE_FALSE != in.agents[agent_type].available) // #13 { if (HOST_AVAILABLE_FALSE != out.agents[agent_type].available) // #14 { zabbix_log(LOG_LEVEL_WARNING, "%s item \"%s\" on host \"%s\" failed:" // #15 " another network error, wait for %d seconds", zbx_agent_type_string(item->type), item->key_orig, item->host.host, out.agents[agent_type].disable_until - ts->sec); } else { zabbix_log(LOG_LEVEL_WARNING, "temporarily disabling %s checks on host \"%s\":" // #16 " host unavailable", zbx_agent_type_string(item->type), item->host.host); } } } zabbix_log(LOG_LEVEL_DEBUG, "%s() errors_from:%d available:%d", __function_name, out.agents[agent_type].errors_from, out.agents[agent_type].available); out: 
zbx_host_availability_clean(&out); zbx_host_availability_clean(&in); zabbix_log(LOG_LEVEL_DEBUG, "End of %s()", __function_name); }
下