diff --git a/services/grafana/provisioning/dashboards/storage-node.json b/services/grafana/provisioning/dashboards/storage-node.json index 62de402..df05dc0 100644 --- a/services/grafana/provisioning/dashboards/storage-node.json +++ b/services/grafana/provisioning/dashboards/storage-node.json @@ -244,7 +244,7 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "95%-tile latency of the requests.", + "description": "Request latency at the selected quantile", "fieldConfig": { "defaults": { "color": { @@ -326,7 +326,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "histogram_quantile(0.95, \n sum(rate(grpc_server_handling_seconds_bucket{job=\"node\", instance=\"$instance\"}[$__rate_interval])) by (grpc_service,grpc_method,le)\n)\n", + "expr": "histogram_quantile($quantile, \n sum(rate(grpc_server_handling_seconds_bucket{job=\"node\", instance=\"$instance\"}[$__rate_interval])) by (grpc_service,grpc_method,le)\n)\n", "format": "time_series", "instant": false, "interval": "", @@ -335,109 +335,560 @@ "refId": "A" } ], - "title": "Latency, 0.95q", + "title": "Latency", "transformations": [], "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "99%-tile latency of the requests.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineWidth": 1, - "pointSize": 3, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - 
"color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, + "collapsed": true, "gridPos": { - "h": 10, + "h": 1, "w": 24, "x": 0, "y": 31 }, - "id": 5, - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "9.5.6", - "targets": [ + "id": 6, + "panels": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "code", - "exemplar": false, - "expr": "histogram_quantile(0.99, \n sum(rate(grpc_server_handling_seconds_bucket{job=\"node\", instance=\"$instance\"}[$__rate_interval])) by (grpc_service,grpc_method,le)\n)\n", - "format": "time_series", - "instant": false, - "interval": "", - "legendFormat": "{{grpc_service}}.{{grpc_method}}", - "range": true, - "refId": "A" + "description": "Requests per second by Pilorama method for each shard", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 3, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + 
"displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.5.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (instance, shard_id, method) (\n rate(frostfs_node_pilorama_request_duration_seconds_count{instance=\"$instance\"}[$__rate_interval])\n)\n", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "{{method}}, shard {{shard_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Pilorama RPS", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Percentage of the failed pilorama requests for each shard", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 3, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 42 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.5.6", + 
"targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(rate(frostfs_node_pilorama_request_duration_seconds_count{instance=\"$instance\",success!=\"true\"}[$__rate_interval])) by (shard_id, method)\n / \nsum(rate(frostfs_node_pilorama_request_duration_seconds_count{instance=\"$instance\"}[$__rate_interval])) by (shard_id, method)\n * 100.0\n", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "{{method}}, shard {{shard_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Pilorama error rate", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Latency of the pilorama requests for selected quantile for each shard", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 3, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 52 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.5.6", + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "histogram_quantile($quantile, \n sum(rate(frostfs_node_pilorama_request_duration_seconds_bucket{job=\"node\", instance=\"$instance\"}[$__rate_interval])) by (shard_id, method,le)\n)\n", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "{{method}}, shard {{shard_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Pilorama latency", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Duration of waiting time for replication, replication task duration and synchronization for selected quantile", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 3, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 62 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.5.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": 
"code", + "exemplar": false, + "expr": "histogram_quantile($quantile, \n sum(rate(frostfs_node_treeservice_replicate_wait_duration_seconds_bucket{job=\"node\", instance=\"$instance\"}[$__rate_interval])) by (success,le)\n)", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "replicate wait success:{{success}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "histogram_quantile($quantile,\n sum(rate(frostfs_node_treeservice_replicate_task_duration_seconds_bucket{job=\"node\", instance=\"$instance\"}[$__rate_interval])) by (success,le)\n)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "replicate task success:{{success}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "histogram_quantile($quantile, \n sum(rate(frostfs_node_treeservice_sync_duration_seconds_bucket{job=\"node\", instance=\"$instance\"}[$__rate_interval])) by (success,le)\n)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "sync success:{{success}}", + "range": true, + "refId": "C" + } + ], + "title": "Replication and synchronization", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Pilorama mode for each shard", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, 
+ "pointSize": 3, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 1.5, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 72 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.5.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(frostfs_node_pilorama_mode{job=\"node\", instance=\"$instance\"}) by (shard_id, mode)\n", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "Shard {{shard_id}}, mode {{mode}}", + "range": true, + "refId": "A" + } + ], + "title": "Pilorama mode", + "transformations": [], + "type": "timeseries" } ], - "title": "Latency, 0.99q", - "transformations": [], - "type": "timeseries" + "title": "Tree service", + "type": "row" } ], "refresh": "1m", @@ -459,7 +910,7 @@ "definition": "label_values(frostfs_node_state_health,instance)", "hide": 0, "includeAll": false, - "label": "", + "label": "Instance", "multi": false, "name": "instance", "options": [], @@ -472,11 +923,44 @@ "skipUrlSync": false, "sort": 1, "type": "query" + }, + { + "current": { + "selected": false, + "text": "0.95", + "value": "0.95" + }, + "hide": 0, + "includeAll": false, + "label": "Quantile", + "multi": false, + "name": "quantile", + "options": [ + { + "selected": true, + "text": "0.95", + "value": "0.95" + }, + { + "selected": false, + "text": "0.99", + "value": "0.99" + }, + { + "selected": false, + "text": "1.00", + 
"value": "1.00" + } + ], + "query": "0.95, 0.99, 1.00", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" } ] }, "time": { - "from": "now-30m", + "from": "now-1h", "to": "now" }, "timepicker": {},