OVHcloud GPU benchmark - LLaVA

Go back to list

Since 2024, certain LLMs have been able to perform computer vision tasks. Ollama, through its LLaVA models, makes it possible to test this capability. To run the benchmark, use the following commands:

model=llava:7b  # or llava:13b, llava:34b
question_id=cm001 # or cm002 cm003 cm004 cm005
ollama-benchmark speed --model $model --question $question_id --max-workers 1 --max_turns 1 --mirostat 0 --mirostat_eta 0.1 --mirostat_tau 5.0 --num_
{ "configuration": { "chart": { "type": "bar", "polar": false, "zoomType": "", "options3d": {}, "height": 550, "width": null, "margin": null, "inverted": false, "zooming": {} }, "credits": { "enabled": false }, "title": { "text": "Llava" }, "colorAxis": null, "subtitle": { "text": "" }, "xAxis": { "title": { "text": [ "" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": [ "provider__short_name", "flavor__name", "flavor__gpu_model" ], "lineWidth": 1, "tickInterval": null, "tickWidth": 0, "tickLength": 10, "tickPixelInterval": null, "plotLines": null, "labels": { "enabled": false, "format": null, "formatter": "", "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false, "step": 0 }, "plotBands": null, "visible": true, "floor": null, "ceiling": null, "type": "linear", "min": null, "gridLineWidth": null, "gridLineColor": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "tickmarkPlacement": null, "units": null, "minRange": null }, "yAxis": { "title": { "text": [ "Token/second<br>Higher is better" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": null, "plotLines": null, "plotBands": null, "lineWidth": null, "tickInterval": null, "tickLength": 10, "floor": null, "ceiling": null, "gridLineInterpolation": null, "gridLineWidth": 1, "gridLineColor": "#CCC", "min": null, "max": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "minRange": null, "type": "linear", "tickmarkPlacement": null, "labels": { "enabled": true, "formatter": null, "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false } }, "zAxis": { "title": { "text": "Token/second<br>Higher is better" } }, "plotOptions": { "series": { "dataLabels": { "enabled": true, "format": "{series.name}", "distance": 30, "align": "left", "inside": true, "allowOverlap": false, "style": { 
"fontSize": "17px" } }, "showInLegend": null, "turboThreshold": 1000, "stacking": "", "groupPadding": 0, "centerInCategory": false, "findNearestPointBy": "x" } }, "navigator": { "enabled": false }, "scrollbar": { "enabled": false }, "rangeSelector": { "enabled": false, "inputEnabled": false }, "legend": { "enabled": true, "maxHeight": null, "align": "center", "verticalAlign": "bottom", "layout": "horizontal", "width": null, "margin": 12, "reversed": false }, "series": [ { "name": "AWS g5.xlarge NVIDIA A10G", "data": [ [ 0, 78.46780276437885 ] ], "grouping": false, "color": "#f7981d" }, { "name": "AWS p3.2xlarge Tesla V100-SXM2-16GB", "data": [ [ 1, 102.68721346651607 ] ], "grouping": false, "color": "#f7981d" }, { "name": "Azure Standard_NC40ads_H100_v5 NVIDIA H100 NVL", "data": [ [ 2, 188.6150098069199 ] ], "grouping": false, "color": "#55b3ff" }, { "name": "Azure Standard_NC6s_v3 Tesla V100-PCIE-16GB", "data": [ [ 3, 51.85274324011339 ] ], "grouping": false, "color": "#55b3ff" }, { "name": "Google a2-highgpu-1g NVIDIA A100-SXM4-40GB", "data": [ [ 4, 117.56729920686392 ] ], "grouping": false, "color": "#55b400" }, { "name": "Google a2-ultragpu-1g NVIDIA A100-SXM4-80GB", "data": [ [ 5, 126.46968990319273 ] ], "grouping": false, "color": "#55b400" }, { "name": "Google g2-standard-16 NVIDIA L4", "data": [ [ 6, 49.96092165242029 ] ], "grouping": false, "color": "#55b400" }, { "name": "Google n1-highmem-8 Intel Skylake Tesla V100 Tesla V100-SXM2-16GB", "data": [ [ 7, 104.61788493014754 ] ], "grouping": false, "color": "#55b400" }, { "name": "OVH H100-380 NVIDIA H100 PCIe", "data": [ [ 8, 120.96560765822298 ] ], "grouping": false, "color": "#484848" }, { "name": "OVH L4-90 NVIDIA L4", "data": [ [ 9, 51.930537665313004 ] ], "grouping": false, "color": "#484848" }, { "name": "OVH L40S-90 NVIDIA L40S", "data": [ [ 10, 122.4416929489508 ] ], "grouping": false, "color": "#484848" }, { "name": "OVH T1-LE-45 Tesla V100-PCIE-16GB", "data": [ [ 11, 99.91688716726927 ] ], 
"grouping": false, "color": "#484848" } ], "drilldown": {}, "tooltip": { "enabled": true, "useHTML": false, "format": null, "headerFormat": "", "pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} token/sec</b>", "footerFormat": "", "shared": false, "outside": false, "valueDecimals": null, "split": false }, "annotations": null }, "hc_type": "chart", "id": "llava" }
{ "configuration": { "chart": { "type": "bar", "polar": false, "zoomType": "", "options3d": {}, "height": 550, "width": null, "margin": null, "inverted": false, "zooming": {} }, "credits": { "enabled": false }, "title": { "text": "Llava:13b" }, "colorAxis": null, "subtitle": { "text": "" }, "xAxis": { "title": { "text": [ "" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": [ "provider__short_name", "flavor__name", "flavor__gpu_model" ], "lineWidth": 1, "tickInterval": null, "tickWidth": 0, "tickLength": 10, "tickPixelInterval": null, "plotLines": null, "labels": { "enabled": false, "format": null, "formatter": "", "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false, "step": 0 }, "plotBands": null, "visible": true, "floor": null, "ceiling": null, "type": "linear", "min": null, "gridLineWidth": null, "gridLineColor": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "tickmarkPlacement": null, "units": null, "minRange": null }, "yAxis": { "title": { "text": [ "Token/second<br>Higher is better" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": null, "plotLines": null, "plotBands": null, "lineWidth": null, "tickInterval": null, "tickLength": 10, "floor": null, "ceiling": null, "gridLineInterpolation": null, "gridLineWidth": 1, "gridLineColor": "#CCC", "min": null, "max": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "minRange": null, "type": "linear", "tickmarkPlacement": null, "labels": { "enabled": true, "formatter": null, "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false } }, "zAxis": { "title": { "text": "Token/second<br>Higher is better" } }, "plotOptions": { "series": { "dataLabels": { "enabled": true, "format": "{series.name}", "distance": 30, "align": "left", "inside": true, "allowOverlap": false, 
"style": { "fontSize": "17px" } }, "showInLegend": null, "turboThreshold": 1000, "stacking": "", "groupPadding": 0, "centerInCategory": false, "findNearestPointBy": "x" } }, "navigator": { "enabled": false }, "scrollbar": { "enabled": false }, "rangeSelector": { "enabled": false, "inputEnabled": false }, "legend": { "enabled": true, "maxHeight": null, "align": "center", "verticalAlign": "bottom", "layout": "horizontal", "width": null, "margin": 12, "reversed": false }, "series": [ { "name": "AWS g5.xlarge NVIDIA A10G", "data": [ [ 0, 47.88769588718513 ] ], "grouping": false, "color": "#f7981d" }, { "name": "AWS p3.2xlarge Tesla V100-SXM2-16GB", "data": [ [ 1, 67.58862629541319 ] ], "grouping": false, "color": "#f7981d" }, { "name": "Azure Standard_NC40ads_H100_v5 NVIDIA H100 NVL", "data": [ [ 2, 129.35452767017705 ] ], "grouping": false, "color": "#55b3ff" }, { "name": "Azure Standard_NC6s_v3 Tesla V100-PCIE-16GB", "data": [ [ 3, 38.17865668349313 ] ], "grouping": false, "color": "#55b3ff" }, { "name": "Google a2-highgpu-1g NVIDIA A100-SXM4-40GB", "data": [ [ 4, 81.0158437965956 ] ], "grouping": false, "color": "#55b400" }, { "name": "Google a2-ultragpu-1g NVIDIA A100-SXM4-80GB", "data": [ [ 5, 87.10302120973132 ] ], "grouping": false, "color": "#55b400" }, { "name": "Google g2-standard-16 NVIDIA L4", "data": [ [ 6, 27.921030534621373 ] ], "grouping": false, "color": "#55b400" }, { "name": "Google n1-highmem-8 Intel Skylake Tesla V100 Tesla V100-SXM2-16GB", "data": [ [ 7, 67.99903103506401 ] ], "grouping": false, "color": "#55b400" }, { "name": "OVH H100-380 NVIDIA H100 PCIe", "data": [ [ 8, 85.04619470314374 ] ], "grouping": false, "color": "#484848" }, { "name": "OVH L4-90 NVIDIA L4", "data": [ [ 9, 28.691167486901193 ] ], "grouping": false, "color": "#484848" }, { "name": "OVH L40S-90 NVIDIA L40S", "data": [ [ 10, 71.789495315772 ] ], "grouping": false, "color": "#484848" }, { "name": "OVH T1-LE-45 Tesla V100-PCIE-16GB", "data": [ [ 11, 65.65104052627987 ] ], 
"grouping": false, "color": "#484848" } ], "drilldown": {}, "tooltip": { "enabled": true, "useHTML": false, "format": null, "headerFormat": "", "pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} token/sec</b>", "footerFormat": "", "shared": false, "outside": false, "valueDecimals": null, "split": false }, "annotations": null }, "hc_type": "chart", "id": "llava:13b" }
{ "configuration": { "chart": { "type": "bar", "polar": false, "zoomType": "", "options3d": {}, "height": 550, "width": null, "margin": null, "inverted": false, "zooming": {} }, "credits": { "enabled": false }, "title": { "text": "Llava:34b" }, "colorAxis": null, "subtitle": { "text": "" }, "xAxis": { "title": { "text": [ "" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": [ "provider__short_name", "flavor__name", "flavor__gpu_model" ], "lineWidth": 1, "tickInterval": null, "tickWidth": 0, "tickLength": 10, "tickPixelInterval": null, "plotLines": null, "labels": { "enabled": false, "format": null, "formatter": "", "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false, "step": 0 }, "plotBands": null, "visible": true, "floor": null, "ceiling": null, "type": "linear", "min": null, "gridLineWidth": null, "gridLineColor": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "tickmarkPlacement": null, "units": null, "minRange": null }, "yAxis": { "title": { "text": [ "Token/second<br>Higher is better" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": null, "plotLines": null, "plotBands": null, "lineWidth": null, "tickInterval": null, "tickLength": 10, "floor": null, "ceiling": null, "gridLineInterpolation": null, "gridLineWidth": 1, "gridLineColor": "#CCC", "min": null, "max": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "minRange": null, "type": "linear", "tickmarkPlacement": null, "labels": { "enabled": true, "formatter": null, "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false } }, "zAxis": { "title": { "text": "Token/second<br>Higher is better" } }, "plotOptions": { "series": { "dataLabels": { "enabled": true, "format": "{series.name}", "distance": 30, "align": "left", "inside": true, "allowOverlap": false, 
"style": { "fontSize": "17px" } }, "showInLegend": null, "turboThreshold": 1000, "stacking": "", "groupPadding": 0, "centerInCategory": false, "findNearestPointBy": "x" } }, "navigator": { "enabled": false }, "scrollbar": { "enabled": false }, "rangeSelector": { "enabled": false, "inputEnabled": false }, "legend": { "enabled": true, "maxHeight": null, "align": "center", "verticalAlign": "bottom", "layout": "horizontal", "width": null, "margin": 12, "reversed": false }, "series": [ { "name": "AWS g5.xlarge NVIDIA A10G", "data": [ [ 0, 21.005920725479594 ] ], "grouping": false, "color": "#f7981d" }, { "name": "Azure Standard_NC40ads_H100_v5 NVIDIA H100 NVL", "data": [ [ 1, 65.70381239747117 ] ], "grouping": false, "color": "#55b3ff" }, { "name": "Google a2-highgpu-1g NVIDIA A100-SXM4-40GB", "data": [ [ 2, 38.2504152428938 ] ], "grouping": false, "color": "#55b400" }, { "name": "Google a2-ultragpu-1g NVIDIA A100-SXM4-80GB", "data": [ [ 3, 42.524679268543586 ] ], "grouping": false, "color": "#55b400" }, { "name": "Google g2-standard-16 NVIDIA L4", "data": [ [ 4, 11.715066896841101 ] ], "grouping": false, "color": "#55b400" }, { "name": "OVH H100-380 NVIDIA H100 PCIe", "data": [ [ 5, 55.37799090270114 ] ], "grouping": false, "color": "#484848" }, { "name": "OVH L4-90 NVIDIA L4", "data": [ [ 6, 12.057626760380025 ] ], "grouping": false, "color": "#484848" }, { "name": "OVH L40S-90 NVIDIA L40S", "data": [ [ 7, 31.316352843561866 ] ], "grouping": false, "color": "#484848" } ], "drilldown": {}, "tooltip": { "enabled": true, "useHTML": false, "format": null, "headerFormat": "", "pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} token/sec</b>", "footerFormat": "", "shared": false, "outside": false, "valueDecimals": null, "split": false }, "annotations": null }, "hc_type": "chart", "id": "llava:34b" }
{ "configuration": { "chart": { "type": "bar", "polar": false, "zoomType": "", "options3d": {}, "height": 550, "width": null, "margin": null, "inverted": false, "zooming": {} }, "credits": { "enabled": false }, "title": { "text": "Llava:7b-v1.6-mistral-q8_0" }, "colorAxis": null, "subtitle": { "text": "" }, "xAxis": { "title": { "text": [ "" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": [ "provider__short_name", "flavor__name", "flavor__gpu_model" ], "lineWidth": 1, "tickInterval": null, "tickWidth": 0, "tickLength": 10, "tickPixelInterval": null, "plotLines": null, "labels": { "enabled": false, "format": null, "formatter": "", "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false, "step": 0 }, "plotBands": null, "visible": true, "floor": null, "ceiling": null, "type": "linear", "min": null, "gridLineWidth": null, "gridLineColor": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "tickmarkPlacement": null, "units": null, "minRange": null }, "yAxis": { "title": { "text": [ "Token/second<br>Higher is better" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": null, "plotLines": null, "plotBands": null, "lineWidth": null, "tickInterval": null, "tickLength": 10, "floor": null, "ceiling": null, "gridLineInterpolation": null, "gridLineWidth": 1, "gridLineColor": "#CCC", "min": null, "max": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "minRange": null, "type": "linear", "tickmarkPlacement": null, "labels": { "enabled": true, "formatter": null, "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false } }, "zAxis": { "title": { "text": "Token/second<br>Higher is better" } }, "plotOptions": { "series": { "dataLabels": { "enabled": true, "format": "{series.name}", "distance": 30, "align": "left", "inside": true, 
"allowOverlap": false, "style": { "fontSize": "17px" } }, "showInLegend": null, "turboThreshold": 1000, "stacking": "", "groupPadding": 0, "centerInCategory": false, "findNearestPointBy": "x" } }, "navigator": { "enabled": false }, "scrollbar": { "enabled": false }, "rangeSelector": { "enabled": false, "inputEnabled": false }, "legend": { "enabled": true, "maxHeight": null, "align": "center", "verticalAlign": "bottom", "layout": "horizontal", "width": null, "margin": 12, "reversed": false }, "series": [ { "name": "AWS g5.xlarge NVIDIA A10G", "data": [ [ 0, 51.518494289131375 ] ], "grouping": false, "color": "#f7981d" }, { "name": "AWS p3.2xlarge Tesla V100-SXM2-16GB", "data": [ [ 1, 71.44629092699648 ] ], "grouping": false, "color": "#f7981d" }, { "name": "Azure Standard_NC40ads_H100_v5 NVIDIA H100 NVL", "data": [ [ 2, 158.18097615193804 ] ], "grouping": false, "color": "#55b3ff" }, { "name": "Google n1-highmem-8 Intel Skylake Tesla V100 Tesla V100-SXM2-16GB", "data": [ [ 3, 72.74288956139445 ] ], "grouping": false, "color": "#55b400" }, { "name": "OVH L4-90 NVIDIA L4", "data": [ [ 4, 30.21959093804391 ] ], "grouping": false, "color": "#484848" }, { "name": "OVH L40S-90 NVIDIA L40S", "data": [ [ 5, 75.70524850992427 ] ], "grouping": false, "color": "#484848" }, { "name": "OVH T1-45 Tesla V100-PCIE-16GB", "data": [ [ 6, 71.190758639073 ] ], "grouping": false, "color": "#484848" } ], "drilldown": {}, "tooltip": { "enabled": true, "useHTML": false, "format": null, "headerFormat": "", "pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} token/sec</b>", "footerFormat": "", "shared": false, "outside": false, "valueDecimals": null, "split": false }, "annotations": null }, "hc_type": "chart", "id": "llava:7b-v1.6-mistral-q8_0" }
{ "configuration": { "chart": { "type": "bar", "polar": false, "zoomType": "", "options3d": {}, "height": 550, "width": null, "margin": null, "inverted": false, "zooming": {} }, "credits": { "enabled": false }, "title": { "text": "Llava:13b-v1.6-vicuna-q8_0" }, "colorAxis": null, "subtitle": { "text": "" }, "xAxis": { "title": { "text": [ "" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": [ "provider__short_name", "flavor__name", "flavor__gpu_model" ], "lineWidth": 1, "tickInterval": null, "tickWidth": 0, "tickLength": 10, "tickPixelInterval": null, "plotLines": null, "labels": { "enabled": false, "format": null, "formatter": "", "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false, "step": 0 }, "plotBands": null, "visible": true, "floor": null, "ceiling": null, "type": "linear", "min": null, "gridLineWidth": null, "gridLineColor": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "tickmarkPlacement": null, "units": null, "minRange": null }, "yAxis": { "title": { "text": [ "Token/second<br>Higher is better" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": null, "plotLines": null, "plotBands": null, "lineWidth": null, "tickInterval": null, "tickLength": 10, "floor": null, "ceiling": null, "gridLineInterpolation": null, "gridLineWidth": 1, "gridLineColor": "#CCC", "min": null, "max": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "minRange": null, "type": "linear", "tickmarkPlacement": null, "labels": { "enabled": true, "formatter": null, "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false } }, "zAxis": { "title": { "text": "Token/second<br>Higher is better" } }, "plotOptions": { "series": { "dataLabels": { "enabled": true, "format": "{series.name}", "distance": 30, "align": "left", "inside": true, 
"allowOverlap": false, "style": { "fontSize": "17px" } }, "showInLegend": null, "turboThreshold": 1000, "stacking": "", "groupPadding": 0, "centerInCategory": false, "findNearestPointBy": "x" } }, "navigator": { "enabled": false }, "scrollbar": { "enabled": false }, "rangeSelector": { "enabled": false, "inputEnabled": false }, "legend": { "enabled": true, "maxHeight": null, "align": "center", "verticalAlign": "bottom", "layout": "horizontal", "width": null, "margin": 12, "reversed": false }, "series": [ { "name": "AWS g5.xlarge NVIDIA A10G", "data": [ [ 0, 30.168178175296266 ] ], "grouping": false, "color": "#f7981d" }, { "name": "AWS p3.2xlarge Tesla V100-SXM2-16GB", "data": [ [ 1, 20.31856649520987 ] ], "grouping": false, "color": "#f7981d" }, { "name": "Azure Standard_NC40ads_H100_v5 NVIDIA H100 NVL", "data": [ [ 2, 103.87089402779482 ] ], "grouping": false, "color": "#55b3ff" }, { "name": "Google n1-highmem-8 Intel Skylake Tesla V100 Tesla V100-SXM2-16GB", "data": [ [ 3, 22.047040130015212 ] ], "grouping": false, "color": "#55b400" }, { "name": "OVH L4-90 NVIDIA L4", "data": [ [ 4, 16.662605883847593 ] ], "grouping": false, "color": "#484848" }, { "name": "OVH L40S-90 NVIDIA L40S", "data": [ [ 5, 43.4203206115407 ] ], "grouping": false, "color": "#484848" }, { "name": "OVH T1-45 Tesla V100-PCIE-16GB", "data": [ [ 6, 20.807739438581798 ] ], "grouping": false, "color": "#484848" } ], "drilldown": {}, "tooltip": { "enabled": true, "useHTML": false, "format": null, "headerFormat": "", "pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} token/sec</b>", "footerFormat": "", "shared": false, "outside": false, "valueDecimals": null, "split": false }, "annotations": null }, "hc_type": "chart", "id": "llava:13b-v1.6-vicuna-q8_0" }
{ "configuration": { "chart": { "type": "bar", "polar": false, "zoomType": "", "options3d": {}, "height": 550, "width": null, "margin": null, "inverted": false, "zooming": {} }, "credits": { "enabled": false }, "title": { "text": "Llava:34b-v1.6-q8_0" }, "colorAxis": null, "subtitle": { "text": "" }, "xAxis": { "title": { "text": [ "" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": [ "provider__short_name", "flavor__name", "flavor__gpu_model" ], "lineWidth": 1, "tickInterval": null, "tickWidth": 0, "tickLength": 10, "tickPixelInterval": null, "plotLines": null, "labels": { "enabled": false, "format": null, "formatter": "", "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false, "step": 0 }, "plotBands": null, "visible": true, "floor": null, "ceiling": null, "type": "linear", "min": null, "gridLineWidth": null, "gridLineColor": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "tickmarkPlacement": null, "units": null, "minRange": null }, "yAxis": { "title": { "text": [ "Token/second<br>Higher is better" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": null, "plotLines": null, "plotBands": null, "lineWidth": null, "tickInterval": null, "tickLength": 10, "floor": null, "ceiling": null, "gridLineInterpolation": null, "gridLineWidth": 1, "gridLineColor": "#CCC", "min": null, "max": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "minRange": null, "type": "linear", "tickmarkPlacement": null, "labels": { "enabled": true, "formatter": null, "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false } }, "zAxis": { "title": { "text": "Token/second<br>Higher is better" } }, "plotOptions": { "series": { "dataLabels": { "enabled": true, "format": "{series.name}", "distance": 30, "align": "left", "inside": true, "allowOverlap": 
false, "style": { "fontSize": "17px" } }, "showInLegend": null, "turboThreshold": 1000, "stacking": "", "groupPadding": 0, "centerInCategory": false, "findNearestPointBy": "x" } }, "navigator": { "enabled": false }, "scrollbar": { "enabled": false }, "rangeSelector": { "enabled": false, "inputEnabled": false }, "legend": { "enabled": true, "maxHeight": null, "align": "center", "verticalAlign": "bottom", "layout": "horizontal", "width": null, "margin": 12, "reversed": false }, "series": [ { "name": "Azure Standard_NC40ads_H100_v5 NVIDIA H100 NVL", "data": [ [ 0, 45.55300976025811 ] ], "grouping": false, "color": "#55b3ff" }, { "name": "OVH L40S-90 NVIDIA L40S", "data": [ [ 1, 17.934207893835328 ] ], "grouping": false, "color": "#484848" } ], "drilldown": {}, "tooltip": { "enabled": true, "useHTML": false, "format": null, "headerFormat": "", "pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} token/sec</b>", "footerFormat": "", "shared": false, "outside": false, "valueDecimals": null, "split": false }, "annotations": null }, "hc_type": "chart", "id": "llava:34b-v1.6-q8_0" }

Cloud Mercato's observations:

  • Without at least 20 GB of VRAM, most VMs cannot run llava:34b.
  • The H100 NVL achieves the best rate.
  • Considering its age, the Tesla V100 performs well.
  • The A100 and the H100 PCIe produce similar results.