OVHcloud GPU benchmark - LLaVA
Go back to list

As of 2024, certain LLMs are capable of performing computer vision tasks. Ollama makes it possible to test this capability through the LLaVA models. To run the benchmark, use the following command line:
model=llava:7b # or llava:13b, llava:34b
question_id=cm001 # or cm002 cm003 cm004 cm005
ollama-benchmark speed --model $model --question $question_id --max-workers 1 --max_turns 1 --mirostat 0 --mirostat_eta 0.1 --mirostat_tau 5.0 --num_
{
"configuration": {
"chart": {
"type": "bar",
"polar": false,
"zoomType": "",
"options3d": {},
"height": 550,
"width": null,
"margin": null,
"inverted": false,
"zooming": {}
},
"credits": {
"enabled": false
},
"title": {
"text": "Llava"
},
"colorAxis": null,
"subtitle": {
"text": ""
},
"xAxis": {
"title": {
"text": [
""
],
"useHTML": false,
"style": {
"color": "#666666"
}
},
"categories": [
"provider__short_name",
"flavor__name",
"flavor__gpu_model"
],
"lineWidth": 1,
"tickInterval": null,
"tickWidth": 0,
"tickLength": 10,
"tickPixelInterval": null,
"plotLines": null,
"labels": {
"enabled": false,
"format": null,
"formatter": "",
"style": {
"color": "#666666",
"cursor": "default",
"fontSize": "11px"
},
"useHTML": false,
"step": 0
},
"plotBands": null,
"visible": true,
"floor": null,
"ceiling": null,
"type": "linear",
"min": null,
"gridLineWidth": null,
"gridLineColor": null,
"minorTickInterval": null,
"minorTickWidth": 0,
"minTickInterval": null,
"startOnTick": true,
"endOnTick": null,
"tickmarkPlacement": null,
"units": null,
"minRange": null
},
"yAxis": {
"title": {
"text": [
"Token/second<br>Higher is better"
],
"useHTML": false,
"style": {
"color": "#666666"
}
},
"categories": null,
"plotLines": null,
"plotBands": null,
"lineWidth": null,
"tickInterval": null,
"tickLength": 10,
"floor": null,
"ceiling": null,
"gridLineInterpolation": null,
"gridLineWidth": 1,
"gridLineColor": "#CCC",
"min": null,
"max": null,
"minorTickInterval": null,
"minorTickWidth": 0,
"minTickInterval": null,
"startOnTick": true,
"endOnTick": null,
"minRange": null,
"type": "linear",
"tickmarkPlacement": null,
"labels": {
"enabled": true,
"formatter": null,
"style": {
"color": "#666666",
"cursor": "default",
"fontSize": "11px"
},
"useHTML": false
}
},
"zAxis": {
"title": {
"text": "Token/second<br>Higher is better"
}
},
"plotOptions": {
"series": {
"dataLabels": {
"enabled": true,
"format": "{series.name}",
"distance": 30,
"align": "left",
"inside": true,
"allowOverlap": false,
"style": {
"fontSize": "17px"
}
},
"showInLegend": null,
"turboThreshold": 1000,
"stacking": "",
"groupPadding": 0,
"centerInCategory": false,
"findNearestPointBy": "x"
}
},
"navigator": {
"enabled": false
},
"scrollbar": {
"enabled": false
},
"rangeSelector": {
"enabled": false,
"inputEnabled": false
},
"legend": {
"enabled": true,
"maxHeight": null,
"align": "center",
"verticalAlign": "bottom",
"layout": "horizontal",
"width": null,
"margin": 12,
"reversed": false
},
"series": [
{
"name": "AWS g5.xlarge NVIDIA A10G",
"data": [
[
0,
78.46780276437885
]
],
"grouping": false,
"color": "#f7981d"
},
{
"name": "AWS p3.2xlarge Tesla V100-SXM2-16GB",
"data": [
[
1,
102.68721346651607
]
],
"grouping": false,
"color": "#f7981d"
},
{
"name": "Azure Standard_NC40ads_H100_v5 NVIDIA H100 NVL",
"data": [
[
2,
188.6150098069199
]
],
"grouping": false,
"color": "#55b3ff"
},
{
"name": "Azure Standard_NC6s_v3 Tesla V100-PCIE-16GB",
"data": [
[
3,
51.85274324011339
]
],
"grouping": false,
"color": "#55b3ff"
},
{
"name": "Google a2-highgpu-1g NVIDIA A100-SXM4-40GB",
"data": [
[
4,
117.56729920686392
]
],
"grouping": false,
"color": "#55b400"
},
{
"name": "Google a2-ultragpu-1g NVIDIA A100-SXM4-80GB",
"data": [
[
5,
126.46968990319273
]
],
"grouping": false,
"color": "#55b400"
},
{
"name": "Google g2-standard-16 NVIDIA L4",
"data": [
[
6,
49.96092165242029
]
],
"grouping": false,
"color": "#55b400"
},
{
"name": "Google n1-highmem-8 Intel Skylake Tesla V100 Tesla V100-SXM2-16GB",
"data": [
[
7,
104.61788493014754
]
],
"grouping": false,
"color": "#55b400"
},
{
"name": "OVH H100-380 NVIDIA H100 PCIe",
"data": [
[
8,
120.96560765822298
]
],
"grouping": false,
"color": "#484848"
},
{
"name": "OVH L4-90 NVIDIA L4",
"data": [
[
9,
51.930537665313004
]
],
"grouping": false,
"color": "#484848"
},
{
"name": "OVH L40S-90 NVIDIA L40S",
"data": [
[
10,
122.4416929489508
]
],
"grouping": false,
"color": "#484848"
},
{
"name": "OVH T1-LE-45 Tesla V100-PCIE-16GB",
"data": [
[
11,
99.91688716726927
]
],
"grouping": false,
"color": "#484848"
}
],
"drilldown": {},
"tooltip": {
"enabled": true,
"useHTML": false,
"format": null,
"headerFormat": "",
"pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} token/sec</b>",
"footerFormat": "",
"shared": false,
"outside": false,
"valueDecimals": null,
"split": false
},
"annotations": null
},
"hc_type": "chart",
"id": "llava"
}
{
"configuration": {
"chart": {
"type": "bar",
"polar": false,
"zoomType": "",
"options3d": {},
"height": 550,
"width": null,
"margin": null,
"inverted": false,
"zooming": {}
},
"credits": {
"enabled": false
},
"title": {
"text": "Llava:13b"
},
"colorAxis": null,
"subtitle": {
"text": ""
},
"xAxis": {
"title": {
"text": [
""
],
"useHTML": false,
"style": {
"color": "#666666"
}
},
"categories": [
"provider__short_name",
"flavor__name",
"flavor__gpu_model"
],
"lineWidth": 1,
"tickInterval": null,
"tickWidth": 0,
"tickLength": 10,
"tickPixelInterval": null,
"plotLines": null,
"labels": {
"enabled": false,
"format": null,
"formatter": "",
"style": {
"color": "#666666",
"cursor": "default",
"fontSize": "11px"
},
"useHTML": false,
"step": 0
},
"plotBands": null,
"visible": true,
"floor": null,
"ceiling": null,
"type": "linear",
"min": null,
"gridLineWidth": null,
"gridLineColor": null,
"minorTickInterval": null,
"minorTickWidth": 0,
"minTickInterval": null,
"startOnTick": true,
"endOnTick": null,
"tickmarkPlacement": null,
"units": null,
"minRange": null
},
"yAxis": {
"title": {
"text": [
"Token/second<br>Higher is better"
],
"useHTML": false,
"style": {
"color": "#666666"
}
},
"categories": null,
"plotLines": null,
"plotBands": null,
"lineWidth": null,
"tickInterval": null,
"tickLength": 10,
"floor": null,
"ceiling": null,
"gridLineInterpolation": null,
"gridLineWidth": 1,
"gridLineColor": "#CCC",
"min": null,
"max": null,
"minorTickInterval": null,
"minorTickWidth": 0,
"minTickInterval": null,
"startOnTick": true,
"endOnTick": null,
"minRange": null,
"type": "linear",
"tickmarkPlacement": null,
"labels": {
"enabled": true,
"formatter": null,
"style": {
"color": "#666666",
"cursor": "default",
"fontSize": "11px"
},
"useHTML": false
}
},
"zAxis": {
"title": {
"text": "Token/second<br>Higher is better"
}
},
"plotOptions": {
"series": {
"dataLabels": {
"enabled": true,
"format": "{series.name}",
"distance": 30,
"align": "left",
"inside": true,
"allowOverlap": false,
"style": {
"fontSize": "17px"
}
},
"showInLegend": null,
"turboThreshold": 1000,
"stacking": "",
"groupPadding": 0,
"centerInCategory": false,
"findNearestPointBy": "x"
}
},
"navigator": {
"enabled": false
},
"scrollbar": {
"enabled": false
},
"rangeSelector": {
"enabled": false,
"inputEnabled": false
},
"legend": {
"enabled": true,
"maxHeight": null,
"align": "center",
"verticalAlign": "bottom",
"layout": "horizontal",
"width": null,
"margin": 12,
"reversed": false
},
"series": [
{
"name": "AWS g5.xlarge NVIDIA A10G",
"data": [
[
0,
47.88769588718513
]
],
"grouping": false,
"color": "#f7981d"
},
{
"name": "AWS p3.2xlarge Tesla V100-SXM2-16GB",
"data": [
[
1,
67.58862629541319
]
],
"grouping": false,
"color": "#f7981d"
},
{
"name": "Azure Standard_NC40ads_H100_v5 NVIDIA H100 NVL",
"data": [
[
2,
129.35452767017705
]
],
"grouping": false,
"color": "#55b3ff"
},
{
"name": "Azure Standard_NC6s_v3 Tesla V100-PCIE-16GB",
"data": [
[
3,
38.17865668349313
]
],
"grouping": false,
"color": "#55b3ff"
},
{
"name": "Google a2-highgpu-1g NVIDIA A100-SXM4-40GB",
"data": [
[
4,
81.0158437965956
]
],
"grouping": false,
"color": "#55b400"
},
{
"name": "Google a2-ultragpu-1g NVIDIA A100-SXM4-80GB",
"data": [
[
5,
87.10302120973132
]
],
"grouping": false,
"color": "#55b400"
},
{
"name": "Google g2-standard-16 NVIDIA L4",
"data": [
[
6,
27.921030534621373
]
],
"grouping": false,
"color": "#55b400"
},
{
"name": "Google n1-highmem-8 Intel Skylake Tesla V100 Tesla V100-SXM2-16GB",
"data": [
[
7,
67.99903103506401
]
],
"grouping": false,
"color": "#55b400"
},
{
"name": "OVH H100-380 NVIDIA H100 PCIe",
"data": [
[
8,
85.04619470314374
]
],
"grouping": false,
"color": "#484848"
},
{
"name": "OVH L4-90 NVIDIA L4",
"data": [
[
9,
28.691167486901193
]
],
"grouping": false,
"color": "#484848"
},
{
"name": "OVH L40S-90 NVIDIA L40S",
"data": [
[
10,
71.789495315772
]
],
"grouping": false,
"color": "#484848"
},
{
"name": "OVH T1-LE-45 Tesla V100-PCIE-16GB",
"data": [
[
11,
65.65104052627987
]
],
"grouping": false,
"color": "#484848"
}
],
"drilldown": {},
"tooltip": {
"enabled": true,
"useHTML": false,
"format": null,
"headerFormat": "",
"pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} token/sec</b>",
"footerFormat": "",
"shared": false,
"outside": false,
"valueDecimals": null,
"split": false
},
"annotations": null
},
"hc_type": "chart",
"id": "llava:13b"
}
{
"configuration": {
"chart": {
"type": "bar",
"polar": false,
"zoomType": "",
"options3d": {},
"height": 550,
"width": null,
"margin": null,
"inverted": false,
"zooming": {}
},
"credits": {
"enabled": false
},
"title": {
"text": "Llava:34b"
},
"colorAxis": null,
"subtitle": {
"text": ""
},
"xAxis": {
"title": {
"text": [
""
],
"useHTML": false,
"style": {
"color": "#666666"
}
},
"categories": [
"provider__short_name",
"flavor__name",
"flavor__gpu_model"
],
"lineWidth": 1,
"tickInterval": null,
"tickWidth": 0,
"tickLength": 10,
"tickPixelInterval": null,
"plotLines": null,
"labels": {
"enabled": false,
"format": null,
"formatter": "",
"style": {
"color": "#666666",
"cursor": "default",
"fontSize": "11px"
},
"useHTML": false,
"step": 0
},
"plotBands": null,
"visible": true,
"floor": null,
"ceiling": null,
"type": "linear",
"min": null,
"gridLineWidth": null,
"gridLineColor": null,
"minorTickInterval": null,
"minorTickWidth": 0,
"minTickInterval": null,
"startOnTick": true,
"endOnTick": null,
"tickmarkPlacement": null,
"units": null,
"minRange": null
},
"yAxis": {
"title": {
"text": [
"Token/second<br>Higher is better"
],
"useHTML": false,
"style": {
"color": "#666666"
}
},
"categories": null,
"plotLines": null,
"plotBands": null,
"lineWidth": null,
"tickInterval": null,
"tickLength": 10,
"floor": null,
"ceiling": null,
"gridLineInterpolation": null,
"gridLineWidth": 1,
"gridLineColor": "#CCC",
"min": null,
"max": null,
"minorTickInterval": null,
"minorTickWidth": 0,
"minTickInterval": null,
"startOnTick": true,
"endOnTick": null,
"minRange": null,
"type": "linear",
"tickmarkPlacement": null,
"labels": {
"enabled": true,
"formatter": null,
"style": {
"color": "#666666",
"cursor": "default",
"fontSize": "11px"
},
"useHTML": false
}
},
"zAxis": {
"title": {
"text": "Token/second<br>Higher is better"
}
},
"plotOptions": {
"series": {
"dataLabels": {
"enabled": true,
"format": "{series.name}",
"distance": 30,
"align": "left",
"inside": true,
"allowOverlap": false,
"style": {
"fontSize": "17px"
}
},
"showInLegend": null,
"turboThreshold": 1000,
"stacking": "",
"groupPadding": 0,
"centerInCategory": false,
"findNearestPointBy": "x"
}
},
"navigator": {
"enabled": false
},
"scrollbar": {
"enabled": false
},
"rangeSelector": {
"enabled": false,
"inputEnabled": false
},
"legend": {
"enabled": true,
"maxHeight": null,
"align": "center",
"verticalAlign": "bottom",
"layout": "horizontal",
"width": null,
"margin": 12,
"reversed": false
},
"series": [
{
"name": "AWS g5.xlarge NVIDIA A10G",
"data": [
[
0,
21.005920725479594
]
],
"grouping": false,
"color": "#f7981d"
},
{
"name": "Azure Standard_NC40ads_H100_v5 NVIDIA H100 NVL",
"data": [
[
1,
65.70381239747117
]
],
"grouping": false,
"color": "#55b3ff"
},
{
"name": "Google a2-highgpu-1g NVIDIA A100-SXM4-40GB",
"data": [
[
2,
38.2504152428938
]
],
"grouping": false,
"color": "#55b400"
},
{
"name": "Google a2-ultragpu-1g NVIDIA A100-SXM4-80GB",
"data": [
[
3,
42.524679268543586
]
],
"grouping": false,
"color": "#55b400"
},
{
"name": "Google g2-standard-16 NVIDIA L4",
"data": [
[
4,
11.715066896841101
]
],
"grouping": false,
"color": "#55b400"
},
{
"name": "OVH H100-380 NVIDIA H100 PCIe",
"data": [
[
5,
55.37799090270114
]
],
"grouping": false,
"color": "#484848"
},
{
"name": "OVH L4-90 NVIDIA L4",
"data": [
[
6,
12.057626760380025
]
],
"grouping": false,
"color": "#484848"
},
{
"name": "OVH L40S-90 NVIDIA L40S",
"data": [
[
7,
31.316352843561866
]
],
"grouping": false,
"color": "#484848"
}
],
"drilldown": {},
"tooltip": {
"enabled": true,
"useHTML": false,
"format": null,
"headerFormat": "",
"pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} token/sec</b>",
"footerFormat": "",
"shared": false,
"outside": false,
"valueDecimals": null,
"split": false
},
"annotations": null
},
"hc_type": "chart",
"id": "llava:34b"
}
{
"configuration": {
"chart": {
"type": "bar",
"polar": false,
"zoomType": "",
"options3d": {},
"height": 550,
"width": null,
"margin": null,
"inverted": false,
"zooming": {}
},
"credits": {
"enabled": false
},
"title": {
"text": "Llava:7b-v1.6-mistral-q8_0"
},
"colorAxis": null,
"subtitle": {
"text": ""
},
"xAxis": {
"title": {
"text": [
""
],
"useHTML": false,
"style": {
"color": "#666666"
}
},
"categories": [
"provider__short_name",
"flavor__name",
"flavor__gpu_model"
],
"lineWidth": 1,
"tickInterval": null,
"tickWidth": 0,
"tickLength": 10,
"tickPixelInterval": null,
"plotLines": null,
"labels": {
"enabled": false,
"format": null,
"formatter": "",
"style": {
"color": "#666666",
"cursor": "default",
"fontSize": "11px"
},
"useHTML": false,
"step": 0
},
"plotBands": null,
"visible": true,
"floor": null,
"ceiling": null,
"type": "linear",
"min": null,
"gridLineWidth": null,
"gridLineColor": null,
"minorTickInterval": null,
"minorTickWidth": 0,
"minTickInterval": null,
"startOnTick": true,
"endOnTick": null,
"tickmarkPlacement": null,
"units": null,
"minRange": null
},
"yAxis": {
"title": {
"text": [
"Token/second<br>Higher is better"
],
"useHTML": false,
"style": {
"color": "#666666"
}
},
"categories": null,
"plotLines": null,
"plotBands": null,
"lineWidth": null,
"tickInterval": null,
"tickLength": 10,
"floor": null,
"ceiling": null,
"gridLineInterpolation": null,
"gridLineWidth": 1,
"gridLineColor": "#CCC",
"min": null,
"max": null,
"minorTickInterval": null,
"minorTickWidth": 0,
"minTickInterval": null,
"startOnTick": true,
"endOnTick": null,
"minRange": null,
"type": "linear",
"tickmarkPlacement": null,
"labels": {
"enabled": true,
"formatter": null,
"style": {
"color": "#666666",
"cursor": "default",
"fontSize": "11px"
},
"useHTML": false
}
},
"zAxis": {
"title": {
"text": "Token/second<br>Higher is better"
}
},
"plotOptions": {
"series": {
"dataLabels": {
"enabled": true,
"format": "{series.name}",
"distance": 30,
"align": "left",
"inside": true,
"allowOverlap": false,
"style": {
"fontSize": "17px"
}
},
"showInLegend": null,
"turboThreshold": 1000,
"stacking": "",
"groupPadding": 0,
"centerInCategory": false,
"findNearestPointBy": "x"
}
},
"navigator": {
"enabled": false
},
"scrollbar": {
"enabled": false
},
"rangeSelector": {
"enabled": false,
"inputEnabled": false
},
"legend": {
"enabled": true,
"maxHeight": null,
"align": "center",
"verticalAlign": "bottom",
"layout": "horizontal",
"width": null,
"margin": 12,
"reversed": false
},
"series": [
{
"name": "AWS g5.xlarge NVIDIA A10G",
"data": [
[
0,
51.518494289131375
]
],
"grouping": false,
"color": "#f7981d"
},
{
"name": "AWS p3.2xlarge Tesla V100-SXM2-16GB",
"data": [
[
1,
71.44629092699648
]
],
"grouping": false,
"color": "#f7981d"
},
{
"name": "Azure Standard_NC40ads_H100_v5 NVIDIA H100 NVL",
"data": [
[
2,
158.18097615193804
]
],
"grouping": false,
"color": "#55b3ff"
},
{
"name": "Google n1-highmem-8 Intel Skylake Tesla V100 Tesla V100-SXM2-16GB",
"data": [
[
3,
72.74288956139445
]
],
"grouping": false,
"color": "#55b400"
},
{
"name": "OVH L4-90 NVIDIA L4",
"data": [
[
4,
30.21959093804391
]
],
"grouping": false,
"color": "#484848"
},
{
"name": "OVH L40S-90 NVIDIA L40S",
"data": [
[
5,
75.70524850992427
]
],
"grouping": false,
"color": "#484848"
},
{
"name": "OVH T1-45 Tesla V100-PCIE-16GB",
"data": [
[
6,
71.190758639073
]
],
"grouping": false,
"color": "#484848"
}
],
"drilldown": {},
"tooltip": {
"enabled": true,
"useHTML": false,
"format": null,
"headerFormat": "",
"pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} token/sec</b>",
"footerFormat": "",
"shared": false,
"outside": false,
"valueDecimals": null,
"split": false
},
"annotations": null
},
"hc_type": "chart",
"id": "llava:7b-v1.6-mistral-q8_0"
}
{
"configuration": {
"chart": {
"type": "bar",
"polar": false,
"zoomType": "",
"options3d": {},
"height": 550,
"width": null,
"margin": null,
"inverted": false,
"zooming": {}
},
"credits": {
"enabled": false
},
"title": {
"text": "Llava:13b-v1.6-vicuna-q8_0"
},
"colorAxis": null,
"subtitle": {
"text": ""
},
"xAxis": {
"title": {
"text": [
""
],
"useHTML": false,
"style": {
"color": "#666666"
}
},
"categories": [
"provider__short_name",
"flavor__name",
"flavor__gpu_model"
],
"lineWidth": 1,
"tickInterval": null,
"tickWidth": 0,
"tickLength": 10,
"tickPixelInterval": null,
"plotLines": null,
"labels": {
"enabled": false,
"format": null,
"formatter": "",
"style": {
"color": "#666666",
"cursor": "default",
"fontSize": "11px"
},
"useHTML": false,
"step": 0
},
"plotBands": null,
"visible": true,
"floor": null,
"ceiling": null,
"type": "linear",
"min": null,
"gridLineWidth": null,
"gridLineColor": null,
"minorTickInterval": null,
"minorTickWidth": 0,
"minTickInterval": null,
"startOnTick": true,
"endOnTick": null,
"tickmarkPlacement": null,
"units": null,
"minRange": null
},
"yAxis": {
"title": {
"text": [
"Token/second<br>Higher is better"
],
"useHTML": false,
"style": {
"color": "#666666"
}
},
"categories": null,
"plotLines": null,
"plotBands": null,
"lineWidth": null,
"tickInterval": null,
"tickLength": 10,
"floor": null,
"ceiling": null,
"gridLineInterpolation": null,
"gridLineWidth": 1,
"gridLineColor": "#CCC",
"min": null,
"max": null,
"minorTickInterval": null,
"minorTickWidth": 0,
"minTickInterval": null,
"startOnTick": true,
"endOnTick": null,
"minRange": null,
"type": "linear",
"tickmarkPlacement": null,
"labels": {
"enabled": true,
"formatter": null,
"style": {
"color": "#666666",
"cursor": "default",
"fontSize": "11px"
},
"useHTML": false
}
},
"zAxis": {
"title": {
"text": "Token/second<br>Higher is better"
}
},
"plotOptions": {
"series": {
"dataLabels": {
"enabled": true,
"format": "{series.name}",
"distance": 30,
"align": "left",
"inside": true,
"allowOverlap": false,
"style": {
"fontSize": "17px"
}
},
"showInLegend": null,
"turboThreshold": 1000,
"stacking": "",
"groupPadding": 0,
"centerInCategory": false,
"findNearestPointBy": "x"
}
},
"navigator": {
"enabled": false
},
"scrollbar": {
"enabled": false
},
"rangeSelector": {
"enabled": false,
"inputEnabled": false
},
"legend": {
"enabled": true,
"maxHeight": null,
"align": "center",
"verticalAlign": "bottom",
"layout": "horizontal",
"width": null,
"margin": 12,
"reversed": false
},
"series": [
{
"name": "AWS g5.xlarge NVIDIA A10G",
"data": [
[
0,
30.168178175296266
]
],
"grouping": false,
"color": "#f7981d"
},
{
"name": "AWS p3.2xlarge Tesla V100-SXM2-16GB",
"data": [
[
1,
20.31856649520987
]
],
"grouping": false,
"color": "#f7981d"
},
{
"name": "Azure Standard_NC40ads_H100_v5 NVIDIA H100 NVL",
"data": [
[
2,
103.87089402779482
]
],
"grouping": false,
"color": "#55b3ff"
},
{
"name": "Google n1-highmem-8 Intel Skylake Tesla V100 Tesla V100-SXM2-16GB",
"data": [
[
3,
22.047040130015212
]
],
"grouping": false,
"color": "#55b400"
},
{
"name": "OVH L4-90 NVIDIA L4",
"data": [
[
4,
16.662605883847593
]
],
"grouping": false,
"color": "#484848"
},
{
"name": "OVH L40S-90 NVIDIA L40S",
"data": [
[
5,
43.4203206115407
]
],
"grouping": false,
"color": "#484848"
},
{
"name": "OVH T1-45 Tesla V100-PCIE-16GB",
"data": [
[
6,
20.807739438581798
]
],
"grouping": false,
"color": "#484848"
}
],
"drilldown": {},
"tooltip": {
"enabled": true,
"useHTML": false,
"format": null,
"headerFormat": "",
"pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} token/sec</b>",
"footerFormat": "",
"shared": false,
"outside": false,
"valueDecimals": null,
"split": false
},
"annotations": null
},
"hc_type": "chart",
"id": "llava:13b-v1.6-vicuna-q8_0"
}
{
"configuration": {
"chart": {
"type": "bar",
"polar": false,
"zoomType": "",
"options3d": {},
"height": 550,
"width": null,
"margin": null,
"inverted": false,
"zooming": {}
},
"credits": {
"enabled": false
},
"title": {
"text": "Llava:34b-v1.6-q8_0"
},
"colorAxis": null,
"subtitle": {
"text": ""
},
"xAxis": {
"title": {
"text": [
""
],
"useHTML": false,
"style": {
"color": "#666666"
}
},
"categories": [
"provider__short_name",
"flavor__name",
"flavor__gpu_model"
],
"lineWidth": 1,
"tickInterval": null,
"tickWidth": 0,
"tickLength": 10,
"tickPixelInterval": null,
"plotLines": null,
"labels": {
"enabled": false,
"format": null,
"formatter": "",
"style": {
"color": "#666666",
"cursor": "default",
"fontSize": "11px"
},
"useHTML": false,
"step": 0
},
"plotBands": null,
"visible": true,
"floor": null,
"ceiling": null,
"type": "linear",
"min": null,
"gridLineWidth": null,
"gridLineColor": null,
"minorTickInterval": null,
"minorTickWidth": 0,
"minTickInterval": null,
"startOnTick": true,
"endOnTick": null,
"tickmarkPlacement": null,
"units": null,
"minRange": null
},
"yAxis": {
"title": {
"text": [
"Token/second<br>Higher is better"
],
"useHTML": false,
"style": {
"color": "#666666"
}
},
"categories": null,
"plotLines": null,
"plotBands": null,
"lineWidth": null,
"tickInterval": null,
"tickLength": 10,
"floor": null,
"ceiling": null,
"gridLineInterpolation": null,
"gridLineWidth": 1,
"gridLineColor": "#CCC",
"min": null,
"max": null,
"minorTickInterval": null,
"minorTickWidth": 0,
"minTickInterval": null,
"startOnTick": true,
"endOnTick": null,
"minRange": null,
"type": "linear",
"tickmarkPlacement": null,
"labels": {
"enabled": true,
"formatter": null,
"style": {
"color": "#666666",
"cursor": "default",
"fontSize": "11px"
},
"useHTML": false
}
},
"zAxis": {
"title": {
"text": "Token/second<br>Higher is better"
}
},
"plotOptions": {
"series": {
"dataLabels": {
"enabled": true,
"format": "{series.name}",
"distance": 30,
"align": "left",
"inside": true,
"allowOverlap": false,
"style": {
"fontSize": "17px"
}
},
"showInLegend": null,
"turboThreshold": 1000,
"stacking": "",
"groupPadding": 0,
"centerInCategory": false,
"findNearestPointBy": "x"
}
},
"navigator": {
"enabled": false
},
"scrollbar": {
"enabled": false
},
"rangeSelector": {
"enabled": false,
"inputEnabled": false
},
"legend": {
"enabled": true,
"maxHeight": null,
"align": "center",
"verticalAlign": "bottom",
"layout": "horizontal",
"width": null,
"margin": 12,
"reversed": false
},
"series": [
{
"name": "Azure Standard_NC40ads_H100_v5 NVIDIA H100 NVL",
"data": [
[
0,
45.55300976025811
]
],
"grouping": false,
"color": "#55b3ff"
},
{
"name": "OVH L40S-90 NVIDIA L40S",
"data": [
[
1,
17.934207893835328
]
],
"grouping": false,
"color": "#484848"
}
],
"drilldown": {},
"tooltip": {
"enabled": true,
"useHTML": false,
"format": null,
"headerFormat": "",
"pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} token/sec</b>",
"footerFormat": "",
"shared": false,
"outside": false,
"valueDecimals": null,
"split": false
},
"annotations": null
},
"hc_type": "chart",
"id": "llava:34b-v1.6-q8_0"
}
Cloud Mercato's observations:
- VMs with less than 20 GB of VRAM generally cannot run llava:34b.
- The H100 NVL has the best rate.
- Considering its age, the Tesla V100 performs well.
- The A100 and the H100 PCIe produce similar results.