Scaleway NVIDIA H100 Performance evaluation - Whisper

Go back to list

Whisper is a speech recognition model mainly used for its speech-to-text ability. To standardize our test suite, we created whisper-benchmark, a command-line tool that handles free audio samples and times the speech-to-text transcription across different models.

As with Ollama, the model exists in different sizes and kinds:

  • Tiny: 39 million parameters in 1GB of VRAM
  • Base: 74M in 1GB
  • Small: 244M in 2GB
  • Medium: 769M in 5GB
  • Large: 1550M in 10GB

The bigger the model, the better its accuracy but the lower its speed.

Here's an example command line:

$ whisper-benchmark en-male-1 --model-name tiny
{ "configuration": { "chart": { "type": "bar", "polar": false, "zoomType": "", "options3d": {}, "height": 400, "width": null, "margin": null, "inverted": false, "zooming": {} }, "credits": { "enabled": false }, "title": { "text": "Tiny" }, "colorAxis": null, "subtitle": { "text": "" }, "xAxis": { "title": { "text": [ "" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": [ "flavor__gpu_model" ], "lineWidth": 1, "tickInterval": null, "tickWidth": 0, "tickLength": 10, "tickPixelInterval": null, "plotLines": null, "labels": { "enabled": false, "format": null, "formatter": "", "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false, "step": 0 }, "plotBands": null, "visible": true, "floor": null, "ceiling": null, "type": "linear", "min": null, "gridLineWidth": null, "gridLineColor": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "tickmarkPlacement": null, "units": null, "minRange": null }, "yAxis": { "title": { "text": [ "Frame/second<br>Higher is better" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": null, "plotLines": null, "plotBands": null, "lineWidth": null, "tickInterval": null, "tickLength": 10, "floor": null, "ceiling": null, "gridLineInterpolation": null, "gridLineWidth": 1, "gridLineColor": "#CCC", "min": null, "max": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "minRange": null, "type": "linear", "tickmarkPlacement": null, "labels": { "enabled": true, "formatter": null, "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false } }, "zAxis": { "title": { "text": "Frame/second<br>Higher is better" } }, "plotOptions": { "series": { "dataLabels": { "enabled": true, "format": "{series.name}", "distance": 30, "align": "left", "inside": true, "allowOverlap": false, "style": { "fontSize": "17px" } }, "showInLegend": 
null, "turboThreshold": 1000, "stacking": "", "groupPadding": 0, "centerInCategory": false, "findNearestPointBy": "x" } }, "navigator": { "enabled": false }, "scrollbar": { "enabled": false }, "rangeSelector": { "enabled": false, "inputEnabled": false }, "legend": { "enabled": true, "maxHeight": null, "align": "center", "verticalAlign": "bottom", "layout": "horizontal", "width": null, "margin": 12, "reversed": false }, "series": [ { "name": "NVIDIA A100-SXM4-40GB", "data": [ [ 0, 3213.7575833842616 ] ], "grouping": false, "color": "#55b400" }, { "name": "NVIDIA A10G", "data": [ [ 1, 3943.536100121548 ] ], "grouping": false, "color": "#f7981d" }, { "name": "NVIDIA H100 PCIe", "data": [ [ 2, 6795.312184966861 ] ], "grouping": false, "color": "#510099" }, { "name": "NVIDIA L4", "data": [ [ 3, 3047.1149930960887 ] ], "grouping": false, "color": "#55b400" }, { "name": "Tesla P100", "data": [ [ 4, 2898.5120745009363 ] ], "grouping": false, "color": "#55b3ff" }, { "name": "Tesla P100-PCIE-16GB", "data": [ [ 5, 4039.420374343763 ] ], "grouping": false, "color": "#510099" }, { "name": "Tesla T4", "data": [ [ 6, 3409.4305597107686 ] ], "grouping": false, "color": "#fa6600" }, { "name": "Tesla V100-PCIE-16GB", "data": [ [ 7, 3014.474969005871 ] ], "grouping": false, "color": "#484848" }, { "name": "Tesla V100-SXM2-16GB", "data": [ [ 8, 2434.7474053323544 ] ], "grouping": false, "color": "#f7981d" }, { "name": "Tesla V100S-PCIE-32GB", "data": [ [ 9, 3829.1743163600927 ] ], "grouping": false, "color": "#484848" } ], "drilldown": {}, "tooltip": { "enabled": true, "useHTML": false, "format": null, "headerFormat": "", "pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} frame/sec</b>", "footerFormat": "", "shared": false, "outside": false, "valueDecimals": null, "split": false }, "annotations": null }, "hc_type": "chart", "id": "tiny" }
{ "configuration": { "chart": { "type": "bar", "polar": false, "zoomType": "", "options3d": {}, "height": 400, "width": null, "margin": null, "inverted": false, "zooming": {} }, "credits": { "enabled": false }, "title": { "text": "Base" }, "colorAxis": null, "subtitle": { "text": "" }, "xAxis": { "title": { "text": [ "" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": [ "flavor__gpu_model" ], "lineWidth": 1, "tickInterval": null, "tickWidth": 0, "tickLength": 10, "tickPixelInterval": null, "plotLines": null, "labels": { "enabled": false, "format": null, "formatter": "", "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false, "step": 0 }, "plotBands": null, "visible": true, "floor": null, "ceiling": null, "type": "linear", "min": null, "gridLineWidth": null, "gridLineColor": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "tickmarkPlacement": null, "units": null, "minRange": null }, "yAxis": { "title": { "text": [ "Frame/second<br>Higher is better" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": null, "plotLines": null, "plotBands": null, "lineWidth": null, "tickInterval": null, "tickLength": 10, "floor": null, "ceiling": null, "gridLineInterpolation": null, "gridLineWidth": 1, "gridLineColor": "#CCC", "min": null, "max": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "minRange": null, "type": "linear", "tickmarkPlacement": null, "labels": { "enabled": true, "formatter": null, "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false } }, "zAxis": { "title": { "text": "Frame/second<br>Higher is better" } }, "plotOptions": { "series": { "dataLabels": { "enabled": true, "format": "{series.name}", "distance": 30, "align": "left", "inside": true, "allowOverlap": false, "style": { "fontSize": "17px" } }, "showInLegend": 
null, "turboThreshold": 1000, "stacking": "", "groupPadding": 0, "centerInCategory": false, "findNearestPointBy": "x" } }, "navigator": { "enabled": false }, "scrollbar": { "enabled": false }, "rangeSelector": { "enabled": false, "inputEnabled": false }, "legend": { "enabled": true, "maxHeight": null, "align": "center", "verticalAlign": "bottom", "layout": "horizontal", "width": null, "margin": 12, "reversed": false }, "series": [ { "name": "NVIDIA A100-SXM4-40GB", "data": [ [ 0, 2629.5587059520076 ] ], "grouping": false, "color": "#55b400" }, { "name": "NVIDIA A10G", "data": [ [ 1, 3234.890504047175 ] ], "grouping": false, "color": "#f7981d" }, { "name": "NVIDIA H100 PCIe", "data": [ [ 2, 5575.043800281516 ] ], "grouping": false, "color": "#510099" }, { "name": "NVIDIA L4", "data": [ [ 3, 2482.8830675720787 ] ], "grouping": false, "color": "#55b400" }, { "name": "Tesla P100", "data": [ [ 4, 2346.417425073513 ] ], "grouping": false, "color": "#55b3ff" }, { "name": "Tesla P100-PCIE-16GB", "data": [ [ 5, 3257.433336872889 ] ], "grouping": false, "color": "#510099" }, { "name": "Tesla T4", "data": [ [ 6, 2808.7171929436568 ] ], "grouping": false, "color": "#fa6600" }, { "name": "Tesla V100-PCIE-16GB", "data": [ [ 7, 2394.9228740139156 ] ], "grouping": false, "color": "#484848" }, { "name": "Tesla V100-SXM2-16GB", "data": [ [ 8, 1986.9604349920512 ] ], "grouping": false, "color": "#f7981d" }, { "name": "Tesla V100S-PCIE-32GB", "data": [ [ 9, 3016.1003068818354 ] ], "grouping": false, "color": "#484848" } ], "drilldown": {}, "tooltip": { "enabled": true, "useHTML": false, "format": null, "headerFormat": "", "pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} frame/sec</b>", "footerFormat": "", "shared": false, "outside": false, "valueDecimals": null, "split": false }, "annotations": null }, "hc_type": "chart", "id": "base" }
{ "configuration": { "chart": { "type": "bar", "polar": false, "zoomType": "", "options3d": {}, "height": 400, "width": null, "margin": null, "inverted": false, "zooming": {} }, "credits": { "enabled": false }, "title": { "text": "Small" }, "colorAxis": null, "subtitle": { "text": "" }, "xAxis": { "title": { "text": [ "" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": [ "flavor__gpu_model" ], "lineWidth": 1, "tickInterval": null, "tickWidth": 0, "tickLength": 10, "tickPixelInterval": null, "plotLines": null, "labels": { "enabled": false, "format": null, "formatter": "", "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false, "step": 0 }, "plotBands": null, "visible": true, "floor": null, "ceiling": null, "type": "linear", "min": null, "gridLineWidth": null, "gridLineColor": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "tickmarkPlacement": null, "units": null, "minRange": null }, "yAxis": { "title": { "text": [ "Frame/second<br>Higher is better" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": null, "plotLines": null, "plotBands": null, "lineWidth": null, "tickInterval": null, "tickLength": 10, "floor": null, "ceiling": null, "gridLineInterpolation": null, "gridLineWidth": 1, "gridLineColor": "#CCC", "min": null, "max": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "minRange": null, "type": "linear", "tickmarkPlacement": null, "labels": { "enabled": true, "formatter": null, "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false } }, "zAxis": { "title": { "text": "Frame/second<br>Higher is better" } }, "plotOptions": { "series": { "dataLabels": { "enabled": true, "format": "{series.name}", "distance": 30, "align": "left", "inside": true, "allowOverlap": false, "style": { "fontSize": "17px" } }, "showInLegend": 
null, "turboThreshold": 1000, "stacking": "", "groupPadding": 0, "centerInCategory": false, "findNearestPointBy": "x" } }, "navigator": { "enabled": false }, "scrollbar": { "enabled": false }, "rangeSelector": { "enabled": false, "inputEnabled": false }, "legend": { "enabled": true, "maxHeight": null, "align": "center", "verticalAlign": "bottom", "layout": "horizontal", "width": null, "margin": 12, "reversed": false }, "series": [ { "name": "NVIDIA A100-SXM4-40GB", "data": [ [ 0, 1822.5813637515876 ] ], "grouping": false, "color": "#55b400" }, { "name": "NVIDIA A10G", "data": [ [ 1, 2250.31911126752 ] ], "grouping": false, "color": "#f7981d" }, { "name": "NVIDIA H100 PCIe", "data": [ [ 2, 4245.6590479473125 ] ], "grouping": false, "color": "#510099" }, { "name": "NVIDIA L4", "data": [ [ 3, 1627.2790660610208 ] ], "grouping": false, "color": "#55b400" }, { "name": "Tesla P100", "data": [ [ 4, 1567.8009349051274 ] ], "grouping": false, "color": "#55b3ff" }, { "name": "Tesla P100-PCIE-16GB", "data": [ [ 5, 2164.9953496036796 ] ], "grouping": false, "color": "#510099" }, { "name": "Tesla T4", "data": [ [ 6, 1642.1839097805976 ] ], "grouping": false, "color": "#fa6600" }, { "name": "Tesla V100-PCIE-16GB", "data": [ [ 7, 1518.2014451274526 ] ], "grouping": false, "color": "#484848" }, { "name": "Tesla V100-SXM2-16GB", "data": [ [ 8, 1372.735787114163 ] ], "grouping": false, "color": "#f7981d" }, { "name": "Tesla V100S-PCIE-32GB", "data": [ [ 9, 2121.840642106143 ] ], "grouping": false, "color": "#484848" } ], "drilldown": {}, "tooltip": { "enabled": true, "useHTML": false, "format": null, "headerFormat": "", "pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} frame/sec</b>", "footerFormat": "", "shared": false, "outside": false, "valueDecimals": null, "split": false }, "annotations": null }, "hc_type": "chart", "id": "small" }
{ "configuration": { "chart": { "type": "bar", "polar": false, "zoomType": "", "options3d": {}, "height": 400, "width": null, "margin": null, "inverted": false, "zooming": {} }, "credits": { "enabled": false }, "title": { "text": "Medium" }, "colorAxis": null, "subtitle": { "text": "" }, "xAxis": { "title": { "text": [ "" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": [ "flavor__gpu_model" ], "lineWidth": 1, "tickInterval": null, "tickWidth": 0, "tickLength": 10, "tickPixelInterval": null, "plotLines": null, "labels": { "enabled": false, "format": null, "formatter": "", "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false, "step": 0 }, "plotBands": null, "visible": true, "floor": null, "ceiling": null, "type": "linear", "min": null, "gridLineWidth": null, "gridLineColor": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "tickmarkPlacement": null, "units": null, "minRange": null }, "yAxis": { "title": { "text": [ "Frame/second<br>Higher is better" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": null, "plotLines": null, "plotBands": null, "lineWidth": null, "tickInterval": null, "tickLength": 10, "floor": null, "ceiling": null, "gridLineInterpolation": null, "gridLineWidth": 1, "gridLineColor": "#CCC", "min": null, "max": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "minRange": null, "type": "linear", "tickmarkPlacement": null, "labels": { "enabled": true, "formatter": null, "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false } }, "zAxis": { "title": { "text": "Frame/second<br>Higher is better" } }, "plotOptions": { "series": { "dataLabels": { "enabled": true, "format": "{series.name}", "distance": 30, "align": "left", "inside": true, "allowOverlap": false, "style": { "fontSize": "17px" } }, 
"showInLegend": null, "turboThreshold": 1000, "stacking": "", "groupPadding": 0, "centerInCategory": false, "findNearestPointBy": "x" } }, "navigator": { "enabled": false }, "scrollbar": { "enabled": false }, "rangeSelector": { "enabled": false, "inputEnabled": false }, "legend": { "enabled": true, "maxHeight": null, "align": "center", "verticalAlign": "bottom", "layout": "horizontal", "width": null, "margin": 12, "reversed": false }, "series": [ { "name": "NVIDIA A100-SXM4-40GB", "data": [ [ 0, 1087.3665146661644 ] ], "grouping": false, "color": "#55b400" }, { "name": "NVIDIA A10G", "data": [ [ 1, 1337.2626920355706 ] ], "grouping": false, "color": "#f7981d" }, { "name": "NVIDIA H100 PCIe", "data": [ [ 2, 2584.265811550847 ] ], "grouping": false, "color": "#510099" }, { "name": "NVIDIA L4", "data": [ [ 3, 947.1313410023223 ] ], "grouping": false, "color": "#55b400" }, { "name": "Tesla P100", "data": [ [ 4, 897.7388285935202 ] ], "grouping": false, "color": "#55b3ff" }, { "name": "Tesla P100-PCIE-16GB", "data": [ [ 5, 1189.4777966969693 ] ], "grouping": false, "color": "#510099" }, { "name": "Tesla T4", "data": [ [ 6, 810.3430021410103 ] ], "grouping": false, "color": "#fa6600" }, { "name": "Tesla V100-PCIE-16GB", "data": [ [ 7, 850.9958340473235 ] ], "grouping": false, "color": "#484848" }, { "name": "Tesla V100-SXM2-16GB", "data": [ [ 8, 832.1679423472154 ] ], "grouping": false, "color": "#f7981d" }, { "name": "Tesla V100S-PCIE-32GB", "data": [ [ 9, 1345.5327277138476 ] ], "grouping": false, "color": "#484848" } ], "drilldown": {}, "tooltip": { "enabled": true, "useHTML": false, "format": null, "headerFormat": "", "pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} frame/sec</b>", "footerFormat": "", "shared": false, "outside": false, "valueDecimals": null, "split": false }, "annotations": null }, "hc_type": "chart", "id": "medium" }
{ "configuration": { "chart": { "type": "bar", "polar": false, "zoomType": "", "options3d": {}, "height": 400, "width": null, "margin": null, "inverted": false, "zooming": {} }, "credits": { "enabled": false }, "title": { "text": "Large" }, "colorAxis": null, "subtitle": { "text": "" }, "xAxis": { "title": { "text": [ "" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": [ "flavor__gpu_model" ], "lineWidth": 1, "tickInterval": null, "tickWidth": 0, "tickLength": 10, "tickPixelInterval": null, "plotLines": null, "labels": { "enabled": false, "format": null, "formatter": "", "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false, "step": 0 }, "plotBands": null, "visible": true, "floor": null, "ceiling": null, "type": "linear", "min": null, "gridLineWidth": null, "gridLineColor": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "tickmarkPlacement": null, "units": null, "minRange": null }, "yAxis": { "title": { "text": [ "Frame/second<br>Higher is better" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": null, "plotLines": null, "plotBands": null, "lineWidth": null, "tickInterval": null, "tickLength": 10, "floor": null, "ceiling": null, "gridLineInterpolation": null, "gridLineWidth": 1, "gridLineColor": "#CCC", "min": null, "max": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "minRange": null, "type": "linear", "tickmarkPlacement": null, "labels": { "enabled": true, "formatter": null, "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false } }, "zAxis": { "title": { "text": "Frame/second<br>Higher is better" } }, "plotOptions": { "series": { "dataLabels": { "enabled": true, "format": "{series.name}", "distance": 30, "align": "left", "inside": true, "allowOverlap": false, "style": { "fontSize": "17px" } }, "showInLegend": 
null, "turboThreshold": 1000, "stacking": "", "groupPadding": 0, "centerInCategory": false, "findNearestPointBy": "x" } }, "navigator": { "enabled": false }, "scrollbar": { "enabled": false }, "rangeSelector": { "enabled": false, "inputEnabled": false }, "legend": { "enabled": true, "maxHeight": null, "align": "center", "verticalAlign": "bottom", "layout": "horizontal", "width": null, "margin": 12, "reversed": false }, "series": [ { "name": "NVIDIA A100-SXM4-40GB", "data": [ [ 0, 835.3890225874771 ] ], "grouping": false, "color": "#55b400" }, { "name": "NVIDIA A10G", "data": [ [ 1, 947.9753912046613 ] ], "grouping": false, "color": "#f7981d" }, { "name": "NVIDIA H100 PCIe", "data": [ [ 2, 1950.3280298751433 ] ], "grouping": false, "color": "#510099" }, { "name": "NVIDIA L4", "data": [ [ 3, 627.0358176901652 ] ], "grouping": false, "color": "#55b400" }, { "name": "Tesla P100", "data": [ [ 4, 659.8705501758104 ] ], "grouping": false, "color": "#55b3ff" }, { "name": "Tesla P100-PCIE-16GB", "data": [ [ 5, 727.1582421092538 ] ], "grouping": false, "color": "#510099" }, { "name": "Tesla T4", "data": [ [ 6, 480.3780283823492 ] ], "grouping": false, "color": "#fa6600" }, { "name": "Tesla V100-PCIE-16GB", "data": [ [ 7, 640.860200216334 ] ], "grouping": false, "color": "#484848" }, { "name": "Tesla V100-SXM2-16GB", "data": [ [ 8, 619.6907857782959 ] ], "grouping": false, "color": "#f7981d" }, { "name": "Tesla V100S-PCIE-32GB", "data": [ [ 9, 996.4368050616931 ] ], "grouping": false, "color": "#484848" } ], "drilldown": {}, "tooltip": { "enabled": true, "useHTML": false, "format": null, "headerFormat": "", "pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} frame/sec</b>", "footerFormat": "", "shared": false, "outside": false, "valueDecimals": null, "split": false }, "annotations": null }, "hc_type": "chart", "id": "large" }

Cloud Mercato's observations

  • The H100 always leads the performance ranking
  • The Base model on the H100 is faster than the Tiny model on any other GPU