DeepSeek-R1 and Llama 3.2 - LLM Performance

Go back to list


For our LLM testing, we used our benchmark suite on top of the Ollama framework. Our goal is to evaluate the speed of LLM operations:

  • eval_rate: Inference speed, in tokens/sec
  • prompt_eval_rate: Prompt reading speed, in tokens/sec

Here's an example command line for reproducing our tests:

ollama-benchmark -v2 speed --question 81 --model llama3.1:8b --do-not-pull
{ "configuration": { "chart": { "type": "bar", "polar": false, "zoomType": "", "options3d": {}, "height": 700, "width": null, "margin": null, "inverted": false, "zooming": {} }, "credits": { "enabled": false }, "title": { "text": "eval_rate_mean" }, "colorAxis": null, "subtitle": { "text": "" }, "xAxis": { "title": { "text": [ "" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": [ "model" ], "lineWidth": 1, "tickInterval": null, "tickWidth": 0, "tickLength": 10, "tickPixelInterval": null, "plotLines": null, "labels": { "enabled": false, "format": null, "formatter": "", "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false, "step": 0 }, "plotBands": null, "visible": true, "floor": null, "ceiling": null, "type": "linear", "min": null, "gridLineWidth": null, "gridLineColor": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "tickmarkPlacement": null, "units": null, "minRange": null }, "yAxis": { "title": { "text": [ "Token/second<br>Higher is better" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": null, "plotLines": null, "plotBands": null, "lineWidth": null, "tickInterval": null, "tickLength": 10, "floor": null, "ceiling": null, "gridLineInterpolation": null, "gridLineWidth": 1, "gridLineColor": "#CCC", "min": null, "max": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "minRange": null, "type": "linear", "tickmarkPlacement": null, "labels": { "enabled": true, "formatter": null, "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false } }, "zAxis": { "title": { "text": "Token/second<br>Higher is better" } }, "plotOptions": { "series": { "dataLabels": { "enabled": true, "format": "{}", "distance": 30, "align": "left", "inside": true, "allowOverlap": false, "style": { "fontSize": "17px" } }, "showInLegend": null, 
"turboThreshold": 1000, "stacking": "", "groupPadding": 0, "centerInCategory": false, "findNearestPointBy": "x" } }, "navigator": { "enabled": false }, "scrollbar": { "enabled": false }, "rangeSelector": { "enabled": false, "inputEnabled": false }, "legend": { "enabled": false, "maxHeight": null, "align": "center", "verticalAlign": "bottom", "layout": "horizontal", "width": null, "margin": 12, "reversed": false }, "series": [ { "name": "deepseek-r1:1.5b", "data": [ [ 0, 130.97930295544973 ] ], "grouping": false }, { "name": "deepseek-r1:14b", "data": [ [ 1, 49.23453349407974 ] ], "grouping": false }, { "name": "deepseek-r1:32b", "data": [ [ 2, 27.250536188121576 ] ], "grouping": false }, { "name": "deepseek-r1:7b", "data": [ [ 3, 82.94171372534385 ] ], "grouping": false }, { "name": "deepseek-r1:8b", "data": [ [ 4, 81.53827681205196 ] ], "grouping": false }, { "name": "llama3.1", "data": [ [ 5, 82.30207847220593 ] ], "grouping": false }, { "name": "llama3.2:1b", "data": [ [ 6, 174.85226774844486 ] ], "grouping": false }, { "name": "llama3.2:3b", "data": [ [ 7, 121.64555453669473 ] ], "grouping": false }, { "name": "phi4:14b", "data": [ [ 8, 55.21262857361432 ] ], "grouping": false } ], "drilldown": {}, "tooltip": { "enabled": true, "useHTML": false, "format": null, "headerFormat": "", "pointFormat": "<span style=\"color:{series.color}\">{}</span>: <b>{point.y:.1f} token/sec</b>", "footerFormat": "", "shared": false, "outside": false, "valueDecimals": null, "split": false }, "annotations": null }, "hc_type": "chart", "id": "eval_rate_mean" }
{ "configuration": { "chart": { "type": "bar", "polar": false, "zoomType": "", "options3d": {}, "height": 700, "width": null, "margin": null, "inverted": false, "zooming": {} }, "credits": { "enabled": false }, "title": { "text": "prompt_eval_rate_mean" }, "colorAxis": null, "subtitle": { "text": "" }, "xAxis": { "title": { "text": [ "" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": [ "model" ], "lineWidth": 1, "tickInterval": null, "tickWidth": 0, "tickLength": 10, "tickPixelInterval": null, "plotLines": null, "labels": { "enabled": false, "format": null, "formatter": "", "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false, "step": 0 }, "plotBands": null, "visible": true, "floor": null, "ceiling": null, "type": "linear", "min": null, "gridLineWidth": null, "gridLineColor": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "tickmarkPlacement": null, "units": null, "minRange": null }, "yAxis": { "title": { "text": [ "Token/second<br>Higher is better" ], "useHTML": false, "style": { "color": "#666666" } }, "categories": null, "plotLines": null, "plotBands": null, "lineWidth": null, "tickInterval": null, "tickLength": 10, "floor": null, "ceiling": null, "gridLineInterpolation": null, "gridLineWidth": 1, "gridLineColor": "#CCC", "min": null, "max": null, "minorTickInterval": null, "minorTickWidth": 0, "minTickInterval": null, "startOnTick": true, "endOnTick": null, "minRange": null, "type": "linear", "tickmarkPlacement": null, "labels": { "enabled": true, "formatter": null, "style": { "color": "#666666", "cursor": "default", "fontSize": "11px" }, "useHTML": false } }, "zAxis": { "title": { "text": "Token/second<br>Higher is better" } }, "plotOptions": { "series": { "dataLabels": { "enabled": true, "format": "{}", "distance": 30, "align": "left", "inside": true, "allowOverlap": false, "style": { "fontSize": "17px" } }, "showInLegend": null, 
"turboThreshold": 1000, "stacking": "", "groupPadding": 0, "centerInCategory": false, "findNearestPointBy": "x" } }, "navigator": { "enabled": false }, "scrollbar": { "enabled": false }, "rangeSelector": { "enabled": false, "inputEnabled": false }, "legend": { "enabled": false, "maxHeight": null, "align": "center", "verticalAlign": "bottom", "layout": "horizontal", "width": null, "margin": 12, "reversed": false }, "series": [ { "name": "deepseek-r1:1.5b", "data": [ [ 0, 3376.043956043956 ] ], "grouping": false }, { "name": "deepseek-r1:14b", "data": [ [ 1, 958.8233654967887 ] ], "grouping": false }, { "name": "deepseek-r1:32b", "data": [ [ 2, 417.0385492174077 ] ], "grouping": false }, { "name": "deepseek-r1:7b", "data": [ [ 3, 2716.0872581151216 ] ], "grouping": false }, { "name": "deepseek-r1:8b", "data": [ [ 4, 2566.1321478658942 ] ], "grouping": false }, { "name": "llama3.1", "data": [ [ 5, 4415.684778184778 ] ], "grouping": false }, { "name": "llama3.2:1b", "data": [ [ 6, 24611.111111111113 ] ], "grouping": false }, { "name": "llama3.2:3b", "data": [ [ 7, 13805.555555555557 ] ], "grouping": false }, { "name": "phi4:14b", "data": [ [ 8, 1833.2018460466736 ] ], "grouping": false } ], "drilldown": {}, "tooltip": { "enabled": true, "useHTML": false, "format": null, "headerFormat": "", "pointFormat": "<span style=\"color:{series.color}\">{}</span>: <b>{point.y:.1f} token/sec</b>", "footerFormat": "", "shared": false, "outside": false, "valueDecimals": null, "split": false }, "annotations": null }, "hc_type": "chart", "id": "prompt_eval_rate_mean" }