DeepSeek-R1 and Llama 3.2 - LLM Performance
Go back to list

For our LLM testing, we ran our benchmark suite on top of the Ollama framework. Our goal is to evaluate the speed of LLM operations:
- eval_rate: Inference (token generation) speed in tokens/sec
- prompt_eval_rate: Prompt reading speed in tokens/sec
Here's an example command line for reproducing our tests:
ollama-benchmark -v2 speed --question 81 --model llama3.1:8b --do-not-pull
{
"configuration": {
"chart": {
"type": "bar",
"polar": false,
"zoomType": "",
"options3d": {},
"height": 700,
"width": null,
"margin": null,
"inverted": false,
"zooming": {}
},
"credits": {
"enabled": false
},
"title": {
"text": "eval_rate_mean"
},
"colorAxis": null,
"subtitle": {
"text": ""
},
"xAxis": {
"title": {
"text": [
""
],
"useHTML": false,
"style": {
"color": "#666666"
}
},
"categories": [
"model"
],
"lineWidth": 1,
"tickInterval": null,
"tickWidth": 0,
"tickLength": 10,
"tickPixelInterval": null,
"plotLines": null,
"labels": {
"enabled": false,
"format": null,
"formatter": "",
"style": {
"color": "#666666",
"cursor": "default",
"fontSize": "11px"
},
"useHTML": false,
"step": 0
},
"plotBands": null,
"visible": true,
"floor": null,
"ceiling": null,
"type": "linear",
"min": null,
"gridLineWidth": null,
"gridLineColor": null,
"minorTickInterval": null,
"minorTickWidth": 0,
"minTickInterval": null,
"startOnTick": true,
"endOnTick": null,
"tickmarkPlacement": null,
"units": null,
"minRange": null
},
"yAxis": {
"title": {
"text": [
"Token/second<br>Higher is better"
],
"useHTML": false,
"style": {
"color": "#666666"
}
},
"categories": null,
"plotLines": null,
"plotBands": null,
"lineWidth": null,
"tickInterval": null,
"tickLength": 10,
"floor": null,
"ceiling": null,
"gridLineInterpolation": null,
"gridLineWidth": 1,
"gridLineColor": "#CCC",
"min": null,
"max": null,
"minorTickInterval": null,
"minorTickWidth": 0,
"minTickInterval": null,
"startOnTick": true,
"endOnTick": null,
"minRange": null,
"type": "linear",
"tickmarkPlacement": null,
"labels": {
"enabled": true,
"formatter": null,
"style": {
"color": "#666666",
"cursor": "default",
"fontSize": "11px"
},
"useHTML": false
}
},
"zAxis": {
"title": {
"text": "Token/second<br>Higher is better"
}
},
"plotOptions": {
"series": {
"dataLabels": {
"enabled": true,
"format": "{series.name}",
"distance": 30,
"align": "left",
"inside": true,
"allowOverlap": false,
"style": {
"fontSize": "17px"
}
},
"showInLegend": null,
"turboThreshold": 1000,
"stacking": "",
"groupPadding": 0,
"centerInCategory": false,
"findNearestPointBy": "x"
}
},
"navigator": {
"enabled": false
},
"scrollbar": {
"enabled": false
},
"rangeSelector": {
"enabled": false,
"inputEnabled": false
},
"legend": {
"enabled": false,
"maxHeight": null,
"align": "center",
"verticalAlign": "bottom",
"layout": "horizontal",
"width": null,
"margin": 12,
"reversed": false
},
"series": [
{
"name": "deepseek-r1:1.5b",
"data": [
[
0,
130.97930295544973
]
],
"grouping": false
},
{
"name": "deepseek-r1:14b",
"data": [
[
1,
49.23453349407974
]
],
"grouping": false
},
{
"name": "deepseek-r1:32b",
"data": [
[
2,
27.250536188121576
]
],
"grouping": false
},
{
"name": "deepseek-r1:7b",
"data": [
[
3,
82.94171372534385
]
],
"grouping": false
},
{
"name": "deepseek-r1:8b",
"data": [
[
4,
81.53827681205196
]
],
"grouping": false
},
{
"name": "llama3.1",
"data": [
[
5,
82.30207847220593
]
],
"grouping": false
},
{
"name": "llama3.2:1b",
"data": [
[
6,
174.85226774844486
]
],
"grouping": false
},
{
"name": "llama3.2:3b",
"data": [
[
7,
121.64555453669473
]
],
"grouping": false
},
{
"name": "phi4:14b",
"data": [
[
8,
55.21262857361432
]
],
"grouping": false
}
],
"drilldown": {},
"tooltip": {
"enabled": true,
"useHTML": false,
"format": null,
"headerFormat": "",
"pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} token/sec</b>",
"footerFormat": "",
"shared": false,
"outside": false,
"valueDecimals": null,
"split": false
},
"annotations": null
},
"hc_type": "chart",
"id": "eval_rate_mean"
}
{
"configuration": {
"chart": {
"type": "bar",
"polar": false,
"zoomType": "",
"options3d": {},
"height": 700,
"width": null,
"margin": null,
"inverted": false,
"zooming": {}
},
"credits": {
"enabled": false
},
"title": {
"text": "prompt_eval_rate_mean"
},
"colorAxis": null,
"subtitle": {
"text": ""
},
"xAxis": {
"title": {
"text": [
""
],
"useHTML": false,
"style": {
"color": "#666666"
}
},
"categories": [
"model"
],
"lineWidth": 1,
"tickInterval": null,
"tickWidth": 0,
"tickLength": 10,
"tickPixelInterval": null,
"plotLines": null,
"labels": {
"enabled": false,
"format": null,
"formatter": "",
"style": {
"color": "#666666",
"cursor": "default",
"fontSize": "11px"
},
"useHTML": false,
"step": 0
},
"plotBands": null,
"visible": true,
"floor": null,
"ceiling": null,
"type": "linear",
"min": null,
"gridLineWidth": null,
"gridLineColor": null,
"minorTickInterval": null,
"minorTickWidth": 0,
"minTickInterval": null,
"startOnTick": true,
"endOnTick": null,
"tickmarkPlacement": null,
"units": null,
"minRange": null
},
"yAxis": {
"title": {
"text": [
"Token/second<br>Higher is better"
],
"useHTML": false,
"style": {
"color": "#666666"
}
},
"categories": null,
"plotLines": null,
"plotBands": null,
"lineWidth": null,
"tickInterval": null,
"tickLength": 10,
"floor": null,
"ceiling": null,
"gridLineInterpolation": null,
"gridLineWidth": 1,
"gridLineColor": "#CCC",
"min": null,
"max": null,
"minorTickInterval": null,
"minorTickWidth": 0,
"minTickInterval": null,
"startOnTick": true,
"endOnTick": null,
"minRange": null,
"type": "linear",
"tickmarkPlacement": null,
"labels": {
"enabled": true,
"formatter": null,
"style": {
"color": "#666666",
"cursor": "default",
"fontSize": "11px"
},
"useHTML": false
}
},
"zAxis": {
"title": {
"text": "Token/second<br>Higher is better"
}
},
"plotOptions": {
"series": {
"dataLabels": {
"enabled": true,
"format": "{series.name}",
"distance": 30,
"align": "left",
"inside": true,
"allowOverlap": false,
"style": {
"fontSize": "17px"
}
},
"showInLegend": null,
"turboThreshold": 1000,
"stacking": "",
"groupPadding": 0,
"centerInCategory": false,
"findNearestPointBy": "x"
}
},
"navigator": {
"enabled": false
},
"scrollbar": {
"enabled": false
},
"rangeSelector": {
"enabled": false,
"inputEnabled": false
},
"legend": {
"enabled": false,
"maxHeight": null,
"align": "center",
"verticalAlign": "bottom",
"layout": "horizontal",
"width": null,
"margin": 12,
"reversed": false
},
"series": [
{
"name": "deepseek-r1:1.5b",
"data": [
[
0,
3376.043956043956
]
],
"grouping": false
},
{
"name": "deepseek-r1:14b",
"data": [
[
1,
958.8233654967887
]
],
"grouping": false
},
{
"name": "deepseek-r1:32b",
"data": [
[
2,
417.0385492174077
]
],
"grouping": false
},
{
"name": "deepseek-r1:7b",
"data": [
[
3,
2716.0872581151216
]
],
"grouping": false
},
{
"name": "deepseek-r1:8b",
"data": [
[
4,
2566.1321478658942
]
],
"grouping": false
},
{
"name": "llama3.1",
"data": [
[
5,
4415.684778184778
]
],
"grouping": false
},
{
"name": "llama3.2:1b",
"data": [
[
6,
24611.111111111113
]
],
"grouping": false
},
{
"name": "llama3.2:3b",
"data": [
[
7,
13805.555555555557
]
],
"grouping": false
},
{
"name": "phi4:14b",
"data": [
[
8,
1833.2018460466736
]
],
"grouping": false
}
],
"drilldown": {},
"tooltip": {
"enabled": true,
"useHTML": false,
"format": null,
"headerFormat": "",
"pointFormat": "<span style=\"color:{series.color}\">{series.name}</span>: <b>{point.y:.1f} token/sec</b>",
"footerFormat": "",
"shared": false,
"outside": false,
"valueDecimals": null,
"split": false
},
"annotations": null
},
"hc_type": "chart",
"id": "prompt_eval_rate_mean"
}