Implementare Least Frequently Used Cache in .NET cu teste unitare si teste de performanta

Principiul de functionare la Least Frequently Used Cache este urmatorul: fiecare element din cache pastreaza intr-o variabila numarul de accesari al respectivului element, cand cache-ul depaseste limita maxima se elimina elementul cu numarul minim de accesari, lasand loc unui nou element sa fie adaugat in cache.

Implementare Structura de Date

Este nevoie de o structura de date care sa pastreze elementele din cache sortate dupa numarul de accesari, astfel operatia de eliminare a elementului cu numarul minim de accesari sa ruleze in O(log n). Implementarea curenta foloseste un SortedList unde cheia este numarul de accesari iar valoarea este o lista intantuita cu elementele din cache care au acelas numar de accesari. SortedList sorteaza listele inlantuite folosind un arbore binar. Aceasta structura de date permite rulare operatilor de adaugare si accesare elemente din cache in complexitate de timp O(log n)

Am creat o interfata generica pe care clasa LFUCache sa o implementeze:

namespace LfuCache
{
public interface ICache<in TKey, TValue>
{
void Add(TKey key, TValue val);
TValue Get(TKey key);
}
}
view raw ICache.cs hosted with ❤ by GitHub

Implementarea clasa LfuCache este sub forma de structura de date generica care implementeaza interfata ICache, permite crearea unui cache care sa stocheze elemente de orice tip, impreuna cu operatiile Add si Get:
using System.Collections.Generic;
using System.Linq;
namespace LfuCache
{
public class LfuCache<TKey, TValue> : ICache<TKey, TValue>
{
private class CacheNode
{
public TKey Key { get; set; }
public TValue Data { get; set; }
public int UseCount { get; set; }
}
private readonly int _size;
#if DEBUG
public delegate void EvictDelegate(TValue value);
public event EvictDelegate EvictEvent;
#endif
private readonly Dictionary<TKey, LinkedListNode<CacheNode>> _cache = new Dictionary<TKey, LinkedListNode<CacheNode>>();
private readonly SortedDictionary<int, LinkedList<CacheNode>> _lfuBinaryTree = new SortedDictionary<int, LinkedList<CacheNode>>();
private int _entriesCount;
public LfuCache(int size)
{
_size = size;
}
public void Add(TKey key, TValue val)
{
TValue existing;
if (!TryGet(key, out existing))
{
var node = new CacheNode() { Key = key, Data = val };
if (_entriesCount == _size)
{
var removedData = Evict();
_entriesCount--;
#if DEBUG
RaiseEvictEvent(removedData);
#endif
}
var insertedNode = InsertCacheNodeInLfuBinaryTree(node);
_cache[key] = insertedNode;
_entriesCount++;
}
}
private TValue Evict()
{
var minimumUseCountLinkedList = _lfuBinaryTree.First().Value;
var cacheNode = minimumUseCountLinkedList.First.Value;
var dataToRemove = cacheNode.Data;
_cache.Remove(cacheNode.Key);
minimumUseCountLinkedList.RemoveFirst();
return dataToRemove;
}
#if DEBUG
private void RaiseEvictEvent(TValue data)
{
EvictDelegate handler = EvictEvent;
if (handler != null)
{
handler(data);
}
}
#endif
private LinkedListNode<CacheNode> InsertCacheNodeInLfuBinaryTree(CacheNode node)
{
return InsertCacheNodeInLfuBinaryTree(node, node.UseCount);
}
public TValue Get(TKey key)
{
TValue val;
TryGet(key, out val);
return val;
}
public bool TryGet(TKey key, out TValue val)
{
LinkedListNode<CacheNode> linkedListCacheNode;
bool success = false;
if (_cache.TryGetValue(key, out linkedListCacheNode))
{
var cacheNode = linkedListCacheNode.Value;
val = cacheNode.Data;
RemoveLinkedListNodeFromLfuBinaryTree(linkedListCacheNode);
var newIndex = ++cacheNode.UseCount;
var newNode = InsertCacheNodeInLfuBinaryTree(cacheNode, newIndex);
_cache[key] = newNode;
success = true;
}
else
{
val = default(TValue);
}
return success;
}
private void RemoveLinkedListNodeFromLfuBinaryTree(LinkedListNode<CacheNode> linkedListNode)
{
var cacheNode = linkedListNode.Value;
var oldIndex = cacheNode.UseCount;
_lfuBinaryTree[oldIndex].Remove(linkedListNode);
if (_lfuBinaryTree[oldIndex].Count == 0)
{
_lfuBinaryTree.Remove(oldIndex);
}
}
private LinkedListNode<CacheNode> InsertCacheNodeInLfuBinaryTree(CacheNode node, int index)
{
LinkedList<CacheNode> cacheNodes;
if (!_lfuBinaryTree.TryGetValue(index, out cacheNodes))
{
cacheNodes = new LinkedList<CacheNode>();
_lfuBinaryTree.Add(index, cacheNodes);
}
var insertedNode = cacheNodes.AddLast(node);
return insertedNode;
}
}
}
view raw LfuCache.cs hosted with ❤ by GitHub
Un exemplu de utilizare al clasei LfuCache:
ICache<string, string> cache = new LfuCache<string, string>(1000);
cache.Add("name", "Helene");
cache.Add("surname", "Stuart");
var name = cache.Get("name");
view raw UsingCache.cs hosted with ❤ by GitHub
Acest proiect este publicat pe GitHub si ca pachet NuGet. Daca vreti sa folositi acest cache in proiectul dumneavoastra il puteti instala ca si pachet NuGet cu urmatoarea comanda din Visual Studio sau din interfata grafica:
Install-Package LfuCache -Version 1.0.0
view raw InstallNuget hosted with ❤ by GitHub

Teste Unitare

In scrierea testelor unitare am folosit framework-ul MSTest de la Microsoft, acestea urmeaza pattern-ul Arrange - Act - Assert. Implementarea testelor unitare:
using Microsoft.VisualStudio.TestTools.UnitTesting;
namespace LfuCache.UnitTest
{
[TestClass]
public class LfuCacheTests
{
private LfuCache<string, string> _lfuCache;
[TestInitialize]
public void BeforeEach()
{
_lfuCache = new LfuCache<string, string>(3);
}
[TestMethod]
public void AddRetrieveToLfuCache()
{
// Arrange
_lfuCache.Add("1", "one");
_lfuCache.Add("2", "two");
_lfuCache.Add("3", "three");
// Act
var one = _lfuCache.Get("1");
var two = _lfuCache.Get("2");
var three = _lfuCache.Get("3");
// Assert
Assert.AreEqual("one", one);
Assert.AreEqual("two", two);
Assert.AreEqual("three", three);
}
[TestMethod]
public void AddRetrieveToLfuCacheSizeOne()
{
_lfuCache = new LfuCache<string, string>(1);
// Arrange
_lfuCache.Add("1", "one");
_lfuCache.Get("1");
_lfuCache.Get("1");
_lfuCache.Get("1");
_lfuCache.Get("1");
_lfuCache.Add("2", "two");
_lfuCache.Get("2");
_lfuCache.Get("2");
_lfuCache.Get("2");
// Act
var one = _lfuCache.Get("1");
var two = _lfuCache.Get("2");
// Assert
Assert.IsNull(one);
Assert.AreEqual("two", two);
}
[TestMethod]
public void AddRetrieveToLfuCacheWithEvicts()
{
// Arrange
_lfuCache.Add("1", "one");
_lfuCache.Get("1");
_lfuCache.Get("1");
_lfuCache.Get("1");
_lfuCache.Get("1");
_lfuCache.Add("2", "two");
_lfuCache.Get("2");
_lfuCache.Get("2");
_lfuCache.Get("2");
_lfuCache.Add("3", "three");
_lfuCache.Get("3");
_lfuCache.Get("3");
_lfuCache.Get("3");
_lfuCache.Get("3");
_lfuCache.Add("4", "four");
// Act
var one = _lfuCache.Get("1");
var two = _lfuCache.Get("2");
var three = _lfuCache.Get("3");
var four = _lfuCache.Get("4");
// Assert
Assert.AreEqual("one", one);
Assert.IsNull(two);
Assert.AreEqual("three", three);
Assert.AreEqual("four", four);
}
#if DEBUG
[TestMethod]
public void AddRetrieveToLfuCacheWithEvictsTestingUsingEvents()
{
_lfuCache.EvictEvent += delegate(string value)
{
Assert.AreEqual("two", value);
};
_lfuCache.Add("1", "one");
_lfuCache.Get("1");
_lfuCache.Get("1");
_lfuCache.Get("1");
_lfuCache.Get("1");
_lfuCache.Add("2", "two");
_lfuCache.Get("2");
_lfuCache.Get("2");
_lfuCache.Get("2");
_lfuCache.Add("3", "three");
_lfuCache.Get("3");
_lfuCache.Get("3");
_lfuCache.Get("3");
_lfuCache.Get("3");
_lfuCache.Add("4", "four");
}
#endif
}
}
Testele unitare se pot rula local din Visual Studio > Test Explorer sau ca pas in procesul de integrare continua pe server folosind un serviciu ca si Azure Pipeline.
Un aspect important in testele unitare e code coverage, cat din cod e acoperit de testele unitare, in cazul nostru code coverage este 100%, pentru a masura code coverage se poate folosi local dotCover de la JetBrains.

Teste de Performanta

Pentru testele de performanta am folosit urmatoarea librarie PerformanceDotNet, aceasta masoara timpul de executie si cantitatea de memorie consumata. Secventa de operatii pe cache de adaugare/accesare este generata aleatoriu intr-un array de operatii de lungime OperationsCount, aceste operatii proceseaza elementele dintr-o lista de dimensiune EelementsCount folosind LFUCache.
using System;
using System.Collections;
using System.Collections.Generic;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Diagnostics.Windows;
namespace LfuCache.PerformanceTest
{
[Config(typeof(MemoryConfig))]
public class LfuCacheBenchmarks
{
[Params(100000)]
public int ProcessingElementsCount { get; set; }
[Params(1000000)]
public int OperationsCount { get; set; }
[Params(90000)]
public int CacheSize { get; set; }
private BitArray _operations;
private class OperationType
{
public const bool Read = false;
public const bool Write = true;
}
class ListElement
{
public string Key { get; set; }
public string Value { get; set; }
}
private ICache<string, string> _lfuCache;
private IList<ListElement> _processingElements;
[Setup]
public void BeforeEach()
{
_lfuCache = new LfuCache<string, string>(CacheSize);
_processingElements = new List<ListElement>();
_operations = new BitArray(OperationsCount);
var random = new Random();
for (int i = 0; i < ProcessingElementsCount; i++)
{
ListElement listElement = new ListElement();
var element = random.Next(1, ProcessingElementsCount).ToString();
listElement.Key = element;
listElement.Value = element;
_processingElements.Add(listElement);
}
for (int i = 0; i < OperationsCount; i++)
{
_operations[i] = random.Next(100) < 50 ? OperationType.Read : OperationType.Write;
}
}
[Benchmark]
public void BenchmarkLfuCachePerformance()
{
int index = 0;
for (int i = 0; i < OperationsCount; i++)
{
if (index >= _processingElements.Count)
index = index % _processingElements.Count;
if (_operations[i] == OperationType.Read)
{
_lfuCache.Get(_processingElements[index].Key);
}
else if (_operations[i] == OperationType.Write)
{
_lfuCache.Add(_processingElements[index].Key, _processingElements[index].Value);
}
index++;
}
}
private class MemoryConfig : ManualConfig
{
public MemoryConfig()
{
Add(new MemoryDiagnoser());
}
}
}
}
Pentru rularea testelor de performanta se poate folosi urmatoarea extensie pentru Visual Studio: BenchmarkDotNet VS Runner. Un exemplu de raport HTML cu rezultatul la benchmark: PerformanceDotNet poate genera rapoarte cu rezultatele la benchmark in format HTML, CSV, MarkDown GitHub, RPlotExporter(grafic). Rezultatele la benchmark pot varia in functie de performanta masini pe care se ruleaza testele de performanta, cea mai importanta este performanta procesorului pe care se executa dar mai poate depinde si de runtime daca este .NET Core sau .Net Framework, si de versiunea acestuia.

Comentarii

Postări populare