What is the best data structure for storing the top n elements in sort order?

I am looking for a data structure that holds the top n elements, similar to this question, but with the additional requirement of maintaining the sort order. Obviously, I could just sort at the end, but there might be a more efficient way to do this on the fly. There will only be insertions, never deletions, followed by an iteration through the top n elements at the end.

This question is language-agnostic, but the implementation will be in C#, so an answer that uses native .NET collections is preferable.

EDIT: I should clarify that the sort order only matters at the very end, when the top n elements are iterated over. While insertions are happening, the sort order does not matter as long as the top n elements are retained.

+5
source share
6 answers

If you really need them sorted at all times, you should use a self-balancing binary search tree. But keep in mind that this (keeping the items sorted) is not an optimization, but a luxury that has a cost.

A self-balancing binary search tree is slower than an implicit heap by a constant factor.

? , .

( ...), . , node , , .

+1

n , .

, , , n? , , n / .

- , n? ( Python deque) n + 1. , . (n + 1), . , n , , .

, ( n, b), , . O (1), x b.

+1

Kelly, . N < 100, , , () (, 20 ). (#), . , , , . .

/// <summary>
/// Fixed-capacity collection that retains the <c>maxSize</c> smallest items added so far
/// (as ordered by the comparer), kept in ascending order in a backing array.
/// </summary>
/// <typeparam name="T">The element type</typeparam>
public class TopNStructure<T> : IEnumerable<T> where T : IComparable<T>
{
    // Once this many items are stored, Insert locates the slot by binary search
    // instead of a linear scan.
    private const int SizeForLinearOrBinaryInsert = 20;

    private readonly int _maxSize;
    private int _currentSize;
    private readonly T[] _items;
    private readonly IComparer<T> _comparer;

    /// <summary>
    /// The number of items currently stored (never more than the maximum size)
    /// </summary>
    public int Count { get { return _currentSize; } }

    /// <summary>
    /// Creates an empty structure with the given capacity and ordering
    /// </summary>
    /// <param name="maxSize">The maximum number of items to retain; must be at least 1</param>
    /// <param name="comparer">The ordering to use; <c>null</c> selects <c>Comparer&lt;T&gt;.Default</c></param>
    /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="maxSize"/> is zero or negative</exception>
    public TopNStructure(int maxSize, IComparer<T> comparer)
    {
        if (maxSize <= 0)
        {
            // The two-argument overload is required: the single-string overload
            // treats its argument as the parameter name, not the message.
            throw new ArgumentOutOfRangeException("maxSize", "Max size must be a positive, non-zero value");
        }
        _maxSize = maxSize;
        _currentSize = 0;
        _items = new T[maxSize];
        _comparer = comparer ?? Comparer<T>.Default;
    }

    /// <summary>
    /// Creates an empty structure with the given capacity, ordered by the default comparer
    /// </summary>
    /// <param name="maxSize">The maximum number of items to retain; must be at least 1</param>
    public TopNStructure(int maxSize)
        : this(maxSize, Comparer<T>.Default) { }

    /// <summary>
    /// Adds an item to this structure
    /// </summary>
    /// <param name="item">The item to add</param>
    /// <returns>
    /// True if the item was added; false if the structure is full and the item
    /// does not compare strictly less than the largest stored item
    /// </returns>
    public bool Add(T item)
    {
        // Fast path: the item is not smaller than the current largest, so it
        // either goes at the end or, when full, is rejected.
        if (_currentSize > 0 && _comparer.Compare(_items[_currentSize - 1], item) <= 0)
        {
            if (_currentSize == _maxSize)
            {
                return false;
            }
            _items[_currentSize] = item;
            _currentSize++;
            return true;
        }

        // Either the structure is empty or the item belongs before the current
        // largest. When full, the shift in Insert drops the largest item.
        Insert(item);
        if (_currentSize < _maxSize)
        {
            _currentSize++;
        }
        return true;
    }

    /// <summary>
    /// Inserts the item at its sorted position. Callers guarantee that the
    /// structure is either not full or that the item is smaller than the last
    /// stored item, so the computed index is always inside the array.
    /// </summary>
    /// <param name="item">The item to insert</param>
    private void Insert(T item)
    {
        int index;
        if (_currentSize >= SizeForLinearOrBinaryInsert)
        {
            index = Array.BinarySearch<T>(_items, 0, _currentSize, item, _comparer);
            if (index < 0)
            {
                // A negative result is the bitwise complement of the insertion point.
                index = ~index;
            }
        }
        else
        {
            // Linear scan for the first stored item greater than the new one.
            index = 0;
            while (index < _currentSize && _comparer.Compare(_items[index], item) <= 0)
            {
                index++;
            }
        }
        ShiftAndInsert(item, index, _currentSize);
    }

    /// <summary>
    /// Moves the items in [index, maxIndex) one slot to the right and stores
    /// the item at <paramref name="index"/>
    /// </summary>
    /// <param name="item">The item to store</param>
    /// <param name="index">The slot that receives the item</param>
    /// <param name="maxIndex">
    /// The highest slot to write while shifting; clamped to the last array slot,
    /// which discards the previous last item when the array is full
    /// </param>
    private void ShiftAndInsert(T item, int index, int maxIndex)
    {
        if (maxIndex >= _maxSize)
        {
            maxIndex = _maxSize - 1;
        }
        for (int i = maxIndex; i > index; i--)
        {
            _items[i] = _items[i - 1];
        }
        _items[index] = item;
    }

    /// <summary>
    /// Enumerates the stored items in ascending order. Only the filled part of
    /// the backing array is visited, so unused slots are never yielded.
    /// </summary>
    public IEnumerator<T> GetEnumerator()
    {
        for (int i = 0; i < _currentSize; i++)
        {
            yield return _items[i];
        }
    }

    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}

/// <summary>
/// Demo driver: feeds 50 seeded pseudo-random doubles into a 25-slot
/// TopNStructure, prints each retained value with its position, then waits
/// for a key press.
/// </summary>
static void Main(string[] args)
{
    var topValues = new TopNStructure<double>(25);
    var generator = new Random(132151);

    int remaining = 50;
    while (remaining > 0)
    {
        topValues.Add(generator.NextDouble());
        remaining--;
    }

    int position = 0;
    foreach (double sample in topValues)
    {
        Console.WriteLine("{0} {1}", position, sample);
        position += 1;
    }

    Console.ReadKey();
}
+1

, k- , . k (, ), , , .

, STL.

O (n log n) , , " " . .

0

, Linked list . , , ( > n) - O (1) . , Linked 2 .

/// <summary>
/// Fixed-capacity collection that retains the <c>maxSize</c> smallest items added so far
/// (as ordered by the comparer), kept in ascending order in a linked list.
/// </summary>
/// <typeparam name="T">The element type</typeparam>
public class TopNStructureLinkedList<T> : IEnumerable<T> where T : IComparable<T>
{
    private readonly int _maxSize;
    private int _currentSize;
    private readonly LinkedList<T> _items;
    private readonly IComparer<T> _comparer;

    // Node holding the largest stored item, i.e. the last node of the list.
    private LinkedListNode<T> _largestItemNode;

    /// <summary>
    /// The number of items currently stored (never more than the maximum size)
    /// </summary>
    public int Count { get { return _currentSize; } }

    /// <summary>
    /// Creates an empty structure with the given capacity and ordering
    /// </summary>
    /// <param name="maxSize">The maximum number of items to retain; must be at least 1</param>
    /// <param name="comparer">The ordering to use; <c>null</c> selects <c>Comparer&lt;T&gt;.Default</c></param>
    /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="maxSize"/> is zero or negative</exception>
    public TopNStructureLinkedList(int maxSize, IComparer<T> comparer)
    {
        if (maxSize <= 0)
        {
            // Without this check a non-positive capacity is never reached by
            // the size counter and the list grows without bound.
            throw new ArgumentOutOfRangeException("maxSize", "Max size must be a positive, non-zero value");
        }
        _maxSize = maxSize;
        _currentSize = 0;
        _items = new LinkedList<T>();
        _comparer = comparer ?? Comparer<T>.Default;
        _largestItemNode = null;
    }

    /// <summary>
    /// Creates an empty structure with the given capacity, ordered by the default comparer
    /// </summary>
    /// <param name="maxSize">The maximum number of items to retain; must be at least 1</param>
    public TopNStructureLinkedList(int maxSize)
        : this(maxSize, Comparer<T>.Default) { }

    /// <summary>
    /// Adds an item to this structure
    /// </summary>
    /// <param name="item">The item to add</param>
    /// <returns>
    /// True if the item was added; false if the structure is full and the item
    /// does not compare strictly less than the largest stored item
    /// </returns>
    public bool Add(T item)
    {
        if (_currentSize == 0)
        {
            _largestItemNode = _items.AddFirst(item);
            _currentSize++;
            return true;
        }

        // The item is not smaller than the current largest: append it, or
        // reject it when already at capacity. The capacity check applies at
        // every size, including a capacity of 1.
        if (_comparer.Compare(_largestItemNode.Value, item) <= 0)
        {
            if (_currentSize == _maxSize)
            {
                return false;
            }
            _largestItemNode = _items.AddAfter(_largestItemNode, item);
            _currentSize++;
            return true;
        }

        // The item belongs somewhere before the current largest.
        Insert(item);
        if (_currentSize == _maxSize)
        {
            // At capacity: drop the old largest; its predecessor becomes the largest.
            _largestItemNode = _items.Last.Previous;
            _items.RemoveLast();
        }
        else
        {
            _currentSize++;
        }
        return true;
    }

    /// <summary>
    /// Inserts the item before the largest node, walking backwards from it
    /// until a stored item that is not greater than the new one is found.
    /// Only called when the item is smaller than the largest stored item.
    /// </summary>
    /// <param name="item">The item to insert</param>
    private void Insert(T item)
    {
        LinkedListNode<T> node = _largestItemNode.Previous;
        while (node != null)
        {
            if (_comparer.Compare(node.Value, item) <= 0)
            {
                _items.AddAfter(node, item);
                return;
            }
            node = node.Previous;
        }
        // Every stored item is greater than the new one.
        _items.AddFirst(item);
    }

    /// <summary>
    /// Enumerates the stored items in ascending order
    /// </summary>
    public IEnumerator<T> GetEnumerator()
    {
        return ((IEnumerable<T>)_items).GetEnumerator();
    }

    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}
0

, . O (log n), , , O (n log k): n , O (log k).

Using a selection algorithm to find the top k elements of an array gives you complexity O(k log n). Since k is less than n, this is better.

There is an implementation in the Wikipedia article for Quickselect. Also, using a plain priority queue (as most of the other answers here suggest) is easier. A sketch:

// Pseudo-code sketch: turn the input array into a heap, then pop k times.
create_heap(array) // O(n)
// Store the k popped values in pop order (heap_pop presumably removes and
// returns the current top element of the heap -- confirm against the heap used).
for(int i=0; i<k; i++)
    sorted[i] = heap_pop(array) //O(log n)
0
source

All Articles