Disjoint Sets¶

CSCI 362: Data Structures¶

Prof. William Killian¶

Disjoint Sets¶

The universe consists of $n$ elements, each named $1, 2, ... , n$

The ADT is a collection of sets of elements
Each element is in exactly one set
- sets are disjoint
- to start, each set contains one element
Each set has a name, which is the name of one of its elements
- (any one will do)

Find operation¶

Setname = find ( elementname )

returns the name of the unique set that contains the given element
not the same as “find” in search trees

Union operation¶

union ( Setname1, Setname2 )

replaces both sets with a new set
the name of the new set is not specified

Analysis: worst-case total running time of a sequence of f finds and u unions

Quick Find¶

function initialize (N) {
  setname = []
  for (i = 0; i < N; ++i)
    setname[i] = i;
}
function union (i, j) { // O(?)
  for (k = 0; k < N; ++k)
    if (setname[k] == j)
      setname[k] = i;
}
function find (e) {  // O(?)
  return setname[e];
}

Quick Union¶

function initialize (N) {
  parent = []
  for (i = 0; i < N; ++i)
    parent[i] = 0;
}
function union (i, j) { // O(?)
  parent[j] = i;
}
function find (e) {  // O(?)
  while (parent[e] != 0) {
    e = parent[e];
  }
  return e;
}

Smart Union-Find (find compression + union by rank)¶

function initialize (N) {
  parent = []
  rank = []
  for (i = 0; i < N; ++i) {
    parent[i] = i;
    rank[i] = 0;
  }
}

function find (i) { // Complexity: O(?)
  if (parent[i] != i) {
    parent[i] = findSet (parent[i]);
    return parent[i];
  }
  return i;
}

function union (i, j) { // Complexity: O(?)
  if (find (i) == find (j)) { // compression
    return;
  }
  var x = findSet (i);
  var y = findSet (j);
  if (rank[x] < rank[y]) { // bigger rank at top
    [x, y] = [y, x];
  }
  parent[y] = x;
  if (rank[x] == rank[y]) { // if ranks match, increase parent
    ++rank[x];
  }
}

Complexity¶

$u$ union operations
$f$ find operations

Worst-case complexity: $O\left(u + f \times\alpha\left(f, u\right)\right)$

What is $\alpha(N,M)$?

Inverse of Ackermann's function
log*$(n)$ -- how many times log must be applied to reach 1
- log*$(65536) = 4$
- log*$(2^{65536}) = 5$
Growth of $\alpha(N,M)$ is far slower than log*

#include <vector>

struct DisjointSet {
  DisjointSet (size_t N);
  size_t findSet (size_t i);
  bool isSameSet (size_t i, size_t j);
  void unionSet (size_t i, size_t j);
//private:
  std::vector<size_t> m_parent; // "parent" links
  std::vector<size_t> m_rank;   // rank measure
};

#include <numeric>

DisjointSet::DisjointSet (size_t N)
: m_parent (N, 0), m_rank (N, 0) {
  std::iota (m_parent.begin (), m_parent.end (), 0);
  // parent: [ 0, 1, 2, 3, ... ]
  // rank:   [ 0, 0, 0, 0, ... ]
}

size_t DisjointSet::findSet (size_t i) {
  if (m_parent[i] != i) {
    m_parent[i] = findSet (m_parent[i]);
    return m_parent[i];
  }
  return i;
}

bool DisjointSet::isSameSet (size_t i, size_t j) {
  return findSet (i) == findSet (j);
}

#include <utility>
void DisjointSet::unionSet (size_t i, size_t j) {
  if (isSameSet (i, j)) {
    return;
  }
  // find both representative items
  size_t x = findSet (i);
  size_t y = findSet (j);
  // optimization (rank): keep y 'shorter' than x
  if (m_rank[x] < m_rank[y]) {
    std::swap (x, y);
  }
  // set y under x
  m_parent[y] = x;
  // optimization (rank):
  if (m_rank[x] == m_rank[y]) {
    ++m_rank[x];
  }
}

Example trace¶

DisjointSet uf (8);

uf.m_parent

{ 0, 1, 2, 3, 4, 5, 6, 7 }

uf.m_rank

{ 0, 0, 0, 0, 0, 0, 0, 0 }

After sequence of unions¶

uf.unionSet(4, 5);
uf.unionSet(6, 7);
uf.unionSet(4, 6);
uf.m_parent

{ 0, 1, 2, 3, 4, 4, 4, 6 }

uf.m_rank

{ 0, 0, 0, 0, 2, 0, 1, 0 }

After sequence of additional unions¶

uf.unionSet(0,1);
uf.unionSet(2,3);
uf.unionSet(1,2);
uf.m_parent

{ 0, 0, 0, 2, 4, 4, 4, 6 }

uf.m_rank

{ 2, 0, 1, 0, 2, 0, 1, 0 }

After union-ing the two unions¶

uf.unionSet(0, 4);
uf.m_parent

{ 0, 0, 0, 2, 0, 4, 4, 6 }

uf.m_rank

{ 3, 0, 1, 0, 2, 0, 1, 0 }

Find operations (cascade)¶

uf.findSet(7)

0

uf.m_parent

{ 0, 0, 0, 2, 0, 4, 0, 0 }

The lone finds¶

uf.findSet(5)

0

uf.findSet(3)

0

uf.m_parent

{ 0, 0, 0, 0, 0, 0, 0, 0 }