EsoErik

Sunday, July 29, 2012

 

Hopcroft-Karp Maximum Bipartite Matching in C++

I couldn't find any clear explanations of this algorithm or any C++ or Python implementations that were not a component of some larger graph library.  I wrote the following C++ implementation as I clicked through various tedious powerpoint slides describing Hopcroft-Karp.  It took a while to figure out the simple ideas represented by all those proofs and definitions.  I'm releasing this code for use and reference under GPL version 2.

MaxMatch.h
// Copyright Erik Hvatum 2012
// License: GPL v2

#pragma once
#include <vector>
#include <string>
#include <map>

class MaxMatch
{
public:
    public:
    typedef std::ptrdiff_t VertexIndex;
    typedef std::ptrdiff_t EdgeIndex;
    typedef std::ptrdiff_t Layer;

    struct Edge
    {
        Edge();
        Edge(const VertexIndex& u_vertex_, const VertexIndex& v_vertex_, const EdgeIndex& idx_);
        VertexIndex u_vertex, v_vertex;
        // Index of this edge in m_edges
        EdgeIndex idx;
    };

    typedef std::vector<Edge> Edges;
    typedef std::vector<EdgeIndex> EdgeIndexes;

    enum VertexType
    {
        U_Vertex,
        V_Vertex
    };

    struct Vertex
    {
        Vertex();
        Vertex(const VertexType& type_, const std::string& name_, const VertexIndex& idx_);
        VertexType type;
        std::string name;
        // Index of this vertex in m_vertexes
        VertexIndex idx;
        EdgeIndexes edges;
    };

    enum
    {
        NillVertIdx = 0
    };

    typedef std::vector<Vertex> Vertexes;
    typedef std::vector<VertexIndex> VertexIndexes;

    typedef std::vector<Layer> Layers;

    MaxMatch();
    const Vertexes& u_vertexes() const;
    const Vertexes& v_vertexes() const;
    const Edges& edges() const;
    void addVertex(const VertexType& type, const std::string& name);
    void addEdge(const std::string& u_vertexName, const std::string& v_vertexName);
    const VertexIndexes& us_to_vs() const;
    const VertexIndexes& vs_to_us() const;
    // Uses the Hopcroft-Karp algorithm to find a maximal matching set; returns the number of edges in the matching set
    // found.
    int hopcoftKarp();
    // Helper for hopcroftKarp - uses BFS to put U vertexes in layers (layer 0 for free U vertexes, layer 1 for the U
    // vertexes matched to V vertexes with edges incident to a layer 0 U vertex, etc).  Returns true if at least one
    // potential augmenting path exists.
    bool makeLayers();
    // Helper for hopcroftKarp - uses DFS guided by layer number to find augmenting paths.  Returns true if an
    // augmenting path from uIdx was found.
    bool findPath(const VertexIndex& uIdx);
    

protected:
    typedef std::map<std::string, VertexIndex> VertexNamesToIndexes;

    static const Layer InfLayer;

    Vertexes m_u_vertexes;
    Vertexes m_v_vertexes;
    VertexNamesToIndexes m_u_vertNamesToIdxs;
    VertexNamesToIndexes m_v_vertNamesToIdxs;
    Edges m_edges;
    // u -> v matchings.  For example, m_us_to_vs[2] == 3 indicates that vertex m_u_vertexes[2] is matched to
    // m_v_vertexes[3]
    VertexIndexes m_us_to_vs;
    // v -> u matchings.
    VertexIndexes m_vs_to_us;
    // u vertex layer values.  m_layers[2] == 1 indicates that m_u_vertexes[2] has been placed in layer 1 by the most
    // recent call to makeLayers().
    Layers m_layers;
}; 

MaxMatch.cpp
// Copyright Erik Hvatum 2012
// License: GPL v2

#include "MaxMatch.h"
#include <limits>
#include <list>

const MaxMatch::Layer MaxMatch::InfLayer(std::numeric_limits<MaxMatch::Layer>::max());

MaxMatch::Edge::Edge()
  : u_vertex(std::numeric_limits<VertexIndex>::min()),
    v_vertex(std::numeric_limits<VertexIndex>::min()),
    idx(std::numeric_limits<EdgeIndex>::min())
{
}

MaxMatch::Edge::Edge(const VertexIndex& u_vertex_, const VertexIndex& v_vertex_, const EdgeIndex& idx_)
  : u_vertex(u_vertex_),
    v_vertex(v_vertex_),
    idx(idx_)
{
}

MaxMatch::Vertex::Vertex()
  : idx(std::numeric_limits<VertexIndex>::min())
{
}

MaxMatch::Vertex::Vertex(const VertexType& type_, const std::string& name_, const VertexIndex& idx_)
  : type(type_),
    name(name_),
    idx(idx_)
{
}

MaxMatch::MaxMatch()
{
    addVertex(U_Vertex, "NILL U Vertex");
    addVertex(V_Vertex, "NILL V Vertex");
}

const MaxMatch::Vertexes& MaxMatch::u_vertexes() const
{
    return m_u_vertexes;
}

const MaxMatch::Vertexes& MaxMatch::v_vertexes() const
{
    return m_v_vertexes;
}

const MaxMatch::Edges& MaxMatch::edges() const
{
    return m_edges;
}

void MaxMatch::addVertex(const VertexType& type, const std::string& name)
{
    VertexNamesToIndexes& namesToIdxs(type == U_Vertex ? m_u_vertNamesToIdxs : m_v_vertNamesToIdxs);
    Vertexes& vertexes(type == U_Vertex ? m_u_vertexes : m_v_vertexes);

    VertexNamesToIndexes::iterator nameToIdx(namesToIdxs.find(name));
    if(nameToIdx != namesToIdxs.end())
    {
        char partname(type == U_Vertex ? 'u' : 'v');
        throw std::string("MaxMatch::addVertex(const VertexType& type, const string& name): A ") + partname +
                          " vertex already exists with the specified name.";
    }

    VertexIndex idx(vertexes.size());
    vertexes.push_back(Vertex(type, name, idx));
    namesToIdxs[name] = idx;
}

void MaxMatch::addEdge(const std::string& u_vertexName, const std::string& v_vertexName)
{
    VertexNamesToIndexes::iterator nameToIdx(m_u_vertNamesToIdxs.find(u_vertexName));
    if(nameToIdx == m_u_vertNamesToIdxs.end())
    {
        throw std::string("MaxMatch::addEdge(const string& u_vertexName, const string& v_vertexName): "
                          "no u vertex with u_vertexName exists.");
    }
    VertexIndex uIdx(nameToIdx->second);
    Vertex& u(m_u_vertexes[uIdx]);

    nameToIdx = m_v_vertNamesToIdxs.find(v_vertexName);
    if(nameToIdx == m_v_vertNamesToIdxs.end())
    {
        throw std::string("MaxMatch::addEdge(const string& u_vertexName, const string& v_vertexName): "
                          "no v vertex with v_vertexName exists.");
    }
    VertexIndex vIdx(nameToIdx->second);
    Vertex& v(m_v_vertexes[vIdx]);

    EdgeIndex edgeIdx(m_edges.size());
    m_edges.push_back(Edge(uIdx, vIdx, edgeIdx));
    u.edges.push_back(edgeIdx);
    v.edges.push_back(edgeIdx);
}

const MaxMatch::VertexIndexes& MaxMatch::us_to_vs() const
{
    return m_us_to_vs;
}

const MaxMatch::VertexIndexes& MaxMatch::vs_to_us() const
{
    return m_vs_to_us;
}

int MaxMatch::hopcoftKarp()
{
    int matches(0);

    m_layers.resize(m_u_vertexes.size());

    m_us_to_vs.resize(m_u_vertexes.size());
    std::fill(m_us_to_vs.begin(), m_us_to_vs.end(), NillVertIdx);

    m_vs_to_us.resize(m_v_vertexes.size());
    std::fill(m_vs_to_us.begin(), m_vs_to_us.end(), NillVertIdx);

    VertexIndex uIdx, uIdxEnd(m_u_vertexes.size());
    while(makeLayers())
    {
        for(uIdx = 1; uIdx < uIdxEnd; ++uIdx)
        {
            if(m_us_to_vs[uIdx] == NillVertIdx)
            {
                if(findPath(uIdx))
                {
                    ++matches;
                }
            }
        }
    }

    return matches;
}

bool MaxMatch::makeLayers()
{
    std::list<VertexIndex> searchQ;
    VertexIndex uIdx, uIdxEnd(m_u_vertexes.size()), nextUIdx, vIdx;
    // Put all free u vertexes in layer 0 and queue them for searching
    for(uIdx = 1; uIdx < uIdxEnd; ++uIdx)
    {
        if(m_us_to_vs[uIdx] == NillVertIdx)
        {
            m_layers[uIdx] = 0;
            searchQ.push_front(uIdx);
        }
        else
        {
            m_layers[uIdx] = InfLayer;
        }
    }
    m_layers[NillVertIdx] = InfLayer;
    EdgeIndexes::const_iterator edgeIdx;
    while(!searchQ.empty())
    {
        uIdx = searchQ.back();
        searchQ.pop_back();
        EdgeIndexes& edges(m_u_vertexes[uIdx].edges);
        for(edgeIdx = edges.cbegin(); edgeIdx != edges.cend(); ++edgeIdx)
        {
            vIdx = m_edges[*edgeIdx].v_vertex;
            nextUIdx = m_vs_to_us[vIdx];
            Layer& nextULayer(m_layers[nextUIdx]);
            if(nextULayer == InfLayer)
            {
                nextULayer = m_layers[uIdx] + 1;
                searchQ.push_front(nextUIdx);
            }
        }
    }
    // If an augmenting path exists, m_layers[NillVertexIdx] represents the depth of the findPath DFS when the u vertex
    // at the end of an augmenting path is found.  Otherwise, this value is InfLayer.
    return m_layers[NillVertIdx] != InfLayer;
}

bool MaxMatch::findPath(const VertexIndex& uIdx)
{
    if(uIdx == NillVertIdx)
    {
        return true;
    }
    else
    {
        VertexIndex vIdx;
        Layer nextULayer(m_layers[uIdx] + 1);
        EdgeIndexes& edges(m_u_vertexes[uIdx].edges);
        for(EdgeIndexes::const_iterator edgeIdx(edges.begin()); edgeIdx != edges.end(); ++edgeIdx)
        {
            vIdx = m_edges[*edgeIdx].v_vertex;
            VertexIndex& nextUIdx(m_vs_to_us[vIdx]);
            if(m_layers[nextUIdx] == nextULayer)
            {
                if(findPath(nextUIdx))
                {
                    // This edge belongs to an augmenting path - add it to the matching set
                    nextUIdx = uIdx;
                    m_us_to_vs[uIdx] = vIdx;
                    return true;
                }
            }
        }
        // m_u_vertexes[uIdx] does not belong to an augmenting path and need not be searched further during the DFS
        // phase
        m_layers[uIdx] = InfLayer;
        return false;
    }
}

Main.cpp
// Copyright Erik Hvatum 2012
// License: GPL v2

#include "MaxMatch.h"
#include <iostream>

int main(int, char**)
{
    try
    {
        MaxMatch m;

        m.addVertex(m.U_Vertex, "A");
        m.addVertex(m.U_Vertex, "B");
        m.addVertex(m.U_Vertex, "C");
        m.addVertex(m.U_Vertex, "D");
        m.addVertex(m.U_Vertex, "E");
        m.addVertex(m.U_Vertex, "F");
        m.addVertex(m.U_Vertex, "G");
        m.addVertex(m.U_Vertex, "H");
        m.addVertex(m.U_Vertex, "I");
        m.addVertex(m.U_Vertex, "J");
        m.addVertex(m.U_Vertex, "K");
        m.addVertex(m.U_Vertex, "L");
        m.addVertex(m.V_Vertex, "1");
        m.addVertex(m.V_Vertex, "2");
        m.addVertex(m.V_Vertex, "3");
        m.addVertex(m.V_Vertex, "4");
        m.addVertex(m.V_Vertex, "5");
        m.addVertex(m.V_Vertex, "6");
        m.addVertex(m.V_Vertex, "7");
        m.addVertex(m.V_Vertex, "8");
        m.addVertex(m.V_Vertex, "9");
        m.addVertex(m.V_Vertex, "10");
        m.addVertex(m.V_Vertex, "11");

        m.addEdge("A", "1");
        m.addEdge("A", "2");
        m.addEdge("A", "3");
        m.addEdge("B", "1");
        m.addEdge("B", "4");
        m.addEdge("B", "5");
        m.addEdge("C", "1");
        m.addEdge("C", "3");
        m.addEdge("D", "1");
        m.addEdge("D", "5");
        m.addEdge("E", "6");
        m.addEdge("E", "7");
        m.addEdge("E", "8");
        m.addEdge("F", "3");
        m.addEdge("F", "5");
        m.addEdge("G", "6");
        m.addEdge("G", "8");
        m.addEdge("H", "7");
        m.addEdge("I", "9");
        m.addEdge("J", "10");
        m.addEdge("K", "11");

        int c(m.hopcoftKarp());
        std::cout << "Match size: " << c << std::endl;

        MaxMatch::VertexIndex uIdx(0);
        for ( MaxMatch::VertexIndexes::const_iterator u_to_v(m.us_to_vs().begin());
              u_to_v != m.us_to_vs().end();
              ++u_to_v, ++uIdx )
        {
            std::cout << m.u_vertexes()[uIdx].name << " -> " << m.v_vertexes()[*u_to_v].name << std::endl;
        }
    }
    catch(const std::string &e)
    {
        std::cerr << e << std::endl;
        return -1;
    }

    return 0;
}

Note that you need to use -std=c++0x to compile this with g++ 4.5.3-r12 p1.5. Hopefully later versions will default to 11x syntax once it's all implemented.

A version of main.cpp accepting input from YAML
// Copyright Erik Hvatum 2012
// License: GPL v2

#include <yaml-cpp/yaml.h>
#include <fstream>
#include <iostream>
#include <set>
#include "MaxMatch.h"

using namespace std;

int main(int argc, char** argv)
{
    try
    {
        ifstream yamlInFile;
        istream* yamlInStream(NULL);
        if(argc == 2)
        {
            yamlInFile.open(argv[1]);
            if(!yamlInFile.good())
            {
                throw string("Failed to open input file \"") + argv[1] + "\".";
            }
            yamlInStream = &yamlInFile;
        }
        else
        {
            yamlInStream = &cin;
        }
        YAML::Parser yparser(*yamlInStream);
        YAML::Node root;
        yparser.GetNextDocument(root);

        MaxMatch m;
        set<string> addedUVerts, addedVVerts;
        string u, v;

        const YAML::Node& bipartite(root["bipartite"]);
        for(YAML::Iterator assoc(bipartite.begin()); assoc != bipartite.end(); ++assoc)
        {
            assoc.first() >> u;
            if(addedUVerts.find(u) == addedUVerts.end())
            {
                m.addVertex(MaxMatch::U_Vertex, u);
                addedUVerts.insert(u);
            }
            const YAML::Node& adjs(assoc.second());
            for(YAML::Iterator adj(adjs.begin()); adj != adjs.end(); ++adj)
            {
                *adj >> v;
                if(addedVVerts.find(v) == addedVVerts.end())
                {
                    m.addVertex(MaxMatch::V_Vertex, v);
                    addedVVerts.insert(v);
                }
                m.addEdge(u, v);
            }
        }

        int c(m.hopcoftKarp());
        cout << "Match size: " << c << endl;

        MaxMatch::VertexIndex uIdx(0);
        for ( MaxMatch::VertexIndexes::const_iterator u_to_v(m.us_to_vs().begin());
              u_to_v != m.us_to_vs().end();
              ++u_to_v, ++uIdx )
        {
            cout << m.u_vertexes()[uIdx].name << " -> " << m.v_vertexes()[*u_to_v].name << endl;
        }
    }
    catch(const string &e)
    {
        cerr << e << endl;
        return -1;
    }
    catch(YAML::Exception& e)
    {
        cout << e.what() << endl;
        return -1;
    }

    return 0;
}

input.yaml
---
bipartite:
    A: [1, 2, 3]
    B: [1, 4, 5]
    C: [1, 3]
    D: [1, 5]
    E: [6, 7, 8]
    F: [3, 5]
    G: [6, 8]
    H: [7]
    I: [9]
    J: [10]
    K: [11]
    L: []
...

A version of main.cpp that takes input from YAML and uses the boost graph library to find a maximum matching for speed comparison purposes and to verify that the maximum matching set found by my implementation is a valid maximum match. Note that the YAML input format is a little different; input.yaml for this would begin bipartite: with adjancies: indented on the following line and A:... indented twice.
// Copyright Erik Hvatum 2012
// License: GPL v2

#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/max_cardinality_matching.hpp>
#include <boost/timer/timer.hpp>
#include <yaml-cpp/yaml.h>
#include <fstream>
#include <iostream>
#include <set>
#include "MaxMatch.h"

using namespace std;

void loadFromYaml(const char* yamlFN, MaxMatch& m)
{
    boost::timer::auto_cpu_timer t;

    ifstream yamlInFile;
    istream* yamlInStream(NULL);
    if(yamlFN != NULL)
    {
        yamlInFile.open(yamlFN);
        if(!yamlInFile.good())
        {
            throw string("Failed to open input file \"") + yamlFN + "\".";
        }
        yamlInStream = &yamlInFile;
    }
    else
    {
        yamlInStream = &cin;
    }
    YAML::Parser yparser(*yamlInStream);
    YAML::Node root;
    yparser.GetNextDocument(root);

    set<string> addedUVerts, addedVVerts;
    string u, v;

    const YAML::Node& bipartite(root["bipartite"]);
    const YAML::Node& adjacencies(bipartite["adjacencies"]);
    for(YAML::Iterator assoc(adjacencies.begin()); assoc != adjacencies.end(); ++assoc)
    {
        assoc.first() >> u;
        if(addedUVerts.find(u) == addedUVerts.end())
        {
            m.addVertex(MaxMatch::U_Vertex, u);
            addedUVerts.insert(u);
        }
        const YAML::Node& adjs(assoc.second());
        for(YAML::Iterator adj(adjs.begin()); adj != adjs.end(); ++adj)
        {
            *adj >> v;
            if(addedVVerts.find(v) == addedVVerts.end())
            {
                m.addVertex(MaxMatch::V_Vertex, v);
                addedVVerts.insert(v);
            }
            m.addEdge(u, v);
        }
    }
    const YAML::Node* orphanVs(bipartite.FindValue("orphan Vs"));
    if(orphanVs != NULL)
    {
        for(YAML::Iterator orphanV(orphanVs->begin()); orphanV != orphanVs->end(); ++orphanV)
        {
            *orphanV >> v;
            m.addVertex(MaxMatch::V_Vertex, v);
        }
    }
}

template <typename Graph, typename MateMap, typename VertexIndexMap>
inline bool checkmatching(const Graph& g, MateMap mate, VertexIndexMap vm)
{
    return boost::maximum_cardinality_matching_verifier<Graph,MateMap,VertexIndexMap>::verify_matching(g,mate,vm);
}

template <typename Graph, typename MateMap>
inline bool checkmatching(const Graph& g, MateMap mate)
{
    return checkmatching(g, mate, get(boost::vertex_index,g));
}

void boostMatcher(const MaxMatch m)
{
    using namespace boost;

    typedef adjacency_list<vecS, vecS, undirectedS> my_graph;

    const MaxMatch::Edges& mEdges(m.edges());
    const MaxMatch::Vertexes& mUVerts(m.u_vertexes());
    const MaxMatch::Vertexes& mVVerts(m.v_vertexes());

    my_graph g(mUVerts.size() + mVVerts.size() - 2);

    MaxMatch::VertexIndex offset(mUVerts.size() - 1);
    for(MaxMatch::Edges::const_iterator edge(mEdges.cbegin()); edge != mEdges.cend(); ++edge)
    {
        add_edge(edge->u_vertex - 1, offset + edge->v_vertex - 1, g);
    }

    cout << "executing Edmonds maximim cardinality" << endl;
    std::vector<graph_traits<my_graph>::vertex_descriptor> mate(mUVerts.size() + mVVerts.size() - 2);
    {
        boost::timer::auto_cpu_timer t;
        edmonds_maximum_cardinality_matching(g, &mate[0]);
    }

    std::cout << "Edmonds maximim cardinality match size: " << matching_size(g, &mate[0]) << std::endl;

    MaxMatch::VertexIndex u(0), v(offset);
    for ( MaxMatch::VertexIndexes::const_iterator u_to_v(m.us_to_vs().cbegin() + 1);
          u_to_v != m.us_to_vs().cend();
          ++u_to_v, ++u )
    {
        if(*u_to_v == 0)
        {
            mate[u] = numeric_limits<size_t>::max();
        }
        else
        {
            mate[u] = offset + *u_to_v - 1;
        }
    }
    for ( MaxMatch::VertexIndexes::const_iterator v_to_u(m.vs_to_us().cbegin() + 1);
          v_to_u != m.vs_to_us().cend();
          ++v_to_u, ++v )
    {
        if(*v_to_u == 0)
        {
            mate[v] = numeric_limits<size_t>::max();
        }
        else
        {
            mate[v] = *v_to_u - 1;
        }
    }

    cout << "checking for max match" << endl;
    bool matched;
    {
        boost::timer::auto_cpu_timer t;
        matched = checkmatching(g, &mate[0]);
    }
    cout << (matched ? "Hopcroft-Karp found max match" : "Hopcroft-Karp DID NOT find max match") << endl;
}

int main(int argc, char** argv)
{
    try
    {
        MaxMatch m;
        cout << "loading input" << endl;
        loadFromYaml(argc == 2 ? argv[1] : NULL, m);

        int c;
        cout << "executing Hopcroft-Karp" << endl;
        {
            boost::timer::auto_cpu_timer t;
            c = m.hopcoftKarp();
        }
        cout << "Hopcroft-Karp match size: " << c << endl;

//      MaxMatch::VertexIndex uIdx(0);
//      for ( MaxMatch::VertexIndexes::const_iterator u_to_v(m.us_to_vs().begin());
//            u_to_v != m.us_to_vs().end();
//            ++u_to_v, ++uIdx )
//      {
//          cout << m.u_vertexes()[uIdx].name << " -> " << m.v_vertexes()[*u_to_v].name << endl;
//      }

        boostMatcher(m);
    }
    catch(const string &e)
    {
        cerr << e << endl;
        return -1;
    }
    catch(YAML::Exception& e)
    {
        cout << e.what() << endl;
        return -1;
    }

    return 0;
}

Archives

July 2009   August 2009   September 2009   October 2009   November 2009   December 2009   January 2010   September 2010   December 2010   January 2011   February 2011   April 2011   June 2011   August 2011   February 2012   June 2012   July 2012   August 2012   October 2012   November 2012   January 2014   April 2014   June 2014   August 2014   September 2014   October 2014   January 2015   March 2015   April 2015   June 2015   November 2015   December 2015   January 2016   June 2016   August 2016   January 2017   March 2017   April 2018   April 2019   June 2019   January 2020  

This page is powered by Blogger. Isn't yours?

Subscribe to Posts [Atom]