So I'm working on a project that solves the Kevin Bacon problem. The code works fine for a small file size, but I have to read in the entire IMDB movie database file, which contains thousands of movies. I have a graph where the vertices are the actors. I then create edges between the actors if they were in the same movie. However, I think my problem lies in creating the vertices because it is creating a string for each vertex, and so it quickly runs out of memory.
Code:
#ifndef GRAPHSTUFF_H_
#define GRAPHSTUFF_H_
#include <iostream>
#include <string>
#include <sstream>
#include <fstream>
#include <list>
#include <vector>
using namespace std;
struct Vertex;
struct Edge
{
// First vertex in edge is implicit
Vertex *dest; // Second vertex in edge
double cost; // Edge cost
Edge( Vertex *d = 0, double c = 0.0 )
: dest( d ), cost( c ) { }
};
// Structure stored in priority queue for Dijkstra's algorithm.
struct Path
{
Vertex *dest; // w
double cost; // d(w)
Path( Vertex *d = 0, double c = 0.0 )
: dest( d ), cost( c ) { }
bool operator> ( const Path & rhs ) const
{ return cost > rhs.cost; }
bool operator< ( const Path & rhs ) const
{ return cost < rhs.cost; }
};
const int INFINITY = std::numeric_limits<int>::max();
// Basic info for each vertex.
struct Vertex
{
std::string name; // Vertex name
vector<Edge> adj; // Adjacent vertices (and costs)
double dist; // Cost
Vertex *prev; // Previous vertex on shortest path
int scratch; // Extra variable used in algorithm
Vertex( const std::string & nm ) : name( nm )
{ reset( ); }
void reset( )
{ dist = INFINITY; prev = NULL; scratch = 0; }
};
#endif /*GRAPHSTUFF_H_*/
Is there any way to improve this?