Skip to content

Instantly share code, notes, and snippets.

@OzTamir
Last active August 29, 2015 14:06
Show Gist options
  • Select an option

  • Save OzTamir/4f57a8ec96a323c975b2 to your computer and use it in GitHub Desktop.

Select an option

Save OzTamir/4f57a8ec96a323c975b2 to your computer and use it in GitHub Desktop.
Parse the links in HTML <a> tags read from an input file
#include <iostream>
#include <fstream>
#include <string>
#include <regex>
using namespace std;
/**
 * Print the address of every link found in a block of HTML text.
 *
 * A link is an anchor tag whose first attribute is a double-quoted href,
 * e.g. <a href="http://example.com">. Matching ignores letter case and
 * accepts any run of whitespace between the tag name and the attribute,
 * so <A HREF="x"> and an "<a" followed by a tab or newline are found too.
 * Tags that put another attribute before href, or that use single quotes,
 * are not recognised.
 *
 * Output goes to standard output: the line "Matches found:" followed by
 * one address per line, or the single line "No matches found.".
 *
 * @param file  The full text to scan (typically the contents of an HTML file).
 */
void parseLinks(const std::string& file) {
    // Capture group 1 is the address: everything after the opening quote
    // up to (not including) the next '"' or '<'.
    static const std::regex link(
        "<a\\s+href\\s*=\\s*\"([^<\"]*)",
        std::regex::ECMAScript | std::regex::icase);

    // A default-constructed token iterator is the end-of-sequence marker.
    const std::sregex_token_iterator end;

    // The trailing 1 asks the iterator to yield only capture group 1.
    std::sregex_token_iterator it(file.begin(), file.end(), link, 1);

    if (it == end) {
        std::cout << "No matches found." << '\n';
        return;
    }

    std::cout << "Matches found:" << '\n';
    for (; it != end; ++it) {
        std::cout << it->str() << '\n';
    }
}
/**
 * Program entry point: ask for a file name, read that file, and print
 * every link address found in it.
 *
 * @return 0 after the file was read and scanned; 1 when no file name could
 *         be read from standard input or the file could not be opened.
 */
int main () {
    // Get a file name from the user. std::endl flushes so the prompt is
    // visible before the program blocks on input.
    std::cout << "Please enter a file name:" << std::endl;

    // Read the whole line (after skipping leading whitespace) so that file
    // names containing spaces are not cut off at the first space.
    std::string filename;
    if (!std::getline(std::cin >> std::ws, filename)) {
        std::cerr << "No file name was entered" << std::endl;
        return 1;
    }
    // Drop trailing whitespace, including a '\r' left by CRLF input.
    filename.erase(filename.find_last_not_of(" \t\r\n") + 1);

    std::ifstream myfile(filename);
    if (!myfile.is_open()) {
        // Report on stderr and signal failure through the exit status.
        std::cerr << "Unable to open file" << std::endl;
        return 1;
    }

    // Read the file line by line into one string, terminating every line
    // with '\n'.
    std::string file;
    std::string line;
    while (std::getline(myfile, line)) {
        file += line;
        file += '\n';
    }
    myfile.close();

    // Find and print all the links in the file.
    parseLinks(file);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment