Last active
August 29, 2015 14:06
-
-
Save OzTamir/4f57a8ec96a323c975b2 to your computer and use it in GitHub Desktop.
Parse links from HTML <a> tags from an input file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <iostream> | |
| #include <fstream> | |
| #include <string> | |
| #include <regex> | |
| using namespace std; | |
// Prints the target address of every HTML anchor tag found in `file`.
//
// `file` holds the full text to scan (e.g. the contents of an HTML document).
// For each `<a ... href=...>` tag the value of its href attribute is written
// to standard output, one per line, preceded once by "Matches found:".
// If the text contains no such tag, "No matches found." is printed instead.
//
// Matching is case-insensitive, accepts other attributes (and line breaks)
// between `<a` and `href`, and accepts either double- or single-quoted values.
// As before, the closing quote is not required and a value stops at `<`.
void parseLinks(const std::string& file) {
    // <a\s+              the tag name followed by whitespace, so that tags
    //                    such as <abbr> are not mistaken for anchors
    // (?:[^>]*?\s)?      optionally skip earlier attributes inside the same
    //                    tag; the trailing \s keeps "data-href" from matching
    // href\s*=\s*        the attribute itself, with optional spacing
    // "([^"<]*)          group 1: a double-quoted address
    // '([^'<]*)          group 2: a single-quoted address
    static const std::regex link(
        "<a\\s+(?:[^>]*?\\s)?href\\s*=\\s*(?:\"([^\"<]*)|'([^'<]*))",
        std::regex::ECMAScript | std::regex::icase);

    // A default-constructed iterator marks the end of the match sequence.
    const std::sregex_iterator end;
    // Tracks whether the header line has already been printed.
    bool matchesFound = false;

    for (std::sregex_iterator it(file.begin(), file.end(), link); it != end; ++it) {
        if (!matchesFound) {
            std::cout << "Matches found:" << '\n';
            matchesFound = true;
        }
        // Exactly one of the two capture groups takes part in each match,
        // depending on which quote character introduced the address.
        const std::smatch& match = *it;
        const std::ssub_match& address = match[1].matched ? match[1] : match[2];
        std::cout << address.str() << '\n';
    }

    if (!matchesFound) {
        std::cout << "No matches found." << '\n';
    }
}
| int main () { | |
| string file; | |
| string line; | |
| string filename; | |
| // Get a filename from the user | |
| cout << "Please enter a file name:" << endl; | |
| cin >> filename; | |
| ifstream myfile (filename); | |
| // If the file is valid, open it | |
| if (myfile.is_open()) { | |
| // Read the lines into a string | |
| while ( getline (myfile,line) ) { | |
| file += line + "\n"; | |
| } | |
| // Close the file | |
| myfile.close(); | |
| // Find all the links in the file | |
| parseLinks(file); | |
| } | |
| // If the filename is not valid, print an error | |
| else | |
| cout << "Unable to open file" << endl; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment