Skip to content

Instantly share code, notes, and snippets.

@willeccles
Last active March 26, 2019 20:31
Show Gist options
  • Save willeccles/cd27ffce59e605eef6f19e7e4ea2d75e to your computer and use it in GitHub Desktop.
Save willeccles/cd27ffce59e605eef6f19e7e4ea2d75e to your computer and use it in GitHub Desktop.
A simple parser for robots.txt files.
/* Tested with https://www.google.com/robots.txt */
#include <iostream>
#include <sstream>
#include <fstream>
#include <string>
#include <vector>
#include <utility> // for std::pair
#include <cstdio> // for std::printf
/*
 * Strips leading and trailing spaces and tabs from `s`, modifying it in place.
 * A string made up solely of spaces/tabs (or an empty one) becomes empty.
 * Returns a reference to the same string so calls can be chained.
 */
inline std::string& str_trim(std::string& s) {
    const char blanks[] = " \t";

    // Position of the last character worth keeping.
    const std::string::size_type tail = s.find_last_not_of(blanks);
    if (tail == std::string::npos) {
        // Nothing but blanks (or nothing at all): the result is empty.
        s.clear();
        return s;
    }
    s.resize(tail + 1);

    // A kept character exists, so this search always succeeds.
    const std::string::size_type head = s.find_first_not_of(blanks);
    s.erase(0, head);
    return s;
}
/*
 * Reads "robots.txt" from the current working directory, collects every
 * "field: value" record, and prints the records as an aligned two-column list.
 *
 * Per-line handling:
 *   - a trailing '\r' (left behind by CRLF line endings) is dropped;
 *   - everything from the first '#' onwards is a comment and is removed, so
 *     whole-line, indented, and trailing comments are all ignored;
 *   - lines without a ':' separator (blank lines included) are skipped;
 *   - field name and value are trimmed of surrounding spaces and tabs.
 *
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main(void) {
    std::ifstream robots("robots.txt");
    if (!robots) {
        std::cerr << "Error opening robots.txt.\n";
        return 1;
    }

    std::vector<std::pair<std::string, std::string> > values;
    std::string line;
    while (std::getline(robots, line)) {
        // std::getline only consumes '\n'; files with CRLF endings would
        // otherwise leave a carriage return glued to every value.
        if (!line.empty() && line.back() == '\r')
            line.pop_back();

        // Remove the comment before looking for the separator so that a ':'
        // inside a comment is never mistaken for a field delimiter.
        const std::string::size_type hash = line.find('#');
        if (hash != std::string::npos)
            line.erase(hash);

        // Only the first ':' separates field from value; later ones (as in
        // "Sitemap: https://...") belong to the value.
        const std::string::size_type colon = line.find(':');
        if (colon == std::string::npos)
            continue;

        std::string lhs = line.substr(0, colon);
        std::string rhs = line.substr(colon + 1);
        str_trim(lhs);
        str_trim(rhs);
        values.emplace_back(std::move(lhs), std::move(rhs));
    }
    robots.close();

    for (const auto& p : values) {
        // use printf for nice formatted output: the field name (with its
        // colon) is left-justified in an 11-character column.
        std::printf("%-11s %s\n", (p.first + ':').c_str(), p.second.c_str());
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment