Last active
August 8, 2020 03:04
-
-
Save jmoyers/242c1c68adb549849714f1c715594cd7 to your computer and use it in GitHub Desktop.
Futures vs Shared Memory/Locks/Conditional Variable
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <future> | |
#include <iostream> | |
#include <string> | |
#include <unordered_set> | |
#include <queue> | |
using namespace std; | |
class Solution { | |
public: | |
string get_domain(string &url) { | |
auto pos = url.find('/', 7); | |
return url.substr(7, pos - 7); | |
} | |
vector<string> crawl(string startUrl, HtmlParser htmlParser) { | |
unordered_set<string> visited{startUrl}; | |
queue<future<vector<string>>> futures; | |
auto domain = get_domain(startUrl); | |
futures.push(std::async(launch::async, &HtmlParser::getUrls, &htmlParser, startUrl)); | |
while (!futures.empty()) { | |
auto urls = futures.front().get(); | |
futures.pop(); | |
for (auto &r : urls) { | |
if (get_domain(r) == domain and visited.find(r) == visited.end()) { | |
visited.insert(r); | |
futures.push(std::async(launch::async, &HtmlParser::getUrls, &htmlParser, r)); | |
} | |
} | |
} | |
vector<string> results(visited.begin(), visited.end()); | |
return results; | |
} | |
}; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm> | |
#include <condition_variable> | |
#include <iostream> | |
#include <mutex> | |
#include <string> | |
#include <thread> | |
#include <unordered_set> | |
#include <vector> | |
using namespace std; | |
class Solution { | |
private: | |
string hostname; | |
int pending = 0; | |
mutex work_mutex; | |
condition_variable work_available; | |
vector<string> crawlable; | |
unordered_set<string> visited; | |
HtmlParser *parser; | |
string get_hostname(string url) { | |
return url.substr(7, url.find('/', 7) - 7); | |
} | |
bool check_hostname(string url, string hostname) { | |
return get_hostname(url) == hostname; | |
} | |
void worker() { | |
string url; | |
while (true) { | |
{ | |
// deque a url to crawl | |
unique_lock lock(work_mutex); | |
work_available.wait(lock, [&]() { | |
return !crawlable.empty() || (crawlable.empty() && pending == 0); | |
}); | |
if (crawlable.empty()) { | |
return; | |
} | |
url = crawlable.back(); | |
crawlable.pop_back(); | |
pending++; | |
} | |
vector<string> new_urls = parser->getUrls(url); | |
{ | |
scoped_lock lock(work_mutex); | |
for (auto &result : new_urls) { | |
if (visited.find(result) == visited.end() and | |
check_hostname(result, hostname)) { | |
crawlable.push_back(result); | |
visited.insert(result); | |
} | |
} | |
pending--; | |
work_available.notify_all(); | |
} | |
} | |
} | |
public: | |
vector<string> crawl(string startUrl, HtmlParser htmlParser) { | |
crawlable.push_back(startUrl); | |
visited.insert(startUrl); | |
hostname = get_hostname(startUrl); | |
parser = &htmlParser; | |
int num_threads = thread::hardware_concurrency(); | |
vector<thread> threads; | |
for (int i = 0; i < num_threads; i++) { | |
threads.emplace_back(&Solution::worker, this); | |
} | |
work_available.notify_all(); | |
for (auto &t : threads) | |
t.join(); | |
vector<string> results(visited.begin(), visited.end()); | |
return results; | |
} | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment