Created
November 28, 2012 03:19
-
-
Save macintux/4158835 to your computer and use it in GitHub Desktop.
Generate list of HTTP redirects
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%%% @author John Daily <[email protected]> | |
%%% @copyright (C) 2012, John Daily | |
%%% @doc | |
%%% The httpc module in inets does not appear to offer any way to | |
%%% determine the "real" URL if it follows a series of redirects | |
%%% during its request. This code addresses that gap. | |
%%% | |
%%% The code does not interpret any HTML-based redirect, such as a | |
%%% meta refresh. | |
%%% | |
%%% It may be useful to wrap httpc:request so that the list of URLs | |
%%% is included in the standard result set, but that exercise is | |
%%% left for posterity. | |
%%% @end | |
%%% Created : 27 Nov 2012 by John Daily <[email protected]> | |
-module(chaseurls). | |
-compile(export_all). | |
-define(MAXREDIRECTS, 10). | |
%% Will return a tuple, with one of these atoms as first member:
%% * ok
%% * brokenchain (received a 4XX or 5XX status, or a 3XX response with no
%%   Location header, when requesting a URL)
%% * toomany (too many redirects)
%% | |
%% The 2nd member is a list of URLs in reverse order. The URL | |
%% provided with the original call will always be the last member, and | |
%% the last URL encountered will be the first. | |
%% | |
%% Examples: | |
%% chase("http://some-bad-url") -> { brokenchain, [ "http://some-bad-url" ] } | |
%% chase("http://some-valid-url") -> { ok, [ "http://final-redirect", "http://intermediate-redirect",
%% "http://some-valid-url" ] }
%% chase("http://some-looping-url1") -> { toomany, [ "http://some-looping-url2", | |
%% "http://some-looping-url1", | |
%% "http://some-looping-url2", | |
%% "http://some-looping-url1", | |
%% "http://some-looping-url2",... ]} | |
%% chase/1: chase redirects starting at URL with the default request
%% budget (?MAXREDIRECTS). Starting inets and seeding the chase are
%% handled by chase/2.
chase(URL) ->
    chase(URL, ?MAXREDIRECTS).
%% chase/2: chase redirects starting at URL, with the caller choosing how
%% many requests may be issued before the chase is abandoned as `toomany'.
chase(URL, MaxRedirects) ->
    %% The return value is ignored on purpose, so an already-running
    %% inets is not treated as an error here.
    _ = inets:start(),
    Start = {url, URL},
    chase(Start, MaxRedirects, []).
%% chase/3: the worker loop.
%%
%% The first argument is the outcome of the previous step:
%%   * done            - the last request succeeded; the chain is complete
%%   * {_, nolocation} - the chain could not be continued (e.g. a 4XX/5XX
%%                       response, or a 3XX without a Location header)
%%   * {url, URL}      - URL is the next address to request
%% Count is the number of requests still allowed. URLs accumulates the
%% addresses seen so far, newest first.
%%
%% The accumulator is deliberately not reversed: the last URL stays at the
%% head of the list in case that is all the caller wants.
chase(done, _Count, URLs) ->
    {ok, URLs};
chase({_, nolocation}, _Count, URLs) ->
    {brokenchain, URLs};
%% Guard on `=< 0' rather than matching a literal 0: a negative or
%% fractional budget would otherwise step past zero and never terminate
%% on a redirect loop.
chase({url, URL}, Count, URLs) when Count =< 0 ->
    {toomany, [URL | URLs]};
chase({url, URL}, Count, URLs) ->
    %% autoredirect is disabled so that every hop is observed and recorded.
    Response = httpc:request(get, {URL, []}, [{autoredirect, false}], []),
    chase(process_result(Response), Count - 1, [URL | URLs]).
%% process_result/1: translate the return value of httpc:request/4 into
%% the next state consumed by chase/3.
%%
%%   2XX response      -> done
%%   3XX response      -> {url, Location}, or {url, nolocation} when the
%%                        response carries no "location" header
%%   4XX and above     -> {error, nolocation}
%%   {error, _Reason}  -> {error, nolocation}; the request itself failed
%%                        (e.g. the host could not be reached), so the
%%                        chain is broken rather than the caller crashing
%%                        with function_clause
%%
%% The header is looked up under the lowercase string key "location".
process_result({ok, {{_Version, Status, _Reason}, _Headers, _Body}})
  when Status >= 200, Status < 300 ->
    done;
process_result({ok, {{_Version, Status, _Reason}, Headers, _Body}})
  when Status >= 300, Status < 400 ->
    {url, proplists:get_value("location", Headers, nolocation)};
process_result({ok, {{_Version, Status, _Reason}, _Headers, _Body}})
  when Status >= 400 ->
    {error, nolocation};
process_result({error, _Reason}) ->
    {error, nolocation}.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment