Created
September 2, 2012 13:03
-
-
Save benwilson512/3598504 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems'
require 'json'

SITE = "http://www.righttofoodindia.org/"

# Load the site map. The code below reads the "uri" key of every entry (and the
# disabled download step also reads "name"). File.read opens, reads and closes
# the file in one call, so no handle is left dangling.
links = JSON.parse(File.read("map.json"))

# Disabled download step, kept for reference: fetches every page listed in the
# site map into ./stuff/ using curl.
# links.each do |link|
#   filename = link["name"].downcase.gsub(" ", "_").gsub("'", "").gsub("/", "")
#   category = link["uri"].split("/").first
#   url = SITE + link["uri"]
#   command = "curl #{url} -o ./stuff/#{filename}.html"
#   `#{command}`
# end

# A category is the first path segment of a URI. Entries whose first segment
# contains "html" are top-level pages (e.g. "index.html") and have no category.
first_segments = links.map { |link| link["uri"].split("/").first }
categories = first_segments.reject { |segment| segment.include?("html") }.uniq

values = []
# NOTE(review): presumably the word count of the whole corpus measured when the
# script was written -- confirm before reusing with a different download.
total = 61877.0

categories.each do |cat|
  files = Dir.glob("stuff/#{cat}/*")
  value = 0.0

  # Without files `wc` would wait on stdin, so only run it when there is input.
  unless files.empty?
    # Array form of IO.popen bypasses the shell: file names containing spaces
    # or shell metacharacters are passed to wc verbatim.
    results = IO.popen(["wc", "-w", *files], &:read)
    # With several files the last line is the "total" row; with a single file
    # it is that file's own count. Either way the first column is the number.
    summary = results.split("\n").last
    value = summary.split(" ").first.to_f if summary
  end

  values << value
  puts "#{cat}: #{((value/total) * 100).round(1) }%"
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'fileutils'

# Recursively mirrors the tree under +root+ into a parallel tree in which the
# first "stuff/" in each path is replaced by "text/", writing a tag-stripped
# copy of every HTML file.
#
# Directories are recreated (including missing parents) and descended into.
# Regular files whose name contains "html" are filtered through sed; any other
# file is skipped.
#
# @param root [String] directory to walk, e.g. "stuff"
# @return [Array<String>] the entries found directly under +root+
def process(root)
  # sed program: ignore empty lines; on every other line delete each <...> tag
  # and print the result.
  strip_tags = '/^$/!{s/<[^>]*>//g;p;}'

  Dir.glob("#{root}/*").each do |obj|
    # Only the first "stuff/" is the root prefix; later occurrences belong to
    # nested directory names and must be left alone.
    new_path = obj.sub("stuff/", "text/")

    if File.directory?(obj)
      # mkdir_p creates missing parents and is a no-op if the directory exists.
      FileUtils.mkdir_p(new_path)
      process(obj)
    elsif File.basename(obj).include?("html")
      # Top-level pages have no directory branch above them, so make sure the
      # destination directory exists before writing.
      FileUtils.mkdir_p(File.dirname(new_path))
      # Array form of IO.popen bypasses the shell, so paths with spaces or
      # shell metacharacters are handed to sed unchanged.
      text = IO.popen(["sed", "-n", strip_tags, obj], &:read)
      File.write(new_path, text)
    end
  end
end

process("stuff")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"uri": "index.html", | |
"name": "Home Page" | |
}, | |
{ | |
"uri": "latest.html", | |
"name": "Latest additions" | |
}, | |
{ | |
"uri": "campaign/campaign.html", | |
"name": "Introduction" | |
}, | |
{ | |
"uri": "campaign/disclaimer.html", | |
"name": "Disclaimer" | |
}, | |
{ | |
"uri": "foundation.html", | |
"name": "Foundation Statement" | |
}, | |
{ | |
"uri": "campaign/secretariat.html", | |
"name": "About the Secretariat" | |
}, | |
{ | |
"uri": "contactus.html", | |
"name": "Contact Addresses" | |
}, | |
{ | |
"uri": "links/updates.html", | |
"name": "Campaign Updates" | |
}, | |
{ | |
"uri": "fin/fin_intro.html", | |
"name": "Finance and Accounts" | |
}, | |
{ | |
"uri": "links/links.html", | |
"name": "Useful links" | |
}, | |
{ | |
"uri": "rtowork/ega_intro.html", | |
"name": "Introduction" | |
}, | |
{ | |
"uri": "rtowork/ega_latest_activities.html", | |
"name": "Current Highlights" | |
}, | |
{ | |
"uri": "rtowork/ega_keydocs.html", | |
"name": "Key Documents" | |
}, | |
{ | |
"uri": "rtowork/ega_events.html", | |
"name": "Events" | |
}, | |
{ | |
"uri": "rtowork/ega_rozgarupdates.html", | |
"name": "Rozgar Updates" | |
}, | |
{ | |
"uri": "rtowork/ega_articles.html", | |
"name": "Articles on EGA" | |
}, | |
{ | |
"uri": "rtowork/ega_news.html", | |
"name": "EGA in the News" | |
}, | |
{ | |
"uri": "rtowork/ega_briefing.html", | |
"name": "Campaign Materials" | |
}, | |
{ | |
"uri": "rtowork/ega_archives.html", | |
"name": "Archives" | |
}, | |
{ | |
"uri": "mdm/mdm_intro.html", | |
"name": "Mid-day Meals" | |
}, | |
{ | |
"uri": "mdm/mdm_scorders.html", | |
"name": "Supreme Court Orders" | |
}, | |
{ | |
"uri": "mdm/mdm_comrs.html", | |
"name": "Commissioners' Interventions" | |
}, | |
{ | |
"uri": "mdm/mdm_events.html", | |
"name": "Events" | |
}, | |
{ | |
"uri": "mdm/mdm_glines.html", | |
"name": "Mid-day Meal Guidelines" | |
}, | |
{ | |
"uri": "mdm/mdm_surveys.html", | |
"name": "Field Surveys" | |
}, | |
{ | |
"uri": "mdm/mdm_articles.html", | |
"name": "Articles" | |
}, | |
{ | |
"uri": "mdm/mdm_campaignmaterials.html", | |
"name": "Campaign Materials" | |
}, | |
{ | |
"uri": "mdm/mdm_news.html", | |
"name": "MDMs in the news" | |
}, | |
{ | |
"uri": "right_to_food_act_intro.html", | |
"name": "Right to Food Act" | |
}, | |
{ | |
"uri": "right_to_food_act_events.html", | |
"name": "Events" | |
}, | |
{ | |
"uri": "right_to_food_act_key_docs.html", | |
"name": "Key Documents" | |
}, | |
{ | |
"uri": "right_to_food_act.html", | |
"name": "Articles" | |
}, | |
{ | |
"uri": "icds/icds_index.html", | |
"name": "Integrated Child Development Services ICDS" | |
}, | |
{ | |
"uri": "icds/icds_orders.html", | |
"name": "Supreme Court Orders on ICDS" | |
}, | |
{ | |
"uri": "icds/icds_comrs_interventions.html", | |
"name": "Commissioners' Interventions" | |
}, | |
{ | |
"uri": "icds/icds_comrs_reports.html", | |
"name": "ICDS in Commissioner's Reports" | |
}, | |
{ | |
"uri": "icds/icds_nac.html", | |
"name": "ICDS in the National Advisory Council" | |
}, | |
{ | |
"uri": "icds/icds_glines.html", | |
"name": "Official ICDS Documents" | |
}, | |
{ | |
"uri": "icds/icds_surveys.html", | |
"name": "Field Surveys" | |
}, | |
{ | |
"uri": "icds/icds_articles.html", | |
"name": "Articles" | |
}, | |
{ | |
"uri": "icds/icds_events.html", | |
"name": "ICDS events" | |
}, | |
{ | |
"uri": "icds/icds_news.html", | |
"name": "ICDS in the news" | |
}, | |
{ | |
"uri": "pds/pds_intro.html", | |
"name": "Public Distribution System" | |
}, | |
{ | |
"uri": "pds/pds_articles.html", | |
"name": "Articles" | |
}, | |
{ | |
"uri": "case/case.html", | |
"name": "The 'Right to Food' case" | |
}, | |
{ | |
"uri": "orders/interimorders.html", | |
"name": "Supreme Court Orders" | |
}, | |
{ | |
"uri": "comrs/comrs_intro.html", | |
"name": "Supreme Court Commissioners" | |
}, | |
{ | |
"uri": "campaign/camp_primers.html", | |
"name": "Primers" | |
}, | |
{ | |
"uri": "campaign/camp_postersplays.html", | |
"name": "Posters and Plays" | |
}, | |
{ | |
"uri": "campaign/camp_pamphlets.html", | |
"name": "Pamphlets" | |
}, | |
{ | |
"uri": "campaign/camp_background.html", | |
"name": "Background Material" | |
}, | |
{ | |
"uri": "hindi/hindi_main.html", | |
"name": "Hindi section" | |
}, | |
{ | |
"uri": "hindi/campaign.html", | |
"name": "About the Campaign" | |
}, | |
{ | |
"uri": "hindi/legal.html", | |
"name": "Legal Action" | |
}, | |
{ | |
"uri": "hindi/child.html", | |
"name": "Children's right to food" | |
}, | |
{ | |
"uri": "hindi/ega.html", | |
"name": "Employment Guarantee" | |
}, | |
{ | |
"uri": "hindi/pds.html", | |
"name": "Public Distribution System" | |
}, | |
{ | |
"uri": "hindi/primers.html", | |
"name": "Primers and Posters" | |
}, | |
{ | |
"uri": "links/articles_intro.html", | |
"name": "Articles" | |
}, | |
{ | |
"uri": "links/field_reports.html", | |
"name": "Field Reports" | |
}, | |
{ | |
"uri": "research/research.html", | |
"name": "Field Surveys" | |
}, | |
{ | |
"uri": "research/social_audits.html", | |
"name": "Social Audits" | |
}, | |
{ | |
"uri": "rtowork/ega_news.html", | |
"name": "EGA in the News" | |
}, | |
{ | |
"uri": "mdm/mdm_news.html", | |
"name": "MDMs in the news" | |
}, | |
{ | |
"uri": "icds/icds_news.html", | |
"name": "ICDS in the news" | |
}, | |
{ | |
"uri": "links/links.html", | |
"name": "Useful links" | |
}, | |
{ | |
"uri": "events/kolkataconvention.html", | |
"name": "Kolkata Convention on the Right to Food and Work" | |
}, | |
{ | |
"uri": "rtowork/ray-intro.html", | |
"name": "Rozgar Adhikar Yatra" | |
}, | |
{ | |
"uri": "rtowork/banner.html", | |
"name": "Banner project" | |
}, | |
{ | |
"uri": "rtowork/egaconvention.html", | |
"name": "19 September 2004 Convention on Right to Work" | |
}, | |
{ | |
"uri": "events/bhopalconvention/bhopalmeeting.html", | |
"name": "Bhopal Convention" | |
}, | |
{ | |
"uri": "mdm/action.html", | |
"name": "Action day on Mid-day Meals April 2002" | |
} | |
] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment