Skip to content

Instantly share code, notes, and snippets.

@emiliorizzo
Last active March 10, 2017 01:55
Show Gist options
  • Save emiliorizzo/93a54baebe8a61e55b14a48d362cb9d8 to your computer and use it in GitHub Desktop.
Save emiliorizzo/93a54baebe8a61e55b14a48d362cb9d8 to your computer and use it in GitHub Desktop.
Facebook API force to scrape url list

Force the Facebook API to re-scrape a list of URLs

Updates Facebook's cached copy of each page by asking the Graph API to re-scrape every URL in a list.

How to use

fb-scrape.sh -t TOKEN -f URL-LIST

Get a facebook access token

https://developers.facebook.com/tools/debug/accesstoken

Generate the URL list file from a sitemap

 curl -s sitemap_url | grep "<loc>" | awk -F"<loc>" '{print $2} ' | awk -F"</loc>" '{print $1}' 

 
 
#!/bin/bash
# Terminal styling shared by the status and help messages.
#
# The c_* palette entries are kept as literal backslash escape sequences;
# they only become real escape bytes once something such as printf
# interprets them.
c_info='\033[02;38;5;44m'
c_brand='\033[02;38;5;47m'
c_nice='\033[02;38;5;197m'
c_warn='\033[02;38;5;208m'
c_error='\033[01;38;5;196m'
# Text attributes and the reset sequence are obtained from tput.
t_bold="$(tput bold)"
t_soft="$(tput dim)"
t_under="$(tput smul)"
c_normal="$(tput sgr0)"
# Functions ----------
#######################################
# Ask the Facebook Graph API to re-scrape one URL, printing a progress line
# before the request and a reset line after it.
# Globals:   token   (read) - access token sent with the request
#            lnumber (read) - counter shown at the start of the progress line
#            c_info, c_brand, c_warn, c_normal, t_bold (read) - styling
# Arguments: $1 - URL to scrape
# Outputs:   progress line, then whatever curl prints, on stdout
#######################################
scrape() {
  local target="$1"

  # The URL is passed as data (%s), never as part of the format string:
  # percent-escapes such as %20 would otherwise be read as printf
  # conversions. The c_* variables hold backslash escapes, hence %b.
  printf '%b%s - %b%s%s%s%b \n' \
    "$c_info" "$lnumber" "$c_brand" "$t_bold" "$target" "$c_normal" "$c_warn"

  # --form-string sends each value literally; with -F a leading '@' or '<',
  # or a ';type=' inside the value, would be given special meaning by curl.
  curl -X POST \
    --form-string "id=${target}" \
    --form-string "scrape=true" \
    --form-string "access_token=${token}" \
    "https://graph.facebook.com"

  printf '%s \n' "$c_normal"
}
# Print the one-line usage summary to stdout.
# Reads the c_info and c_normal styling globals.
showHelp() {
  # The styling variables stay inside the format string on purpose: any
  # backslash escapes they hold are expanded by printf itself.
  local usage_fmt="${c_info} Parameters: -t <facebook-token> -f <url-list-file>${c_normal} \n"
  printf "$usage_fmt"
}
# Start -------------
# Parse -t/--token and -f/--file, then hand every non-blank line of the URL
# list file to scrape. lnumber counts the lines read (blank ones included)
# and is the number scrape shows next to each URL.
while (( $# > 0 )); do
  key="$1"
  case "$key" in
    -t|--token)
      token="$2"
      shift # past the option's value
      ;;
    -f|--file)
      urlFile="$2"
      shift # past the option's value
      ;;
    *)
      # unknown option
      showHelp
      ;;
  esac
  shift # past the option itself
done

if [[ -n "$token" && -n "$urlFile" ]]; then
  if [[ -f "$urlFile" ]]; then
    lnumber=1
    # The `|| [[ -n "$line" ]]` clause keeps a last line that has no
    # trailing newline.
    while IFS='' read -r line || [[ -n "$line" ]]; do
      # Trim leading and trailing whitespace with parameter expansion.
      # An unquoted `echo $line` would glob-expand '?', '*' and '[' in the
      # URL and could swallow a line that looks like an echo option.
      url="${line#"${line%%[![:space:]]*}"}"
      url="${url%"${url##*[![:space:]]}"}"
      if [[ -n "$url" ]]; then
        scrape "$url"
      fi
      ((lnumber++))
    done < "$urlFile"
  else
    # The file name is passed as data so a '%' in it cannot be read as a
    # printf conversion; the styling variables remain in the format so
    # their backslash escapes are interpreted.
    printf "${c_brand}The file: ${c_warn}%s ${c_brand}does not exists${c_normal}\n" "$urlFile"
  fi
else
  showHelp
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment