#!/bin/sh
# Modified by Stan Schwertly to download locally rather than to send to Posterous.
# Github: http://github.com/Stantheman/Twitpic-Backup
# Copyright 2010 Tim "burndive" of http://burndive.blogspot.com/
# This software is licensed under the Creative Commons GNU GPL version 2.0 or later.
# License information: http://creativecommons.org/licenses/GPL/2.0/
# This script is a derivative of the original, obtained from here:
# http://tuxbox.blogspot.com/2010/03/twitpic-to-posterous-export-script.html
# Version 1.2 [add retry]
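#
# Usage: <this script> TP_NAME WORKING_DIR
#   TP_NAME     - Twitpic username whose photos will be backed up
#   WORKING_DIR - existing directory where images/, html/, and logs/ are created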
RUN_DATE=`date +%F--%H-%M-%S`
TP_NAME=$1
WORKING_DIR=$2
IMG_DOWNLOAD=1  # set to 0 to fetch HTML only and skip the image files
PREFIX=twitpic-$TP_NAME
HTML_OUT=$PREFIX-all-$RUN_DATE.html
#CURL_OPT='--retry 3 --retry-delay 5 --retry-max-time 60'
CURL_OPT='-f --retry 3 --retry-delay 5 --retry-max-time 60'
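# Note: --fail (-f) makes curl return exit code 22 on an HTTP server error;
# the manual retry loops below key on that exit code.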
# Checks the user-supplied arguments
if [ -z "$TP_NAME" ]; then
  echo "You must supply a TP_NAME."
  exit 1
fi
if [ ! -d "$WORKING_DIR" ]; then
  echo "You must supply an existing WORKING_DIR."
  exit 1
fi
cd "$WORKING_DIR" || exit 1
# Checks for the directories it needs
if [ ! -d "images" ]; then
  mkdir images
fi
if [ ! -d "html" ]; then
  mkdir html
fi
if [ ! -d "logs" ]; then
  mkdir logs
fi
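# Find the highest page number of the user's photo listing: prefer the
# "Last" pagination link, fall back to "Next", and assume a single page
# if neither link is present.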
PAGE=0
MAXRETRY=10
RETRY=0
LAST=`curl http://twitpic.com/photos/${TP_NAME} \
  | grep "<a href=.*>Last<" \
  | sed "s/.*\?page=\([0-9]*\).*/\1/"`
if [ -z "$LAST" ]; then
  NEXT=`curl http://twitpic.com/photos/${TP_NAME} \
    | grep "<a href=.*>Next<" \
    | sed "s/.*\?page=\([0-9]*\).*/\1/"`
  if [ -z "$NEXT" ]; then
    PAGE=1
  else
    PAGE=$NEXT
  fi
else
  PAGE=$LAST
fi
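# Walk the listing pages from the last page down to page 1, saving each one
# under html/. Pages already on disk are skipped, so an interrupted run can
# be resumed without re-downloading.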
while [ $PAGE -ne 0 ]; do
  echo "PAGE: $PAGE"
  FILENAME="html/$PREFIX-page-$PAGE.html"
  echo "FILENAME=$FILENAME"
  echo "0 curl http://twitpic.com/photos/${TP_NAME}?page=$PAGE -O $FILENAME"
  if [ ! -f "$FILENAME" ]; then
    echo "0"
    # wget http://twitpic.com/photos/${TP_NAME}?page=$PAGE -O $FILENAME
    echo "1 ${TP_NAME}?page=$PAGE -O $FILENAME"
    curl http://twitpic.com/photos/${TP_NAME}?page=$PAGE -o "$FILENAME" $CURL_OPT
    # curl exits 22 on an HTTP error when --fail is set; retry the same
    # page up to MAXRETRY times before moving on to the previous one.
    if [ $? -eq 22 -a $RETRY -le $MAXRETRY ]; then
      RETRY=`expr $RETRY + 1`
      sleep 1
    else
      RETRY=0
      PAGE=`expr $PAGE - 1`
    fi
  else
    RETRY=0
    PAGE=`expr $PAGE - 1`
  fi
done
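# Collect the unique photo IDs from every cached listing page: photo links
# look like <a href="/abc123">, so strip them down to the bare alphanumeric
# ID. (grep -v "sopapipa" excludes one link that matches the pattern but is
# apparently not a photo.)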
ALL_IDS=`cat html/$PREFIX-page-* \
  | grep -Eo "<a href=\"/[a-zA-Z0-9]+\">" \
  | grep -Eo "/[a-zA-Z0-9]+" \
  | grep -Eo "[a-zA-Z0-9]+" \
  | grep -v "sopapipa" \
  | sort -r | uniq | xargs`
COUNT=0
LOG_FILE=logs/$PREFIX-log-$RUN_DATE.txt
echo $ALL_IDS | tee -a $LOG_FILE
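# For each photo ID: fetch the photo's /full page (unless already cached),
# then extract the direct image URL from it and download the image itself.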
for ID in $ALL_IDS; do
  COUNT=`expr $COUNT + 1`
  echo "$ID: $COUNT" | tee -a $LOG_FILE
  echo "Processing $ID..."
  FULL_HTML="html/$PREFIX-$ID-full.html"
  # wget http://twitpic.com/$ID/full -O $FULL_HTML
  if [ ! -f "$FULL_HTML" ]; then
    RETRY=$MAXRETRY
    while [ $RETRY -ne 0 ]; do
      echo "2 " curl http://twitpic.com/$ID/full -O $FULL_HTML
      curl http://twitpic.com/$ID/full -o "$FULL_HTML" $CURL_OPT
      if [ $? -eq 22 ]; then
        RETRY=`expr $RETRY - 1`
        sleep 1
      else
        RETRY=0
      fi
    done
  fi
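  # The /full page embeds the original-size image in an <img src="..."> tag;
  # pull the https URL out of the src attribute.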
  FULL_URL=`grep "<img src" "$FULL_HTML" | grep -Eo "src=\"[^\"]*\"" | grep -Eo "https://[^\"]*"`
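  # If image downloading is enabled, derive the file extension from the URL
  # (the suffix before any query string, else the trailing suffix) and save
  # the full-size image under images/, skipping files that already exist.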
if [ "$IMG_DOWNLOAD" -eq 1 ]; then | |
EXT=`echo "$FULL_URL" | grep -Eo "[a-zA-Z0-9]+\.[a-zA-Z0-9]+\?" | head -n1 | grep -Eo "\.[a-zA-Z0-9]+"` | |
if [ -z "$EXT" ]; then | |
EXT=`echo "$FULL_URL" | grep -Eo "\.[a-zA-Z0-9]+$"` | |
fi | |
FULL_FILE=$PREFIX-$ID-full$EXT | |
# wget "$FULL_URL" -O "images/$FULL_FILE" | |
if [ ! -f "images/$FULL_FILE" ]; then | |
RETRY=$MAXRETRY | |
while [ $RETRY -ne 0 ]; do | |
echo "3 " curl "$FULL_URL" -O "images/$FULL_FILE" | |
curl "$FULL_URL" -o "images/$FULL_FILE" $CURL_OPT | |
if [ $? -eq 22 ]; then | |
RETRY=`expr $RETRY - 1` | |
sleep 1 | |
else | |
RETRY=0 | |
fi | |
done | |
fi | |
fi | |
done |
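
# Example invocation (a sketch: the script filename and the username
# "example" are placeholders, not names from the original):
#   mkdir -p /tmp/twitpic-backup
#   sh twitpic-backup.sh example /tmp/twitpic-backup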