Last active
October 31, 2021 13:17
-
-
Save andromedarabbit/efd8f6469f6c8b11fca4abcc26225be9 to your computer and use it in GitHub Desktop.
한경컨센서스에서 보고서 다운로드 받기
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Download the reports listed on the Hankyung Consensus analysis page.
#
# Fetches the analysis list page (served as EUC-KR), picks the title and href
# attributes out of every line that mentions "report_idx", and hands each link
# to a background wget that saves it as
#   ${HOME}/Downloads/<YYYY-MM-DD>/<title>
#
# Usage: run without arguments.
# Requires: curl, iconv, grep, sed, wget, recode. qpdf and recode are installed
# through Homebrew when they are missing.

# Options live here rather than in the shebang so they also apply when the
# script is started as `bash script.sh`.
set -euo pipefail

readonly BASE_URL='http://consensus.hankyung.com'
# NOTE(review): pagenum=800 presumably asks for up to 800 entries per page -
# confirm against the site.
readonly LIST_URL="${BASE_URL}/apps.analysis/analysis.list?&pagenum=800"
# Every background wget writes its log here; with -q it stays essentially empty.
readonly WGET_LOG='/tmp/wget-log'

TODAY=$(date '+%Y-%m-%d')
readonly TODAY
readonly DOWNLOAD_DIR="${HOME}/Downloads/${TODAY}"

#######################################
# Print a message to stderr and abort the script.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Make sure a command is available, installing it with Homebrew if it is not.
# Arguments: $1 - command (and Homebrew formula) name
# Returns:   0 when the tool is available; exits when it cannot be installed
#######################################
ensure_tool() {
  local tool=$1
  if command -v "${tool}" >/dev/null 2>&1; then
    return 0
  fi
  command -v brew >/dev/null 2>&1 \
    || die "${tool} is not installed and Homebrew (brew) is not available"
  brew install "${tool}"
}

#######################################
# Print the value of a double-quoted HTML attribute found in a line.
# The leading ".*" is greedy, so the last occurrence on the line wins.
# Arguments: $1 - attribute name (a fixed literal, used inside the sed pattern)
#            $2 - line of HTML
# Outputs:   the attribute value on stdout; nothing when it is absent
#######################################
extract_attr() {
  local attr=$1 line=$2
  # -n plus the p flag prints only on a match, exactly once.
  sed -nE "s/.*${attr}=\"([^\"]*).*/\1/p" <<<"${line}"
}

#######################################
# Start a background wget for one report.
# Globals:   BASE_URL, DOWNLOAD_DIR, WGET_LOG (read)
# Arguments: $1 - report title, used as the file name
#            $2 - site-relative href of the report
#######################################
download_report() {
  local title=$1 href=$2
  # A "/" in the title would be read as a directory separator by wget -O.
  local file_name=${title//\//_}
  wget -b -q -c --content-disposition --compression=none \
    -o "${WGET_LOG}" -O "${DOWNLOAD_DIR}/${file_name}" \
    "${BASE_URL}${href}"
}

#######################################
# Entry point: prepare the target directory, check tools, queue the downloads.
# Globals:   DOWNLOAD_DIR, LIST_URL (read)
# Outputs:   "TITLE=..." and "HREF=..." on stdout for every report queued
#######################################
main() {
  mkdir -p "${DOWNLOAD_DIR}"

  # NOTE(review): qpdf is not invoked anywhere in this script; the install
  # check is kept so the existing setup behaviour does not change.
  ensure_tool qpdf
  ensure_tool recode

  local line title href
  # grep exits 1 when nothing matches; that is "no reports", not an error,
  # so only statuses above 1 are allowed to fail the pipeline.
  curl --fail --silent --show-error -H 'Accept-Charset: euc-kr' "${LIST_URL}" \
    | iconv -f euc-kr -t utf-8 \
    | { grep report_idx || [[ $? -eq 1 ]]; } \
    | while IFS= read -r line; do
      title=$(extract_attr title "${line}")
      href=$(extract_attr href "${line}")
      if [[ -z "${title}" || -z "${href}" ]]; then
        printf 'warn: skipping line without title/href: %s\n' "${line}" >&2
        continue
      fi

      # Titles may carry numeric HTML entities (&#NNNN;); decode them to UTF-8.
      if [[ "${title}" == *'&#'* ]]; then
        title=$(printf '%s\n' "${title}" | recode html..utf8)
      fi

      printf 'TITLE=%s\n' "${title}"
      printf 'HREF=%s\n' "${href}"
      download_report "${title}" "${href}"
    done
}

main
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment