Skip to content

Instantly share code, notes, and snippets.

@thalesmg
Last active November 24, 2019 11:06
Show Gist options
  • Save thalesmg/1973ab1f2f88898093fe94dea4f3582e to your computer and use it in GitHub Desktop.
Save thalesmg/1973ab1f2f88898093fe94dea4f3582e to your computer and use it in GitHub Desktop.
khinsider downloader
#!/usr/bin/env stack
{-
stack --resolver lts-14.15 script
--package bytestring
--package tagsoup
--package wreq
--package lens
--package async
--package optparse-applicative
--package directory
--package transformers
--package mtl
--package split
-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
import Control.Applicative (optional)
import Control.Concurrent.Async (Concurrently (..))
import Control.Concurrent.QSem (QSem, newQSem, signalQSem,
waitQSem)
import Control.Exception (bracket_)
import Control.Lens ((&), (^.))
import Control.Monad (unless)
import Control.Monad.Trans (liftIO)
import Control.Monad.Trans.Except (ExceptT (..), runExceptT)
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as BSL
import Data.ByteString.Lazy.Char8 (unpack)
import qualified Data.ByteString.Lazy.Char8 as C8
import Data.Char (isAlphaNum, isSpace)
import Data.List (isPrefixOf)
import Data.List.Split (splitOn)
import Network.Wreq (Response, defaults, get,
responseBody, responseStatus,
statusCode)
import Options.Applicative (Parser, argument, auto, execParser,
fullDesc, help, helper, info, long,
metavar, option, progDesc, short,
str, strOption, value, (<**>))
import System.Directory (createDirectoryIfMissing,
doesDirectoryExist, getPermissions,
writable)
import System.Exit (exitFailure)
import Text.HTML.TagSoup (Tag (..), parseTags, (~/=), (~==))
newtype URL = URL { getURL :: String } deriving (Eq, Show)
data Config = Config
{ outputDir :: Maybe FilePath
, songlistURL :: URL
, asyncJobs :: Int
} deriving Show
baseURL :: String
baseURL = "https://downloads.khinsider.com"
every :: Int -> [a] -> [a]
every _ [] = []
every n (x:xs) = x : every n (drop (n - 1) xs)
getOK :: URL -> IO (Either String ByteString)
getOK (URL url) = do
response <- get url
if response ^. responseStatus . statusCode == 200
then pure . Right $ response ^. responseBody
else pure . Left $ "failed to fetch " <> url
collectLinks :: [Tag ByteString] -> [(ByteString, URL)]
collectLinks ts' = go ts' []
where
go [] acc = reverse acc
go (TagOpen "a" attrs : TagText txt : TagClose "a" : ts) acc
| Just href <- lookup "href" attrs = go ts ((txt, URL $ baseURL <> unpack href):acc)
| otherwise = go ts acc
go (t:ts) acc = go ts acc
getSongURLs :: URL -> IO (Either String [(ByteString, URL)])
getSongURLs url = do
eresponse <- getOK url
pure $ eresponse >>= \body -> do
let soup = parseTags body
-- name, length in min, size in mb
songlist = every 3
. collectLinks
. takeWhile (~/= (TagClose "table" :: Tag ByteString))
. dropWhile (~/= (TagOpen "table" [("id", "songlist")] :: Tag ByteString))
$ soup
Right songlist
getDownloadSongURL :: URL -> IO (Either String URL)
getDownloadSongURL url = do
eresponse <- getOK url
pure $ eresponse >>= \body -> do
let soup = parseTags body
case filter (~== (TagOpen "audio" [] :: Tag ByteString)) soup of
(TagOpen "audio" attrs : _)
| Just src <- lookup "src" attrs -> Right (URL . unpack $ src)
_ -> Left "could not find audio tag with src"
sanitizeFilename :: ByteString -> ByteString
sanitizeFilename = C8.map clean
where
clean c = if isAlphaNum c || isSpace c
then c
else '_'
downloadMP3 :: FilePath -> (ByteString, URL) -> IO (Either String ())
downloadMP3 dest (name, url) = do
let filename = dest <> "/" <> unpack (sanitizeFilename name) <> ".mp3"
eresponse <- getOK url
traverse (BSL.writeFile filename) eresponse
withConc :: QSem -> (a -> IO b) -> a -> Concurrently b
withConc sem f a =
Concurrently $ bracket_ (waitQSem sem) (signalQSem sem) (f a)
configParser :: Parser Config
configParser
= Config
<$> optional (strOption
( long "output"
<> short 'o'
<> metavar "DIR"
<> help "output directory path"
))
<*> ( URL <$>
( argument str
( metavar "URL"
<> help "the url with songlist"
)
)
)
<*> option auto
( long "jobs"
<> short 'j'
<> help "number of parallel jobs"
<> value 4
)
die :: String -> IO a
die msg = putStrLn msg >> exitFailure
andM :: [IO Bool] -> IO Bool
andM [] = pure True
andM (t:ts) = do
b <- t
if b then andM ts else pure False
postValidateConfig :: Config -> IO ()
postValidateConfig Config{songlistURL, outputDir, asyncJobs} = do
let URL url = songlistURL
unless (baseURL `isPrefixOf` url) $ die "url should be from khinsider"
dirOk <-
case outputDir of
Nothing -> pure True
Just dir ->
andM [ doesDirectoryExist dir
, pure . writable =<< getPermissions dir
]
unless dirOk $ die "output dir should be writable"
unless (asyncJobs > 0) $ die "async jobs must be positive"
prepareDownloadDir :: Config -> IO FilePath
prepareDownloadDir Config{songlistURL = URL url, outputDir = Nothing} = do
let albumSlug = last . splitOn "/" $ url
newDir = "./" <> albumSlug
createDirectoryIfMissing False newDir
pure newDir
prepareDownloadDir Config{outputDir = Just dir} = pure dir
main :: IO ()
main = do
cfg@Config{songlistURL, asyncJobs} <- execParser opts
postValidateConfig cfg
outputDir <- prepareDownloadDir cfg
putStrLn $ "[*] output dir: " <> outputDir
sem <- newQSem asyncJobs
efilesToDownload <- runExceptT $ do
liftIO $ putStrLn "[*] getting songlist"
songURLs <- ExceptT $ getSongURLs songlistURL
liftIO $ putStrLn "[*] getting mp3 URLs"
mp3URLs <- ExceptT $ fmap sequence (runConcurrently $ traverse (withConc sem getDownloadSongURL) (map snd songURLs))
pure $ zipWith (\(name, _) mp3URL -> (name, mp3URL)) songURLs mp3URLs
case efilesToDownload of
Left msg -> die msg
Right urls -> do
putStrLn $ "[*] downloading " <> show (length urls) <> " files..."
runConcurrently $ traverse (withConc sem (downloadMP3 outputDir)) urls
putStrLn "[*] done!"
where
opts = info (configParser <**> helper)
( fullDesc
<> progDesc "downloads an entire album from KHInsider"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment