Skip to content

Instantly share code, notes, and snippets.

@matael
Created March 1, 2015 17:13
Show Gist options
  • Save matael/259ab9a435ac35aabb3e to your computer and use it in GitHub Desktop.
Save matael/259ab9a435ac35aabb3e to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:5b4d434866d32c721d4cc0943f1a59bccbc1de2609d4e2cd1162b860c87f8856"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import re\n",
"\n",
"from bs4 import BeautifulSoup\n",
"from requests import get"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"BASE_URL = 'http://www.episodesdecodelyoko.com'"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def retrieve_ids(saison):\n",
"    \"\"\"Return the episode ids linked from a season's listing page.\n",
"\n",
"    Fetches ``saison.php?saison=<saison>`` under BASE_URL, keeps the\n",
"    anchors whose href contains ``lecteur=li`` and extracts the digits\n",
"    that follow ``id=`` in each of them.\n",
"\n",
"    Args:\n",
"        saison: season number; converted with ``str`` to build the URL.\n",
"\n",
"    Returns:\n",
"        A list of id strings, in the order the links appear on the page.\n",
"        Links without an ``id=<digits>`` part are skipped.\n",
"    \"\"\"\n",
"    url = BASE_URL + '/' + 'saison.php?saison=' + str(saison)\n",
"    # Name the parser explicitly so the result does not depend on which\n",
"    # optional parsers happen to be installed.\n",
"    soup = BeautifulSoup(get(url).content, 'html.parser')\n",
"    ids = []\n",
"    for link in soup.find_all('a'):\n",
"        href = link.get('href')\n",
"        if not href or 'lecteur=li' not in href:\n",
"            continue\n",
"        # The greedy prefix selects the last 'id=<digits>' in the href,\n",
"        # which is what the previous substitution captured.\n",
"        match = re.search(r'.*id=(\\d+)', href)\n",
"        if match:\n",
"            ids.append(match.group(1))\n",
"    return ids"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 169
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def get_mp4_url(idx):\n",
"    \"\"\"Return the video file URL declared on an episode's player page.\n",
"\n",
"    Fetches ``episode.php?id=<idx>&lecteur=li`` under BASE_URL, looks at\n",
"    the scripts whose text contains ``jwplayer(`` and returns the quoted\n",
"    value of the first line of the form ``file: '<url>'``.\n",
"\n",
"    Args:\n",
"        idx: episode id; converted with ``str`` to build the URL.\n",
"\n",
"    Returns:\n",
"        The string found between the single quotes of the ``file:`` line.\n",
"\n",
"    Raises:\n",
"        IndexError: no ``file: '...'`` line was found in any jwplayer\n",
"            script. This is the exception type the earlier ``[0]``\n",
"            indexing raised, now with a descriptive message.\n",
"    \"\"\"\n",
"    page = get(BASE_URL + '/episode.php?id=' + str(idx) + '&lecteur=li')\n",
"    # Name the parser explicitly so the result does not depend on which\n",
"    # optional parsers happen to be installed.\n",
"    soup = BeautifulSoup(page.content, 'html.parser')\n",
"    for script in soup.find_all('script'):\n",
"        source = script.text\n",
"        if 'jwplayer(' not in source:\n",
"            continue\n",
"        for line in source.splitlines():\n",
"            line = line.strip()\n",
"            if not line.startswith('file:'):\n",
"                continue\n",
"            match = re.search(r\"file: '([^']+)'\", line)\n",
"            if match:\n",
"                return match.group(1)\n",
"    raise IndexError('no jwplayer file entry found for episode ' + str(idx))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 191
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Collect the video URL of every episode listed for seasons 0 through 4.\n",
"urls = []\n",
"for s in range(5):\n",
"    for idx in retrieve_ids(s):\n",
"        urls.append(get_mp4_url(idx))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 193
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Write the collected URLs to url_code_lyoko.txt, one per line.\n",
"with open('url_code_lyoko.txt', 'w') as f:\n",
"    for u in urls:\n",
"        f.write(u + '\\n')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 194
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment