Created
May 16, 2018 13:18
-
-
Save guessi/8b5641cc7753c5808cf6211c32320bda to your computer and use it in GitHub Desktop.
Simple Helper Script for Extracting LINE Add Friends Links from Given URLs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import re | |
from bs4 import BeautifulSoup | |
from requests import get | |
from sys import stdin | |
# URLs that extract_lineat has already started processing; it checks this
# list before fetching so the same address is not crawled twice in one run.
scanned_urls = []
# Links that already point at line.me are results rather than pages to crawl.
# Compiled once here instead of on every loop iteration; the dot is escaped so
# that only the literal host "line.me" matches.
_LINE_ME_URL = re.compile(r'https?://line\.me')


def loop_search(match=None, depth=0):
    """Run extract_lineat on every distinct non-LINE link in *match*.

    Args:
        match: Iterable of anchor elements that expose ``attrs["href"]``
            (e.g. the result of ``soup.select``). ``None`` is treated as
            an empty collection.
        depth: Current crawl depth, passed through to ``extract_lineat``
            unchanged.
    """
    # De-duplicate the hrefs, and sort them so the crawl order is the same
    # from one run to the next.
    for url in sorted({link.attrs["href"] for link in match or ()}):
        if _LINE_ME_URL.search(url) is None:
            extract_lineat(url, depth)
def extract_lineat(url, depth=0):
    """Print the LINE add-friend IDs linked from *url*, then follow its links.

    The page is fetched once. For every anchor whose href starts with
    ``line://ti/p/`` or ``http(s)://line.me/R/ti/p/``, the text after the
    last ``/`` is printed with ``%40`` shown as ``@``. The page's ordinary
    ``http`` links are then handed to ``loop_search`` while the depth limit
    has not been reached.

    Args:
        url: Page address. Surrounding whitespace (such as the newline left
            on a line read from stdin) is stripped; blank values are ignored.
        depth: Number of link hops already taken to reach this page.
    """
    # Strip before the duplicate check so "x\n" and "x" count as one page.
    url = url.strip()
    if not url or url in scanned_urls:
        return
    scanned_urls.append(url)

    print('==> processing: {0}'.format(url))
    # Local import: the file only imports `get` from requests at the top.
    from requests import RequestException
    try:
        # The timeout keeps one unresponsive host from hanging the crawl.
        page = get(url, timeout=10)
    except RequestException as err:
        # A dead or malformed link should not abort the remaining crawl.
        print('==> skipped: {0} ({1})'.format(url, err))
        return
    soup = BeautifulSoup(page.text, 'html.parser')

    # pattern for the line@ links
    selectors = (
        'a[href^="line://ti/p/"]',
        'a[href^="https://line.me/R/ti/p/"]',
        'a[href^="http://line.me/R/ti/p/"]',
    )
    hrefs = set()
    for selector in selectors:
        hrefs.update(anchor.attrs["href"] for anchor in soup.select(selector))
    # Sorted so repeated runs print the IDs in the same order.
    for href in sorted(hrefs):
        print(href.rsplit('/', 1)[1].replace('%40', '@'))

    # Follow ordinary links, but stop recursing once the next level would
    # be depth 3 or more.
    depth += 1
    if depth < 3:
        loop_search(soup.select('a[href^="http"]'), depth)
if __name__ == "__main__":
    # Each non-blank line on stdin is a seed URL. Iterating stdin directly
    # starts crawling as soon as a line arrives instead of waiting for EOF.
    for url in stdin:
        if url.strip():
            extract_lineat(url, 0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment