Skip to content

Instantly share code, notes, and snippets.

View ishritam's full-sized avatar
🎯
Focusing

Shritam Kumar Mund ishritam

🎯
Focusing
View GitHub Profile
import os, base64, re, logging
from elasticsearch import Elasticsearch
# Log transport details (optional):
logging.basicConfig(level=logging.INFO)

# Parse the auth and host from env:
# NOTE(review): BONSAI_URL holds placeholder text and is then used as the
# *name* of the environment variable looked up below, so os.environ raises
# KeyError unless a variable with exactly that name exists. Confirm whether
# the intent was os.environ['BONSAI_URL'] or to use the pasted URL directly.
BONSAI_URL = "Copy the url from the bonsai dashbord's credentials menu"
bonsai = os.environ[BONSAI_URL]

# Capture everything between "https://" and the last "@" (the credentials
# part of the URL) and split it on ':'.
# The pattern is a raw string: ':', '/' and '@' need no escaping in a regex,
# and the previous '\:' '\/' '\@' were invalid escape sequences in a normal
# string literal. The compiled pattern is unchanged.
auth = re.search(r'https://(.*)@', bonsai).group(1).split(':')
import scrapy
import re
import json
from pprint import pprint
from string import ascii_lowercase
import redis
class MedLVSpider(scrapy.Spider):
#name of the spider
name = 'mg_pv'
@ishritam
ishritam / lv.py
Last active August 17, 2020 10:42
import scrapy
import re
import json
from pprint import pprint
from string import ascii_lowercase
import redis
class MgLVSpider(scrapy.Spider):
#name of the spider
name = 'mg_lv'
# Sample sentence that every pattern below is run against.
test_phrase = 'It was 7 in Manchester United, he was given that number right after Beckham left.'

# Character-class patterns, one per entry.
test_patterns = [
    '[a-z]+',       # one or more lower-case letters
    '[A-Z]+',       # one or more upper-case letters
    '[a-zA-Z]+',    # one or more letters of either case
    '[A-Z][a-z]+',  # a capital letter followed by lower-case letters
    '[0-9]+',       # one or more digits
]

# Print the matches of each pattern in the sample sentence.
multi_re_find(test_patterns, test_phrase)
# Sample sentence used with the shorthand character classes below.
text = 'Jersey number of Cristiano Ronaldo is 7, his twitter account is @Cristiano. '

# Shorthand character classes, each paired with its negation.
patterns = [
    r'\d+',  # run of digits
    r'\D+',  # run of non-digits
    r'\s+',  # run of whitespace
    r'\S+',  # run of non-whitespace
    r'\w+',  # run of alphanumeric characters
    r'\W+',  # run of non-alphanumeric characters
]
def multi_re_find(patterns, phrase):
    '''
    Takes in a list of regex patterns
    Prints a list of all matches

    patterns: iterable of regular-expression patterns to try, in order.
    phrase:   the string that each pattern is searched in.

    For every pattern this prints a header line naming the pattern, then
    the list returned by re.findall(pattern, phrase), then a blank
    separator. Nothing is returned.
    '''
    for pattern in patterns:
        # f-string so the pattern itself is shown rather than a literal "{i}".
        print(f"Searching the phrase using the re check: {pattern}")
        # Use the loop variable; the old code referenced an undefined name.
        print(re.findall(pattern, phrase))
        print('\n')
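| Character | Description | Example Pattern Code | Example Match |
|-----------|-------------|----------------------|---------------|
| \d | A digit | file_\d\d | file_25 |
| \D | A non-digit | \D\D\D | ABC |
| \w | Alphanumeric (letter, digit, or underscore) | \w-\w\w\w | A-b_1 |
| \W | Non-alphanumeric | \W\W\W\W | !*+) |
| \s | Whitespace | a\sb\sc | a b c |
| \S | Non-whitespace | \S\S\S\S | This |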
# Python str.split()
txt = "Football is a game of 11 players on each team."
# With no separator given, the string is split on runs of whitespace.
x = txt.split(sep=None)
print(x)
| Function | Description |
|----------|-------------|
| re.search() | Scans the text for the first place the pattern matches and returns a Match object (or None if there is no match). |
| re.findall() | Returns a list containing all matches. |
| re.split() | Returns a list where the string has been split at each match. |
| re.sub() | Replaces one or many matches with a string. |
import re