Last active
December 10, 2022 05:17
-
-
Save yoki/e6911902d0fd410806ac72af96357182 to your computer and use it in GitHub Desktop.
Python String
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Quick reference: substring search and replace methods of str.
# s, s2 and s3 stand for strings; i, j and count stand for ints.
s.index(s2, i, j) # Lowest index of s2 within the slice s[i:j]; raises ValueError if not found
s.find(s2) # Lowest index of s2 in s, or -1 if not found
s.index(s2) # Lowest index of s2 in s; raises ValueError if not found
s.replace(s2, s3) # Copy of s with every occurrence of s2 replaced by s3
s.replace(s2, s3, count) # Copy of s with only the first count occurrences of s2 replaced by s3
s.rfind(s2) # Highest index of s2 in s, or -1 if not found
s.rindex(s2) # Highest index of s2 in s; raises ValueError if not found
#===================================================
# Regexp
#===================================================
#-----------------------
## match
#-----------------------
import re

# The sample is named `text` (not `str`) so the builtin str type stays usable.
text = 'an example word:cat!!'
# re.search() scans the string for the first place the pattern matches and
# returns a match object, or None when there is no match.
match = re.search(r'word:(\w\w\w)', text)
if match:
    # group() is the whole matched text; group(1) is the first (...) group.
    print('found', match.group())  ## 'found word:cat'
    print('matched part is', match.group(1))  ## 'matched part is cat'
else:
    print('did not find')
#-----------------------
## search location
#-----------------------
# re.search() looks for the pattern anywhere in the string, not only at the start.
match = re.search(r'iii', 'piiig') #=> found, match.group() == "iii"
# No position in 'piiig' matches 'igs', so search() returns None here.
match = re.search(r'igs', 'piiig') #=> not found, match is None
#-----------------------
# Find all
#-----------------------
import re

## Suppose we have a text with many email addresses
# The sample is named `text` (not `str`) so the builtin str type stays usable.
text = 'purple alice@google.com, blah monkey bob@abc.com blah dishwasher'
## Here re.findall() returns a list of all the found email strings
emails = re.findall(r'[\w\.-]+@[\w\.-]+', text)  ## ['alice@google.com', 'bob@abc.com']
for email in emails:
    # do something with each found email string
    print(email)
#-----------------------
# replace
#-----------------------
import re

# The sample is named `text` (not `str`) so the builtin str type stays usable.
text = 'purple alice@google.com, blah monkey bob@abc.com blah dishwasher'
## re.sub(pat, replacement, text) -- returns new string with all replacements,
## \1 is group(1), \2 group(2) in the replacement
# Keep the user name (group 1) and swap the host part of every address.
replaced = re.sub(r'([\w\.-]+)@([\w\.-]+)', r'\1@yo-yo-dyne.com', text)
print(replaced)
## purple alice@yo-yo-dyne.com, blah monkey bob@yo-yo-dyne.com blah dishwasher
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#############
# concat
############
# Array Concat
parts = ['Is', 'Chicago', 'Not', 'Chicago?']
# str.join() puts the separator between the items and returns one new string.
' '.join(parts) #=> 'Is Chicago Not Chicago?'
# for print function
# Sample values so the three print variants below can actually be run.
a, b, c = 'x', 'y', 'z'
print(a + ':' + b + ':' + c) # Ugly
print(':'.join([a, b, c])) # Still ugly
print(a, b, c, sep=':') # Better
#############
# stripping
############
# strip()/lstrip() return a new string; s and t themselves are not modified.
s = ' hello world \n'
# With no argument, strip() removes whitespace (including the newline) from both ends.
s.strip() #=> 'hello world'
t = '-----hello====='
# lstrip('-') removes only leading '-' characters; the trailing '=' run is kept.
t.lstrip('-') #=> 'hello====='
## Sanitizing
# http://chimera.labs.oreilly.com/books/1230000000393/ch02.html#_discussion_31
#############
# literals
#############
# A triple-quoted string may span several lines; the line breaks are part of
# its value. It is bound to a name here so the example has a usable result.
query = """select * from yuho_text
where element_id = 'BusinessRisksTextBlock'
and date > '2015-00-00'
and doc_name = '有価証券報告書'"""
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment