yoki · December 10, 2022 05:17
diff --git a/find_replace.py b/find_replace.py
 # Quick reference for the str search/replace methods.
 # s, s2, s3 are assumed to be strings and i, j, count integers (placeholders, not defined here).
 s.index(s2, i, j) # Lowest index of s2 found within the slice s[i:j]; raises ValueError if not found
 s.find(s2) # Lowest index of s2 in s, or -1 if s2 does not occur
 s.index(s2) # Lowest index of s2 in s; raises ValueError if s2 does not occur
 s.replace(s2, s3) # Return a copy of s with every occurrence of s2 replaced by s3
 s.replace(s2, s3, count) # Same, but only the first count occurrences are replaced
 s.rfind(s2) # Highest index of s2 in s, or -1 if s2 does not occur
 s.rindex(s2) # Highest index of s2 in s; raises ValueError if s2 does not occur

 #===================================================
 #Regexp
 #===================================================

 #-----------------------
 ## match
 #-----------------------
 import re  # the snippet uses re.search but never imported the module

 # Look for 'word:' followed by exactly three word characters; the
 # parentheses capture those three characters as group 1.
 # The subject is named `text` so the builtin `str` is not shadowed.
 text = 'an example word:cat!!'
 match = re.search(r'word:(\w\w\w)', text)
 if match:
     # group() is the whole matched span, group(1) the first captured group.
     print('found', match.group())  ## 'found word:cat'
     # The literal ends with a space and print() adds its own separator,
     # so the output contains two spaces before 'cat'.
     print('matched part is ', match.group(1))  ## 'matched part is  cat'
 else:
     # re.search returns None when the pattern occurs nowhere in the text.
     print('did not find')
  
 #-----------------------
 ## search location
 #-----------------------
 # re.search scans the whole subject, so a pattern may match anywhere in it.
 subject = 'piiig'
 match = re.search(r'iii', subject)  #=> found: match.group() == "iii"
 match = re.search(r'igs', subject)  #=> not found: match is None


 #-----------------------
 # Find all
 #-----------------------
 ## Suppose we have a text with many email addresses
 # NOTE: the sample addresses were garbled into an "[email protected]"
 # placeholder that contains no '@', so the pattern below matched nothing.
 # Stand-in addresses on reserved example domains are used instead.
 # The subject is named `text` so the builtin `str` is not shadowed.
 text = 'purple alice@example.com, blah monkey bob@example.org blah dishwasher'

 ## re.findall() returns a list of every non-overlapping match as a string
 emails = re.findall(r'[\w\.-]+@[\w\.-]+', text)  ## ['alice@example.com', 'bob@example.org']
 for email in emails:
     # do something with each found email string
     print(email)  # print() call form: works on Python 3 (and 2 for a single argument)

 #-----------------------
 # replace
 #-----------------------

 # NOTE: the sample addresses and the replacement template were garbled into
 # an "[email protected]" placeholder; stand-ins on reserved example domains
 # are used so the snippet demonstrates an actual substitution.
 # The subject is named `text` so the builtin `str` is not shadowed.
 text = 'purple alice@example.com, blah monkey bob@example.org blah dishwasher'
 ## re.sub(pat, replacement, string) -- returns new string with all replacements,
 ## \1 is group(1), \2 group(2) in the replacement
 # Keep the user part (group 1) and swap the host part for a fixed domain.
 replaced = re.sub(r'([\w\.-]+)@([\w\.-]+)', r'\1@example.net', text)
 print(replaced)
 ## purple alice@example.net, blah monkey bob@example.net blah dishwasher
diff --git a/string.py b/string.py
 #############
 # concat
 ############
 # Array Concat
 # Joining a list of strings with a separator builds the result in one pass.
 parts = ['Is', 'Chicago', 'Not', 'Chicago?']
 ' '.join(parts) #=> 'Is Chicago Not Chicago?'

 # for print function
 # Sample values so the three calls below run on their own
 # (a, b and c were never defined in this snippet).
 a, b, c = 'ACME', '50', '91.5'
 # All three calls print 'ACME:50:91.5'.
 print(a + ':' + b + ':' + c)       # Ugly: only works if every value is already a str
 print(':'.join([a, b, c]))         # Still ugly: same str-only restriction
 print(a, b, c, sep=':')            # Better: print() converts each argument itself


 #############
 # stripping
 ############
 # Sample inputs: one padded with whitespace, one padded with punctuation.
 s = '   hello world  \n'
 t = '-----hello====='

 # With no argument, strip() removes whitespace (including the newline) from both ends.
 s.strip()  #=> 'hello world'
 # lstrip(chars) removes the given characters from the left end only.
 t.lstrip('-')  #=> 'hello====='

 ## Sanitizing
 # http://chimera.labs.oreilly.com/books/1230000000393/ch02.html#_discussion_31


 #############
 # literals
 #############
 # A triple-quoted literal may span several lines; the line breaks become
 # part of the string. Here the literal holds a SQL query and is not bound
 # to a name, so evaluating it has no effect.
 """select * from yuho_text
 where element_id = 'BusinessRisksTextBlock'
 and date > '2015-00-00'
 and doc_name = '有価証券報告書'"""
	# Quick reference for the str search/replace methods.
	# s, s2, s3 are assumed to be strings and i, j, count integers (placeholders, not defined here).
	s.index(s2, i, j) # Lowest index of s2 found within the slice s[i:j]; raises ValueError if not found
	s.find(s2) # Lowest index of s2 in s, or -1 if s2 does not occur
	s.index(s2) # Lowest index of s2 in s; raises ValueError if s2 does not occur
	s.replace(s2, s3) # Return a copy of s with every occurrence of s2 replaced by s3
	s.replace(s2, s3, count) # Same, but only the first count occurrences are replaced
	s.rfind(s2) # Highest index of s2 in s, or -1 if s2 does not occur
	s.rindex(s2) # Highest index of s2 in s; raises ValueError if s2 does not occur

	#===================================================
	#Regexp
	#===================================================

	#-----------------------
	## match
	#-----------------------
	import re  # the snippet uses re.search but never imported the module

	# Look for 'word:' followed by exactly three word characters; the
	# parentheses capture those three characters as group 1.
	# The subject is named `text` so the builtin `str` is not shadowed.
	text = 'an example word:cat!!'
	match = re.search(r'word:(\w\w\w)', text)
	# The if/else bodies are indented again; they had been flattened to the
	# same level as the `if`, which is a syntax error.
	if match:
	    # group() is the whole matched span, group(1) the first captured group.
	    print('found', match.group())  ## 'found word:cat'
	    # The literal ends with a space and print() adds its own separator,
	    # so the output contains two spaces before 'cat'.
	    print('matched part is ', match.group(1))  ## 'matched part is  cat'
	else:
	    # re.search returns None when the pattern occurs nowhere in the text.
	    print('did not find')

	#-----------------------
	## search location
	#-----------------------
	# re.search scans the whole subject, so a pattern may match anywhere in it.
	subject = 'piiig'
	match = re.search(r'iii', subject)  #=> found: match.group() == "iii"
	match = re.search(r'igs', subject)  #=> not found: match is None


	#-----------------------
	# Find all
	#-----------------------
	## Suppose we have a text with many email addresses
	# NOTE: the sample addresses were garbled into an "[email protected]"
	# placeholder that contains no '@', so the pattern below matched nothing.
	# Stand-in addresses on reserved example domains are used instead.
	# The subject is named `text` so the builtin `str` is not shadowed.
	text = 'purple alice@example.com, blah monkey bob@example.org blah dishwasher'

	## re.findall() returns a list of every non-overlapping match as a string
	emails = re.findall(r'[\w\.-]+@[\w\.-]+', text)  ## ['alice@example.com', 'bob@example.org']
	# The loop body is indented again; it had been flattened to the level of
	# the `for`, which is a syntax error.
	for email in emails:
	    # do something with each found email string
	    print(email)  # print() call form: works on Python 3 (and 2 for a single argument)

	#-----------------------
	# replace
	#-----------------------

	# NOTE: the sample addresses and the replacement template were garbled into
	# an "[email protected]" placeholder; stand-ins on reserved example domains
	# are used so the snippet demonstrates an actual substitution.
	# The subject is named `text` so the builtin `str` is not shadowed.
	text = 'purple alice@example.com, blah monkey bob@example.org blah dishwasher'
	## re.sub(pat, replacement, string) -- returns new string with all replacements,
	## \1 is group(1), \2 group(2) in the replacement
	# Keep the user part (group 1) and swap the host part for a fixed domain.
	replaced = re.sub(r'([\w\.-]+)@([\w\.-]+)', r'\1@example.net', text)
	print(replaced)
	## purple alice@example.net, blah monkey bob@example.net blah dishwasher
	#############
	# concat
	############
	# Array Concat
	# Joining a list of strings with a separator builds the result in one pass.
	parts = ['Is', 'Chicago', 'Not', 'Chicago?']
	' '.join(parts) #=> 'Is Chicago Not Chicago?'

	# for print function
	# Sample values so the three calls below run on their own
	# (a, b and c were never defined in this snippet).
	a, b, c = 'ACME', '50', '91.5'
	# All three calls print 'ACME:50:91.5'.
	print(a + ':' + b + ':' + c) # Ugly: only works if every value is already a str
	print(':'.join([a, b, c])) # Still ugly: same str-only restriction
	print(a, b, c, sep=':') # Better: print() converts each argument itself


	#############
	# stripping
	############
	# Sample inputs: one padded with whitespace, one padded with punctuation.
	s = ' hello world \n'
	t = '-----hello====='

	# With no argument, strip() removes whitespace (including the newline) from both ends.
	s.strip()  #=> 'hello world'
	# lstrip(chars) removes the given characters from the left end only.
	t.lstrip('-')  #=> 'hello====='

	## Sanitizing
	# http://chimera.labs.oreilly.com/books/1230000000393/ch02.html#_discussion_31


	#############
	# literals
	#############
	# A triple-quoted literal may span several lines; the line breaks become
	# part of the string. Here the literal holds a SQL query and is not bound
	# to a name, so evaluating it has no effect.
	"""select * from yuho_text
	where element_id = 'BusinessRisksTextBlock'
	and date > '2015-00-00'
	and doc_name = '有価証券報告書'"""