demisjohn · January 20, 2016 19:17
diff --git a/Python RegEx Notes.py b/Python RegEx Notes.py
 import re       # regex

 #   This will be used to capture a group () within a larger string, and save that group to a variable

 # Compile the pattern once; it is used further down to pull the DC value out of a string.
 # The regex is written as a 'raw' Python string (r'...') so the backslashes reach the
 # regex engine untouched instead of being interpreted as string escapes.
 dcpattern = re.compile(r'DC=[-]?(\d+\.?\d*|\.\d+)V?', flags=re.IGNORECASE)

 # The Regular Expression
 #	DC=[-]?(\d+\.?\d*|\.\d+)V?
 # searches for the exact text
 #	DC=
 # then 0 or 1 instances (the '?') of the set of characters
 #	-
 #   (the sign sits outside the parentheses, so it is matched but NOT captured)
 # Start a group to capture via () parentheses
 #   This is how we extract the part of the string we're looking for.
 # Find one or more (+) digits (\d), via
 #	\d+
 # maybe (?) a period; the \ escaping IS needed here, because a bare . matches any character
 #	\.?
 # and any number (*) of further digits (this matches nothing if there are none)
 #	\d*
 # OR (|) a number written without a leading digit, eg. ".045"
 #	\.\d+
 # Requiring at least one digit means the group can never be '' or '.',
 #   both of which float() rejects with a ValueError.
 # This forms the 1st 'group' to capture, since it's enclosed in ()
 #   Here we captured only the numbers.
 # Maybe (?) has a
 #	V
 # Case is ignored in the whole expression (re.IGNORECASE), but we could have also specified
 #	[Dd][Cc]
 # and
 #	[Vv]
 # (sets of characters that include both upper & lower case) to accomplish the same thing



 # Perform the search. f1 is the string to scan (here a filename; see the example below).
 m = dcpattern.search(f1)  # use the compiled regex (dcpattern, built with re.compile() above) to find the DC value
 # search() returns None if nothing matched; otherwise m holds any 'groups' () defined in the RegEx pattern.
 if m:
     Vdc = float(m.group(1))  # grab 1st group from RegEx & convert to float
     # One pre-formatted string in parentheses prints the same text under Python 2 and Python 3.
     print('DC value found: %s  -->  %s (V)' % (m.groups(), Vdc))
 # m.groups() returns a tuple of all captured groups

 # Worked example: if the string being searched is
 f1 = 'Iinj=1.0mA, Vdc=2.220V - 08 Oct 2013, 1130_28- Optical Spectrum.jpg'
 # then the case-insensitive 'DC=' matches inside 'Vdc=', group 1 captures '2.220',
 # and after the search and float() conversion we'd get:
 Vdc = 2.22




 '''Other useful RegEx tokens
 .   - any single character
 .*  - any single character, any number of times (eg. any number of characters)
 + - like *, but can't be zero characters (only one or more)

 \d - a single decimal digit (0-9)

 ? - the preceding char (or group) is optional, ie. it may appear 0 or 1 times, eg.
    0?  means maybe has a 0

 \s - any whitespace (tab, space, newline etc.)

 Capture number with possible decimal point:
 (\d+\.?\d*)     - One or more digits (so the number must start with a digit: the leading "." of ".045" is never part of the capture, while "0.045" is captured whole), followed by maybe a ".", followed by any number of (including none) digits

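 Match either of two words:
 /(?:wordone|wordtwo)/      (?:...) means group but don't capture, ie. the group gets no number and is left out of m.groups(). The surrounding / / are regex delimiters from other languages; leave them out in Python.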
 '''
	import re # regex

	# This will be used to capture a group () within a larger string, and save that group to a variable

	# Compile the pattern once; it is used further down to pull the DC value out of a string.
	# The regex is written as a 'raw' Python string (r'...') so the backslashes reach the
	# regex engine untouched instead of being interpreted as string escapes.
	dcpattern = re.compile(r'DC=[-]?(\d+\.?\d*|\.\d+)V?', flags=re.IGNORECASE)

	# The Regular Expression
	#     DC=[-]?(\d+\.?\d*|\.\d+)V?
	# searches for the exact text
	#     DC=
	# then 0 or 1 instances (the '?') of the set of characters
	#     -
	# (the sign sits outside the parentheses, so it is matched but NOT captured)
	# Start a group to capture via () parentheses
	# This is how we extract the part of the string we're looking for.
	# Find one or more (+) digits (\d), via
	#     \d+
	# maybe (?) a period; the \ escaping IS needed here, because a bare . matches any character
	#     \.?
	# and any number (*) of further digits (this matches nothing if there are none)
	#     \d*
	# OR (|) a number written without a leading digit, eg. ".045"
	#     \.\d+
	# Without the quantifiers, (\d\.?\d) would capture only two digits (eg. '2.2' out of '2.220').
	# Requiring at least one digit means the group can never be '' or '.',
	# both of which float() rejects with a ValueError.
	# This forms the 1st 'group' to capture, since it's enclosed in ()
	# Here we captured only the numbers.
	# Maybe (?) has a
	#     V
	# Case is ignored in the whole expression (re.IGNORECASE), but we could have also specified
	#     [Dd][Cc]
	# and
	#     [Vv]
	# (sets of characters that include both upper & lower case) to accomplish the same thing



	# Perform the search. f1 is the string to scan (here a filename; see the example below).
	m = dcpattern.search(f1)  # use the compiled regex (dcpattern, built with re.compile() above) to find the DC value
	# search() returns None if nothing matched; otherwise m holds any 'groups' () defined in the RegEx pattern.
	if m:
		Vdc = float(m.group(1))  # grab 1st group from RegEx & convert to float
		# One pre-formatted string in parentheses prints the same text under Python 2 and Python 3.
		print('DC value found: %s  -->  %s (V)' % (m.groups(), Vdc))
	# m.groups() returns a tuple of all captured groups

	# Worked example: if the string being searched is
	f1 = 'Iinj=1.0mA, Vdc=2.220V - 08 Oct 2013, 1130_28- Optical Spectrum.jpg'
	# then the case-insensitive 'DC=' matches inside 'Vdc='. Provided the capture group
	# takes the whole number '2.220' (which needs the * quantifiers on \d described in
	# the comments above), after the search and float() conversion we'd get:
	Vdc = 2.22




	'''Other useful RegEx tokens
	. - any single character
	.* - any single character, any number of times (eg. any number of characters)
	+ - like *, but can't be zero characters (only one or more)

	\d - a single decimal digit (0-9)

	? - the preceding char (or group) is optional, ie. it may appear 0 or 1 times, eg.
	0? means maybe has a 0

	\s - any whitespace (tab, space, newline etc.)

	Capture number with possible decimal point:
	(\d+\.?\d*) - One or more digits (so the number must start with a digit: the leading "." of ".045" is never part of the capture, while "0.045" is captured whole), followed by maybe a ".", followed by any number of (including none) digits

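	Match either of two words:
	/(?:wordone|wordtwo)/ (?:...) means group but don't capture, ie. the group gets no number and is left out of m.groups(). The | must not be escaped (\| would match a literal "|"). The surrounding / / are regex delimiters from other languages; leave them out in Python.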
	'''
No results found