iloris · November 4, 2012 16:18
diff --git a/rfc822.php b/rfc822.php
 <?php

 	#
 	# RFC 822/2822/5322 Email Parser
 	#
 	# By Cal Henderson <[email protected]>
 	#
 	# This code is dual licensed:
 	# CC Attribution-ShareAlike 2.5 - http://creativecommons.org/licenses/by-sa/2.5/
 	# GPLv3 - http://www.gnu.org/copyleft/gpl.html
 	#
 	# $Revision$
 	#

 	##################################################################################

 	function is_valid_email_address($email, $options=array()){

 		#
 		# you can pass a few different named options as a second argument,
 		# but the defaults are usually a good choice.
 		#

 		$defaults = array(
 			'allow_comments'	=> true,
 			'public_internet'	=> true, # turn this off for 'strict' mode
 		);

 		$opts = array();
 		foreach ($defaults as $k => $v) $opts[$k] = isset($options[$k]) ? $options[$k] : $v;
 		$options = $opts;



 		####################################################################################
 		#
 		# NO-WS-CTL       =       %d1-8 /         ; US-ASCII control characters
 		#                         %d11 /          ;  that do not include the
 		#                         %d12 /          ;  carriage return, line feed,
 		#                         %d14-31 /       ;  and white space characters
 		#                         %d127
 		# ALPHA          =  %x41-5A / %x61-7A   ; A-Z / a-z
 		# DIGIT          =  %x30-39

 		$no_ws_ctl	= "[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f]";
 		$alpha		= "[\\x41-\\x5a\\x61-\\x7a]";
 		$digit		= "[\\x30-\\x39]";
 		$cr		= "\\x0d";
 		$lf		= "\\x0a";
 		$crlf		= "(?:$cr$lf)";


 		####################################################################################
 		#
 		# obs-char        =       %d0-9 / %d11 /          ; %d0-127 except CR and
 		#                         %d12 / %d14-127         ;  LF
 		# obs-text        =       *LF *CR *(obs-char *LF *CR)
 		# text            =       %d1-9 /         ; Characters excluding CR and LF
 		#                         %d11 /
 		#                         %d12 /
 		#                         %d14-127 /
 		#                         obs-text
 		# obs-qp          =       "\" (%d0-127)
 		# quoted-pair     =       ("\" text) / obs-qp

 		$obs_char	= "[\\x00-\\x09\\x0b\\x0c\\x0e-\\x7f]";
 		$obs_text	= "(?:$lf*$cr*(?:$obs_char$lf*$cr*)*)";
 		$text		= "(?:[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f]|$obs_text)";

 		#
 		# there's an issue with the definition of 'text', since 'obs_text' can
 		# be blank and that allows qp's with no character after the slash. we're
 		# treating that as bad, so this just checks we have at least one
 		# (non-CRLF) character
 		#

 		$text		= "(?:$lf*$cr*$obs_char$lf*$cr*)";
 		$obs_qp		= "(?:\\x5c[\\x00-\\x7f])";
 		$quoted_pair	= "(?:\\x5c$text|$obs_qp)";


 		####################################################################################
 		#
 		# obs-FWS         =       1*WSP *(CRLF 1*WSP)
 		# FWS             =       ([*WSP CRLF] 1*WSP) /   ; Folding white space
 		#                         obs-FWS
 		# ctext           =       NO-WS-CTL /     ; Non white space controls
 		#                         %d33-39 /       ; The rest of the US-ASCII
 		#                         %d42-91 /       ;  characters not including "(",
 		#                         %d93-126        ;  ")", or "\"
 		# ccontent        =       ctext / quoted-pair / comment
 		# comment         =       "(" *([FWS] ccontent) [FWS] ")"
 		# CFWS            =       *([FWS] comment) (([FWS] comment) / FWS)

 		#
 		# note: we translate ccontent only partially to avoid an infinite loop
 		# instead, we'll recursively strip *nested* comments before processing
 		# the input. that will leave 'plain old comments' to be matched during
 		# the main parse.
 		#

 		$wsp		= "[\\x20\\x09]";
 		$obs_fws	= "(?:$wsp+(?:$crlf$wsp+)*)";
 		$fws		= "(?:(?:(?:$wsp*$crlf)?$wsp+)|$obs_fws)";
 		$ctext		= "(?:$no_ws_ctl|[\\x21-\\x27\\x2A-\\x5b\\x5d-\\x7e])";
 		$ccontent	= "(?:$ctext|$quoted_pair)";
 		$comment	= "(?:\\x28(?:$fws?$ccontent)*$fws?\\x29)";
 		$cfws		= "(?:(?:$fws?$comment)*(?:$fws?$comment|$fws))";


 		#
 		# these are the rules for removing *nested* comments. we'll just detect
 		# outer comment and replace it with an empty comment, and recurse until
 		# we stop.
 		#

 		$outer_ccontent_dull	= "(?:$fws?$ctext|$quoted_pair)";
 		$outer_ccontent_nest	= "(?:$fws?$comment)";
 		$outer_comment		= "(?:\\x28$outer_ccontent_dull*(?:$outer_ccontent_nest$outer_ccontent_dull*)+$fws?\\x29)";


 		####################################################################################
 		#
 		# atext           =       ALPHA / DIGIT / ; Any character except controls,
 		#                         "!" / "#" /     ;  SP, and specials.
 		#                         "$" / "%" /     ;  Used for atoms
 		#                         "&" / "'" /
 		#                         "*" / "+" /
 		#                         "-" / "/" /
 		#                         "=" / "?" /
 		#                         "^" / "_" /
 		#                         "`" / "{" /
 		#                         "|" / "}" /
 		#                         "~"
 		# atom            =       [CFWS] 1*atext [CFWS]

 		$atext		= "(?:$alpha|$digit|[\\x21\\x23-\\x27\\x2a\\x2b\\x2d\\x2f\\x3d\\x3f\\x5e\\x5f\\x60\\x7b-\\x7e])";
 		$atom		= "(?:$cfws?(?:$atext)+$cfws?)";


 		####################################################################################
 		#
 		# qtext           =       NO-WS-CTL /     ; Non white space controls
 		#                         %d33 /          ; The rest of the US-ASCII
 		#                         %d35-91 /       ;  characters not including "\"
 		#                         %d93-126        ;  or the quote character
 		# qcontent        =       qtext / quoted-pair
 		# quoted-string   =       [CFWS]
 		#                         DQUOTE *([FWS] qcontent) [FWS] DQUOTE
 		#                         [CFWS]
 		# word            =       atom / quoted-string

 		$qtext		= "(?:$no_ws_ctl|[\\x21\\x23-\\x5b\\x5d-\\x7e])";
 		$qcontent	= "(?:$qtext|$quoted_pair)";
 		$quoted_string	= "(?:$cfws?\\x22(?:$fws?$qcontent)*$fws?\\x22$cfws?)";

 		#
 		# changed the '*' to a '+' to require that quoted strings are not empty
 		#

 		$quoted_string	= "(?:$cfws?\\x22(?:$fws?$qcontent)+$fws?\\x22$cfws?)";
 		$word		= "(?:$atom|$quoted_string)";


 		####################################################################################
 		#
 		# obs-local-part  =       word *("." word)
 		# obs-domain      =       atom *("." atom)

 		$obs_local_part	= "(?:$word(?:\\x2e$word)*)";
 		$obs_domain	= "(?:$atom(?:\\x2e$atom)*)";


 		####################################################################################
 		#
 		# dot-atom-text   =       1*atext *("." 1*atext)
 		# dot-atom        =       [CFWS] dot-atom-text [CFWS]

 		$dot_atom_text	= "(?:$atext+(?:\\x2e$atext+)*)";
 		$dot_atom	= "(?:$cfws?$dot_atom_text$cfws?)";


 		####################################################################################
 		#
 		# domain-literal  =       [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
 		# dcontent        =       dtext / quoted-pair
 		# dtext           =       NO-WS-CTL /     ; Non white space controls
 		# 
 		#                         %d33-90 /       ; The rest of the US-ASCII
 		#                         %d94-126        ;  characters not including "[",
 		#                                         ;  "]", or "\"

 		$dtext		= "(?:$no_ws_ctl|[\\x21-\\x5a\\x5e-\\x7e])";
 		$dcontent	= "(?:$dtext|$quoted_pair)";
 		$domain_literal	= "(?:$cfws?\\x5b(?:$fws?$dcontent)*$fws?\\x5d$cfws?)";


 		####################################################################################
 		#
 		# local-part      =       dot-atom / quoted-string / obs-local-part
 		# domain          =       dot-atom / domain-literal / obs-domain
 		# addr-spec       =       local-part "@" domain

 		$local_part	= "(($dot_atom)|($quoted_string)|($obs_local_part))";
 		$domain		= "(($dot_atom)|($domain_literal)|($obs_domain))";
 		$addr_spec	= "$local_part\\x40$domain";



 		#
 		# this was previously 256 based on RFC3696, but dominic's errata was accepted.
 		#

 		if (strlen($email) > 254) return 0;


 		#
 		# we need to strip nested comments first - we replace them with a simple comment
 		#

 		if ($options['allow_comments']){

 			$email = email_strip_comments($outer_comment, $email, "(x)");
 		}


 		#
 		# now match what's left
 		#

 		if (!preg_match("!^$addr_spec$!", $email, $m)){

 			return 0;
 		}

 		$bits = array(
 			'local'			=> isset($m[1]) ? $m[1] : '',
 			'local-atom'		=> isset($m[2]) ? $m[2] : '',
 			'local-quoted'		=> isset($m[3]) ? $m[3] : '',
 			'local-obs'		=> isset($m[4]) ? $m[4] : '',
 			'domain'		=> isset($m[5]) ? $m[5] : '',
 			'domain-atom'		=> isset($m[6]) ? $m[6] : '',
 			'domain-literal'	=> isset($m[7]) ? $m[7] : '',
 			'domain-obs'		=> isset($m[8]) ? $m[8] : '',
 		);


 		#
 		# we need to now strip comments from $bits[local] and $bits[domain],
 		# since we know they're in the right place and we want them out of the
 		# way for checking IPs, label sizes, etc
 		#

 		if ($options['allow_comments']){
 			$bits['local']	= email_strip_comments($comment, $bits['local']);
 			$bits['domain']	= email_strip_comments($comment, $bits['domain']);
 		}


 		#
 		# length limits on segments
 		#

 		if (strlen($bits['local']) > 64) return 0;
 		if (strlen($bits['domain']) > 255) return 0;


 		#
 		# restrictions on domain-literals from RFC2821 section 4.1.3
 		#
 		# RFC4291 changed the meaning of :: in IPv6 addresses - i can mean one or
 		# more zero groups (updated from 2 or more).
 		#

 		if (strlen($bits['domain-literal'])){

 			$Snum			= "(\d{1,3})";
 			$IPv4_address_literal	= "$Snum\.$Snum\.$Snum\.$Snum";

 			$IPv6_hex		= "(?:[0-9a-fA-F]{1,4})";

 			$IPv6_full		= "IPv6\:$IPv6_hex(?:\:$IPv6_hex){7}";

 			$IPv6_comp_part		= "(?:$IPv6_hex(?:\:$IPv6_hex){0,7})?";
 			$IPv6_comp		= "IPv6\:($IPv6_comp_part\:\:$IPv6_comp_part)";

 			$IPv6v4_full		= "IPv6\:$IPv6_hex(?:\:$IPv6_hex){5}\:$IPv4_address_literal";

 			$IPv6v4_comp_part	= "$IPv6_hex(?:\:$IPv6_hex){0,5}";
 			$IPv6v4_comp		= "IPv6\:((?:$IPv6v4_comp_part)?\:\:(?:$IPv6v4_comp_part\:)?)$IPv4_address_literal";


 			#
 			# IPv4 is simple
 			#

 			if (preg_match("!^\[$IPv4_address_literal\]$!", $bits['domain'], $m)){

 				if (intval($m[1]) > 255) return 0;
 				if (intval($m[2]) > 255) return 0;
 				if (intval($m[3]) > 255) return 0;
 				if (intval($m[4]) > 255) return 0;

 			}else{

 				#
 				# this should be IPv6 - a bunch of tests are needed here :)
 				#

 				while (1){

 					if (preg_match("!^\[$IPv6_full\]$!", $bits['domain'])){
 						break;
 					}

 					if (preg_match("!^\[$IPv6_comp\]$!", $bits['domain'], $m)){
 						list($a, $b) = explode('::', $m[1]);
 						$folded = (strlen($a) && strlen($b)) ? "$a:$b" : "$a$b";
 						$groups = explode(':', $folded);
 						if (count($groups) > 7) return 0;
 						break;
 					}

 					if (preg_match("!^\[$IPv6v4_full\]$!", $bits['domain'], $m)){

 						if (intval($m[1]) > 255) return 0;
 						if (intval($m[2]) > 255) return 0;
 						if (intval($m[3]) > 255) return 0;
 						if (intval($m[4]) > 255) return 0;
 						break;
 					}

 					if (preg_match("!^\[$IPv6v4_comp\]$!", $bits['domain'], $m)){
 						list($a, $b) = explode('::', $m[1]);
 						$b = substr($b, 0, -1); # remove the trailing colon before the IPv4 address
 						$folded = (strlen($a) && strlen($b)) ? "$a:$b" : "$a$b";
 						$groups = explode(':', $folded);
 						if (count($groups) > 5) return 0;
 						break;
 					}

 					return 0;
 				}
 			}			
 		}else{

 			#
 			# the domain is either dot-atom or obs-domain - either way, it's
 			# made up of simple labels and we split on dots
 			#

 			$labels = explode('.', $bits['domain']);


 			#
 			# this is allowed by both dot-atom and obs-domain, but is un-routeable on the
 			# public internet, so we'll fail it (e.g. user@localhost)
 			#

 			if ($options['public_internet']){
 				if (count($labels) == 1) return 0;
 			}


 			#
 			# checks on each label
 			#

 			foreach ($labels as $label){

 				if (strlen($label) > 63) return 0;
 				if (substr($label, 0, 1) == '-') return 0;
 				if (substr($label, -1) == '-') return 0;
 			}


 			#
 			# last label can't be all numeric
 			#

 			if ($options['public_internet']){
 				if (preg_match('!^[0-9]+$!', array_pop($labels))) return 0;
 			}
 		}


 		return 1;
 	}

 	##################################################################################

 	function email_strip_comments($comment, $email, $replace=''){

 		while (1){
 			$new = preg_replace("!$comment!", $replace, $email);
 			if (strlen($new) == strlen($email)){
 				return $email;
 			}
 			$email = $new;
 		}
 	}

 	##################################################################################
 ?>
	<?php

	#
	# RFC 822/2822/5322 Email Parser
	#
	# By Cal Henderson <[email protected]>
	#
	# This code is dual licensed:
	# CC Attribution-ShareAlike 2.5 - http://creativecommons.org/licenses/by-sa/2.5/
	# GPLv3 - http://www.gnu.org/copyleft/gpl.html
	#
	# $Revision$
	#

	##################################################################################

	function is_valid_email_address($email, $options=array()){

	#
	# you can pass a few different named options as a second argument,
	# but the defaults are usually a good choice.
	#

	$defaults = array(
	'allow_comments' => true,
	'public_internet' => true, # turn this off for 'strict' mode
	);

	$opts = array();
	foreach ($defaults as $k => $v) $opts[$k] = isset($options[$k]) ? $options[$k] : $v;
	$options = $opts;



	####################################################################################
	#
	# NO-WS-CTL = %d1-8 / ; US-ASCII control characters
	# %d11 / ; that do not include the
	# %d12 / ; carriage return, line feed,
	# %d14-31 / ; and white space characters
	# %d127
	# ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
	# DIGIT = %x30-39

	$no_ws_ctl = "[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f]";
	$alpha = "[\\x41-\\x5a\\x61-\\x7a]";
	$digit = "[\\x30-\\x39]";
	$cr = "\\x0d";
	$lf = "\\x0a";
	$crlf = "(?:$cr$lf)";


	####################################################################################
	#
	# obs-char = %d0-9 / %d11 / ; %d0-127 except CR and
	# %d12 / %d14-127 ; LF
	# obs-text = LF CR (obs-char LF *CR)
	# text = %d1-9 / ; Characters excluding CR and LF
	# %d11 /
	# %d12 /
	# %d14-127 /
	# obs-text
	# obs-qp = "\" (%d0-127)
	# quoted-pair = ("\" text) / obs-qp

	$obs_char = "[\\x00-\\x09\\x0b\\x0c\\x0e-\\x7f]";
	$obs_text = "(?:$lf$cr(?:$obs_char$lf$cr)*)";
	$text = "(?:[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f]\|$obs_text)";

	#
	# there's an issue with the definition of 'text', since 'obs_text' can
	# be blank and that allows qp's with no character after the slash. we're
	# treating that as bad, so this just checks we have at least one
	# (non-CRLF) character
	#

	$text = "(?:$lf$cr$obs_char$lf$cr)";
	$obs_qp = "(?:\\x5c[\\x00-\\x7f])";
	$quoted_pair = "(?:\\x5c$text\|$obs_qp)";


	####################################################################################
	#
	# obs-FWS = 1WSP (CRLF 1*WSP)
	# FWS = ([WSP CRLF] 1WSP) / ; Folding white space
	# obs-FWS
	# ctext = NO-WS-CTL / ; Non white space controls
	# %d33-39 / ; The rest of the US-ASCII
	# %d42-91 / ; characters not including "(",
	# %d93-126 ; ")", or "\"
	# ccontent = ctext / quoted-pair / comment
	# comment = "(" *([FWS] ccontent) [FWS] ")"
	# CFWS = *([FWS] comment) (([FWS] comment) / FWS)

	#
	# note: we translate ccontent only partially to avoid an infinite loop
	# instead, we'll recursively strip nested comments before processing
	# the input. that will leave 'plain old comments' to be matched during
	# the main parse.
	#

	$wsp = "[\\x20\\x09]";
	$obs_fws = "(?:$wsp+(?:$crlf$wsp+)*)";
	$fws = "(?:(?:(?:$wsp*$crlf)?$wsp+)\|$obs_fws)";
	$ctext = "(?:$no_ws_ctl\|[\\x21-\\x27\\x2A-\\x5b\\x5d-\\x7e])";
	$ccontent = "(?:$ctext\|$quoted_pair)";
	$comment = "(?:\\x28(?:$fws?$ccontent)*$fws?\\x29)";
	$cfws = "(?:(?:$fws?$comment)*(?:$fws?$comment\|$fws))";


	#
	# these are the rules for removing nested comments. we'll just detect
	# outer comment and replace it with an empty comment, and recurse until
	# we stop.
	#

	$outer_ccontent_dull = "(?:$fws?$ctext\|$quoted_pair)";
	$outer_ccontent_nest = "(?:$fws?$comment)";
	$outer_comment = "(?:\\x28$outer_ccontent_dull(?:$outer_ccontent_nest$outer_ccontent_dull)+$fws?\\x29)";


	####################################################################################
	#
	# atext = ALPHA / DIGIT / ; Any character except controls,
	# "!" / "#" / ; SP, and specials.
	# "$" / "%" / ; Used for atoms
	# "&" / "'" /
	# "*" / "+" /
	# "-" / "/" /
	# "=" / "?" /
	# "^" / "_" /
	# "`" / "{" /
	# "\|" / "}" /
	# "~"
	# atom = [CFWS] 1*atext [CFWS]

	$atext = "(?:$alpha\|$digit\|[\\x21\\x23-\\x27\\x2a\\x2b\\x2d\\x2f\\x3d\\x3f\\x5e\\x5f\\x60\\x7b-\\x7e])";
	$atom = "(?:$cfws?(?:$atext)+$cfws?)";


	####################################################################################
	#
	# qtext = NO-WS-CTL / ; Non white space controls
	# %d33 / ; The rest of the US-ASCII
	# %d35-91 / ; characters not including "\"
	# %d93-126 ; or the quote character
	# qcontent = qtext / quoted-pair
	# quoted-string = [CFWS]
	# DQUOTE *([FWS] qcontent) [FWS] DQUOTE
	# [CFWS]
	# word = atom / quoted-string

	$qtext = "(?:$no_ws_ctl\|[\\x21\\x23-\\x5b\\x5d-\\x7e])";
	$qcontent = "(?:$qtext\|$quoted_pair)";
	$quoted_string = "(?:$cfws?\\x22(?:$fws?$qcontent)*$fws?\\x22$cfws?)";

	#
	# changed the '*' to a '+' to require that quoted strings are not empty
	#

	$quoted_string = "(?:$cfws?\\x22(?:$fws?$qcontent)+$fws?\\x22$cfws?)";
	$word = "(?:$atom\|$quoted_string)";


	####################################################################################
	#
	# obs-local-part = word *("." word)
	# obs-domain = atom *("." atom)

	$obs_local_part = "(?:$word(?:\\x2e$word)*)";
	$obs_domain = "(?:$atom(?:\\x2e$atom)*)";


	####################################################################################
	#
	# dot-atom-text = 1atext ("." 1*atext)
	# dot-atom = [CFWS] dot-atom-text [CFWS]

	$dot_atom_text = "(?:$atext+(?:\\x2e$atext+)*)";
	$dot_atom = "(?:$cfws?$dot_atom_text$cfws?)";


	####################################################################################
	#
	# domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
	# dcontent = dtext / quoted-pair
	# dtext = NO-WS-CTL / ; Non white space controls
	#
	# %d33-90 / ; The rest of the US-ASCII
	# %d94-126 ; characters not including "[",
	# ; "]", or "\"

	$dtext = "(?:$no_ws_ctl\|[\\x21-\\x5a\\x5e-\\x7e])";
	$dcontent = "(?:$dtext\|$quoted_pair)";
	$domain_literal = "(?:$cfws?\\x5b(?:$fws?$dcontent)*$fws?\\x5d$cfws?)";


	####################################################################################
	#
	# local-part = dot-atom / quoted-string / obs-local-part
	# domain = dot-atom / domain-literal / obs-domain
	# addr-spec = local-part "@" domain

	$local_part = "(($dot_atom)\|($quoted_string)\|($obs_local_part))";
	$domain = "(($dot_atom)\|($domain_literal)\|($obs_domain))";
	$addr_spec = "$local_part\\x40$domain";



	#
	# this was previously 256 based on RFC3696, but dominic's errata was accepted.
	#

	if (strlen($email) > 254) return 0;


	#
	# we need to strip nested comments first - we replace them with a simple comment
	#

	if ($options['allow_comments']){

	$email = email_strip_comments($outer_comment, $email, "(x)");
	}


	#
	# now match what's left
	#

	if (!preg_match("!^$addr_spec$!", $email, $m)){

	return 0;
	}

	$bits = array(
	'local' => isset($m[1]) ? $m[1] : '',
	'local-atom' => isset($m[2]) ? $m[2] : '',
	'local-quoted' => isset($m[3]) ? $m[3] : '',
	'local-obs' => isset($m[4]) ? $m[4] : '',
	'domain' => isset($m[5]) ? $m[5] : '',
	'domain-atom' => isset($m[6]) ? $m[6] : '',
	'domain-literal' => isset($m[7]) ? $m[7] : '',
	'domain-obs' => isset($m[8]) ? $m[8] : '',
	);


	#
	# we need to now strip comments from $bits[local] and $bits[domain],
	# since we know they're in the right place and we want them out of the
	# way for checking IPs, label sizes, etc
	#

	if ($options['allow_comments']){
	$bits['local'] = email_strip_comments($comment, $bits['local']);
	$bits['domain'] = email_strip_comments($comment, $bits['domain']);
	}


	#
	# length limits on segments
	#

	if (strlen($bits['local']) > 64) return 0;
	if (strlen($bits['domain']) > 255) return 0;


	#
	# restrictions on domain-literals from RFC2821 section 4.1.3
	#
	# RFC4291 changed the meaning of :: in IPv6 addresses - i can mean one or
	# more zero groups (updated from 2 or more).
	#

	if (strlen($bits['domain-literal'])){

	$Snum = "(\d{1,3})";
	$IPv4_address_literal = "$Snum\.$Snum\.$Snum\.$Snum";

	$IPv6_hex = "(?:[0-9a-fA-F]{1,4})";

	$IPv6_full = "IPv6\:$IPv6_hex(?:\:$IPv6_hex){7}";

	$IPv6_comp_part = "(?:$IPv6_hex(?:\:$IPv6_hex){0,7})?";
	$IPv6_comp = "IPv6\:($IPv6_comp_part\:\:$IPv6_comp_part)";

	$IPv6v4_full = "IPv6\:$IPv6_hex(?:\:$IPv6_hex){5}\:$IPv4_address_literal";

	$IPv6v4_comp_part = "$IPv6_hex(?:\:$IPv6_hex){0,5}";
	$IPv6v4_comp = "IPv6\:((?:$IPv6v4_comp_part)?\:\:(?:$IPv6v4_comp_part\:)?)$IPv4_address_literal";


	#
	# IPv4 is simple
	#

	if (preg_match("!^\[$IPv4_address_literal\]$!", $bits['domain'], $m)){

	if (intval($m[1]) > 255) return 0;
	if (intval($m[2]) > 255) return 0;
	if (intval($m[3]) > 255) return 0;
	if (intval($m[4]) > 255) return 0;

	}else{

	#
	# this should be IPv6 - a bunch of tests are needed here :)
	#

	while (1){

	if (preg_match("!^\[$IPv6_full\]$!", $bits['domain'])){
	break;
	}

	if (preg_match("!^\[$IPv6_comp\]$!", $bits['domain'], $m)){
	list($a, $b) = explode('::', $m[1]);
	$folded = (strlen($a) && strlen($b)) ? "$a:$b" : "$a$b";
	$groups = explode(':', $folded);
	if (count($groups) > 7) return 0;
	break;
	}

	if (preg_match("!^\[$IPv6v4_full\]$!", $bits['domain'], $m)){

	if (intval($m[1]) > 255) return 0;
	if (intval($m[2]) > 255) return 0;
	if (intval($m[3]) > 255) return 0;
	if (intval($m[4]) > 255) return 0;
	break;
	}

	if (preg_match("!^\[$IPv6v4_comp\]$!", $bits['domain'], $m)){
	list($a, $b) = explode('::', $m[1]);
	$b = substr($b, 0, -1); # remove the trailing colon before the IPv4 address
	$folded = (strlen($a) && strlen($b)) ? "$a:$b" : "$a$b";
	$groups = explode(':', $folded);
	if (count($groups) > 5) return 0;
	break;
	}

	return 0;
	}
	}
	}else{

	#
	# the domain is either dot-atom or obs-domain - either way, it's
	# made up of simple labels and we split on dots
	#

	$labels = explode('.', $bits['domain']);


	#
	# this is allowed by both dot-atom and obs-domain, but is un-routeable on the
	# public internet, so we'll fail it (e.g. user@localhost)
	#

	if ($options['public_internet']){
	if (count($labels) == 1) return 0;
	}


	#
	# checks on each label
	#

	foreach ($labels as $label){

	if (strlen($label) > 63) return 0;
	if (substr($label, 0, 1) == '-') return 0;
	if (substr($label, -1) == '-') return 0;
	}


	#
	# last label can't be all numeric
	#

	if ($options['public_internet']){
	if (preg_match('!^[0-9]+$!', array_pop($labels))) return 0;
	}
	}


	return 1;
	}

	##################################################################################

	function email_strip_comments($comment, $email, $replace=''){

	while (1){
	$new = preg_replace("!$comment!", $replace, $email);
	if (strlen($new) == strlen($email)){
	return $email;
	}
	$email = $new;
	}
	}

	##################################################################################
	?>