jhyland87 · August 28, 2018 19:42
diff --git a/find-ip-and-dns-records.sh b/find-ip-and-dns-records.sh


 # Central settings table. Keys are dotted paths grouped by purpose:
 #   tmp.*       scratch-directory prefix and the file names created inside it
 #   output.*    result location
 #   display.*   printf format strings for the status line and the result table
 #   patterns.*  extended regular expressions used to find IPs / host names
 #   throttle.*  optional cap on the number of files examined (empty = no cap)
 declare -A CFG=(
   [tmp.prefix]='ip-dns-parse'
   [tmp.file.file-list]='files-to-parse.list'
   [tmp.file.grep-results]='grep-results.list'
   [tmp.file.pruned-grep-results]='pruned-grep-results.list'

   [output.result]='./results'

   [display.result.printf]='%-20s %-20s %-6s %-6s %-50s %-60s %-40s\n'
   [display.processing]='%-50s ... '

   [patterns.ip]='192\.168\.78\.[0-9]{1,3}|10\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}'
   [patterns.dns]='[a-zA-Z0-9\.-]+\.(website|wsstaging)\.com'

   [throttle.limit.found-files]=''
 )

 # Built after the table exists because it is composed from two of its entries.
 CFG[patterns.ip-and-dns]="(${CFG[patterns.ip]}|${CFG[patterns.dns]})"

 #CFG[patterns.processing]="%-50s ... "

 # Create a private scratch directory that holds the intermediate list files.
 printf "${CFG[display.processing]}" "Creating temp directory"

 # An explicit XXXXXX template is accepted by both GNU and BSD mktemp; the bare
 # "-t prefix" form has no X's and is rejected by GNU mktemp. The result is
 # checked because every later step writes below this directory.
 CFG[tmp.dir]=$(mktemp -d "${TMPDIR:-/tmp}/${CFG[tmp.prefix]}.XXXXXX") || {
   printf 'Failed\n'
   printf 'error: could not create a temporary directory\n' >&2
   exit 1
 }

 # Remove the scratch directory on every exit path, including interruptions.
 trap 'rm -rf -- "${CFG[tmp.dir]}"' EXIT

 printf "Done (%s)\n" "${CFG[tmp.dir]}"

 #CFG["tmp.dir"]="ip-dns-parse"

 # Delete leftovers of any tmp.file.* entry. With a freshly created directory
 # nothing is found; this only matters if tmp.dir is pointed at a fixed path.
 for cfgval in "${!CFG[@]}"; do
   [[ "${cfgval}" =~ ^tmp\.file\. ]] || continue

   tmpfile="${CFG[tmp.dir]}/${CFG[${cfgval}]}"
   if [[ -e "${tmpfile}" ]]; then
     printf "Found tmp file %s, deleting it\n" "${tmpfile}"
     rm -- "${tmpfile}"
   fi
 done



 # Build the list of candidate files, one path per line, in the scratch dir.
 printf "${CFG[display.processing]}" "Searching for files to parse"

 # Regular files only. The -iname tests drop documentation, logs, backups,
 # markup, archives, SQL, stylesheets, databases and dotfiles; the -path tests
 # drop anything below VCS metadata or node_modules directories.
 # head enforces the optional throttle (an empty setting falls back to a very
 # large limit). All paths are quoted so a scratch dir with blanks still works.
 find liberty-apps/liberty-{cron,magento,misc,models,portal,reports,services,ship{,-{client,server}},sweet,ups,warehouse} \
   -type f \
   -not -iname 'README' \
   -not -iname 'fabfile.py' \
   -not -iname '*.txt' \
   -not -iname '*.log' \
   -not -iname '*.bak' \
   -not -iname '*.c' \
   -not -iname '*.html' \
   -not -iname '*.htm' \
   -not -iname '*.jar' \
   -not -iname '*.sql' \
   -not -iname '*.css' \
   -not -iname '*.less' \
   -not -iname '*.db' \
   -not -iname '.*' \
   -not -path '*/.git/*' \
   -not -path '*/.gitmodules/*' \
   -not -path '*/.svn/*' \
   -not -path '*/node_modules/*' \
   | head -n "${CFG[throttle.limit.found-files]:-100000000}" \
   > "${CFG[tmp.dir]}/${CFG[tmp.file.file-list]}"

 # awk keeps only the number; some wc implementations pad it and append the name.
 file_count=$(wc -l "${CFG[tmp.dir]}/${CFG[tmp.file.file-list]}" | awk '{print $1}')

 printf "%s (found %s files)\n" "Done" "${file_count}"


 # Print the static part of the progress display and the initial counter.
 printf "${CFG[display.processing]}" "Parsing files... "

 # Number of digits in the total; the running counter is zero-padded to this
 # width so it can be overwritten in place with a fixed number of backspaces.
 totalnum_charlen=${#file_count}
 numpad="%0${totalnum_charlen}d"

 # Start at 0: the scan loop increments after each file, so the counter ends
 # exactly at file_count. Starting at 1 ended at file_count+1, which can need
 # one more digit than the pad width and corrupt the in-place redraw.
 filen=0
 printf "Progress: %s/" "${file_count}"
 printf "${numpad}" "${filen}"

 # Write backspace characters to stdout, moving the cursor left so the next
 # output overwrites what is already on the line.
 # Arguments: $1 - number of backspaces to emit (missing or 0 emits nothing)
 # Outputs:   $1 "\b" characters, no trailing newline
 function bksp(){
   local count=${1:-0}
   local i   # local so the caller's variables are never clobbered

   for (( i = 0; i < count; i++ )); do
     printf '\b'
   done
 }

 # Make sure the pruned results file exists even when no file is scanned, so
 # the line count below does not fail.
 : >> "${CFG[tmp.dir]}/${CFG[tmp.file.pruned-grep-results]}"

 # Search every listed file for IP / host name patterns.
 # IFS= with a single variable keeps each path intact; `read -r fle _` cut the
 # path at its first blank and then grepped a non-existent file.
 while IFS= read -r fle; do
   # Raw hits (file:line:text) are appended to the grep-results file; the same
   # stream is passed through ignore-comments.awk (not visible here; presumably
   # it drops commented-out lines) and appended to the pruned results file.
   grep -nHE -- "${CFG[patterns.ip-and-dns]}" "${fle}" \
     | tee -a "${CFG[tmp.dir]}/${CFG[tmp.file.grep-results]}" \
     | awk -f ./ignore-comments.awk -v trim=1 >> "${CFG[tmp.dir]}/${CFG[tmp.file.pruned-grep-results]}"

   # Redraw the counter in place.
   filen=$((filen + 1))
   bksp "${totalnum_charlen}"
   printf "${numpad}" "${filen}"
 done < "${CFG[tmp.dir]}/${CFG[tmp.file.file-list]}"

 # Move back over the whole progress text:
 # [char length of total file count] x 2 + [1 for the /] + [10 for the Progress: ]
 bksp $((totalnum_charlen * 2 + 1 + 10))

 grep_matches=$(wc -l "${CFG[tmp.dir]}/${CFG[tmp.file.pruned-grep-results]}" | awk '{print $1}')

 printf "%s (%s files parsed and found %s matches)\n" "Done" "${file_count}" "${grep_matches}"

 echo
 printf "${CFG[display.result.printf]}" "FILENAME" "PROJECT" "LINE" "TYPE" "VALUE" "FILEPATH" "CODE"
 # PARSE GREP RESULTS

 # Occurrence count per distinct matched value (IP or host name).
 declare -A referenceCount

 # Number of pruned result lines read so far.
 processed=0

 # Each record is "path:line:code". IFS= with a single variable keeps the whole
 # record; `read -r match _` kept only the text before the first blank, so
 # matches located after a space in the code were lost.
 while IFS= read -r match; do
   processed=$((processed + 1))

   # Split on the first two colons only; the code itself may contain colons.
   filepath=${match%%:*}
   rest=${match#*:}
   line=${rest%%:*}
   if [[ "${rest}" == *:* ]]; then
     code=${rest#*:}
   else
     code=""
   fi

   # NOTE(review): project is the FIRST path component, which is
   # "liberty-apps" for every path produced by the find above - confirm intent.
   project=${filepath%%/*}
   filename=${filepath##*/}

   # One array element per IP / host name found in the code text.
   mapfile -t matches < <(grep -Eo -- "${CFG[patterns.ip-and-dns]}" <<<"${code}")
   matchCount=${#matches[@]}

   if (( matchCount == 0 )); then
     continue
   fi

   for m in "${matches[@]}"; do
     # Increment the reference counter for the match
     referenceCount[${m}]=$(( ${referenceCount[${m}]:-0} + 1 ))

     # Classify with whole-string matches. DNS is tested first because a host
     # name may itself contain an IP-looking fragment.
     if grep -Eqx -- "${CFG[patterns.dns]}" <<<"${m}"; then
       match_type="DNS"
     elif grep -Eqx -- "${CFG[patterns.ip]}" <<<"${m}"; then
       match_type="IP"
     else
       match_type="?"
     fi

     printf "${CFG[display.result.printf]}" "${filename}" "${project}" "${line}" "${match_type}" "${m}" "${filepath}" "${code}"
   done

   #printf "\nMatch #%s\n" ${processed}
   #printf "%10s : %s\n" "File" "${file}"
   #printf "%10s : %s\n" "Line" "${line}"
   #printf "%10s : %s\n" "Code" "${code}"
   #printf "%10s : %s\n" "Match #" "${matchCount}"
   #printf "%10s : %s\n" "Matches" "${matches}"
 done < "${CFG[tmp.dir]}/${CFG[tmp.file.pruned-grep-results]}"
 echo
 # Summary: every distinct value with the number of times it was referenced.
 for k in "${!referenceCount[@]}"; do
   printf "%s : %s\n" "${k}" "${referenceCount[${k}]}"
 done

 # Remove the scratch directory. The status format lives under
 # display.processing; patterns.processing is never defined, so using it
 # printed an empty string instead of the message.
 printf "${CFG[display.processing]}" "Deleting temporary directory"
 # :? aborts instead of running rm -rf on an empty path.
 rm -rf -- "${CFG[tmp.dir]:?}"
 echo "Done"


	# Central settings table. Keys are dotted paths grouped by purpose.
	declare -A CFG

	# Scratch-directory prefix and the names of the files created inside it.
	CFG[tmp.prefix]="ip-dns-parse"
	CFG[tmp.file.file-list]="files-to-parse.list"
	CFG[tmp.file.grep-results]="grep-results.list"
	CFG[tmp.file.pruned-grep-results]="pruned-grep-results.list"

	CFG[output.result]="./results"

	# printf formats: result table row and the "doing X ... " status prefix.
	CFG[display.result.printf]="%-20s %-20s %-6s %-6s %-50s %-60s %-40s\n"
	CFG[display.processing]="%-50s ... "

	# Extended regular expressions (grep -E). Alternation must be a bare "|";
	# an escaped "\|" matches a literal pipe character and so matches nothing.
	CFG[patterns.ip]='192\.168\.78\.[0-9]{1,3}|10\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}'
	CFG[patterns.dns]='[a-zA-Z0-9\.-]+\.(website|wsstaging)\.com'
	CFG[patterns.ip-and-dns]="(${CFG[patterns.ip]}|${CFG[patterns.dns]})"

	# Optional cap on the number of files examined; empty means no cap.
	CFG[throttle.limit.found-files]=""
	#CFG[patterns.processing]="%-50s ... "

	# Create a private scratch directory that holds the intermediate list files.
	printf "${CFG[display.processing]}" "Creating temp directory"

	# An explicit XXXXXX template is accepted by both GNU and BSD mktemp; the bare
	# "-t prefix" form has no X's and is rejected by GNU mktemp. The result is
	# checked because every later step writes below this directory.
	CFG[tmp.dir]=$(mktemp -d "${TMPDIR:-/tmp}/${CFG[tmp.prefix]}.XXXXXX") || {
	  printf 'Failed\n'
	  printf 'error: could not create a temporary directory\n' >&2
	  exit 1
	}

	# Remove the scratch directory on every exit path, including interruptions.
	trap 'rm -rf -- "${CFG[tmp.dir]}"' EXIT

	printf "Done (%s)\n" "${CFG[tmp.dir]}"

	#CFG["tmp.dir"]="ip-dns-parse"

	# Delete leftovers of any tmp.file.* entry. With a freshly created directory
	# nothing is found; this only matters if tmp.dir is pointed at a fixed path.
	for cfgval in "${!CFG[@]}"; do
	  [[ "${cfgval}" =~ ^tmp\.file\. ]] || continue

	  tmpfile="${CFG[tmp.dir]}/${CFG[${cfgval}]}"
	  if [[ -e "${tmpfile}" ]]; then
	    printf "Found tmp file %s, deleting it\n" "${tmpfile}"
	    rm -- "${tmpfile}"
	  fi
	done



	# Build the list of candidate files, one path per line, in the scratch dir.
	printf "${CFG[display.processing]}" "Searching for files to parse"

	# Regular files only. The -iname tests drop documentation, logs, backups,
	# markup, archives, SQL, stylesheets, databases and dotfiles; the -path tests
	# need the surrounding "*" wildcards to match files below VCS metadata or
	# node_modules directories. The pipe into head must be unescaped, otherwise
	# "|" and "head" are handed to find as arguments.
	find liberty-apps/liberty-{cron,magento,misc,models,portal,reports,services,ship{,-{client,server}},sweet,ups,warehouse} \
	  -type f \
	  -not -iname 'README' \
	  -not -iname 'fabfile.py' \
	  -not -iname '*.txt' \
	  -not -iname '*.log' \
	  -not -iname '*.bak' \
	  -not -iname '*.c' \
	  -not -iname '*.html' \
	  -not -iname '*.htm' \
	  -not -iname '*.jar' \
	  -not -iname '*.sql' \
	  -not -iname '*.css' \
	  -not -iname '*.less' \
	  -not -iname '*.db' \
	  -not -iname '.*' \
	  -not -path '*/.git/*' \
	  -not -path '*/.gitmodules/*' \
	  -not -path '*/.svn/*' \
	  -not -path '*/node_modules/*' \
	  | head -n "${CFG[throttle.limit.found-files]:-100000000}" \
	  > "${CFG[tmp.dir]}/${CFG[tmp.file.file-list]}"

	# awk keeps only the number; some wc implementations pad it and append the name.
	file_count=$(wc -l "${CFG[tmp.dir]}/${CFG[tmp.file.file-list]}" | awk '{print $1}')

	printf "%s (found %s files)\n" "Done" "${file_count}"


	# Print the static part of the progress display and the initial counter.
	printf "${CFG[display.processing]}" "Parsing files... "

	# Number of digits in the total; the running counter is zero-padded to this
	# width so it can be overwritten in place with a fixed number of backspaces.
	# Parameter expansion replaces the echo/wc/awk pipeline, whose escaped pipes
	# made echo print the rest of the command instead of piping into it.
	totalnum_charlen=${#file_count}
	numpad="%0${totalnum_charlen}d"

	# Start at 0: the scan loop increments after each file, so the counter ends
	# exactly at file_count rather than one past it.
	filen=0
	printf "Progress: %s/" "${file_count}"
	printf "${numpad}" "${filen}"

	# Write backspace characters to stdout, moving the cursor left so the next
	# output overwrites what is already on the line.
	# Arguments: $1 - number of backspaces to emit (missing or 0 emits nothing)
	# Outputs:   $1 "\b" characters, no trailing newline
	function bksp(){
	  local count=${1:-0}
	  local i   # local so the caller's variables are never clobbered

	  for (( i = 0; i < count; i++ )); do
	    printf '\b'
	  done
	}

	# Make sure the pruned results file exists even when no file is scanned, so
	# the line count below does not fail.
	: >> "${CFG[tmp.dir]}/${CFG[tmp.file.pruned-grep-results]}"

	# Search every listed file for IP / host name patterns.
	# IFS= with a single variable keeps each path intact; `read -r fle _` cut the
	# path at its first blank.
	while IFS= read -r fle; do
	  # Real (unescaped) pipes: raw hits are appended to the grep-results file and
	  # also passed through ignore-comments.awk (not visible here; presumably it
	  # drops commented-out lines) into the pruned results file.
	  grep -nHE -- "${CFG[patterns.ip-and-dns]}" "${fle}" \
	    | tee -a "${CFG[tmp.dir]}/${CFG[tmp.file.grep-results]}" \
	    | awk -f ./ignore-comments.awk -v trim=1 >> "${CFG[tmp.dir]}/${CFG[tmp.file.pruned-grep-results]}"

	  # Redraw the counter in place.
	  filen=$((filen + 1))
	  bksp "${totalnum_charlen}"
	  printf "${numpad}" "${filen}"
	done < "${CFG[tmp.dir]}/${CFG[tmp.file.file-list]}"

	# Move back over the whole progress text:
	# [char length of total file count] x 2 + [1 for the /] + [10 for the Progress: ]
	bksp $((totalnum_charlen * 2 + 1 + 10))

	grep_matches=$(wc -l "${CFG[tmp.dir]}/${CFG[tmp.file.pruned-grep-results]}" | awk '{print $1}')

	printf "%s (%s files parsed and found %s matches)\n" "Done" "${file_count}" "${grep_matches}"

	echo
	printf "${CFG[display.result.printf]}" "FILENAME" "PROJECT" "LINE" "TYPE" "VALUE" "FILEPATH" "CODE"
	# PARSE GREP RESULTS

	# Occurrence count per distinct matched value (IP or host name).
	declare -A referenceCount

	# Number of pruned result lines read so far.
	processed=0

	# Each record is "path:line:code". IFS= with a single variable keeps the whole
	# record; `read -r match _` kept only the text before the first blank.
	while IFS= read -r match; do
	  processed=$((processed + 1))

	  # Split on the first two colons only; the code itself may contain colons.
	  # Parameter expansion replaces the echo/cut/rev pipelines, whose escaped
	  # pipes turned them into plain echo calls.
	  filepath=${match%%:*}
	  rest=${match#*:}
	  line=${rest%%:*}
	  if [[ "${rest}" == *:* ]]; then
	    code=${rest#*:}
	  else
	    code=""
	  fi

	  # NOTE(review): project is the FIRST path component, which is
	  # "liberty-apps" for every path produced by the find above - confirm intent.
	  project=${filepath%%/*}
	  filename=${filepath##*/}

	  # One array element per IP / host name found in the code text.
	  mapfile -t matches < <(grep -Eo -- "${CFG[patterns.ip-and-dns]}" <<<"${code}")
	  matchCount=${#matches[@]}

	  if (( matchCount == 0 )); then
	    continue
	  fi

	  for m in "${matches[@]}"; do
	    # Increment the reference counter for the match
	    referenceCount[${m}]=$(( ${referenceCount[${m}]:-0} + 1 ))

	    # Classify with whole-string matches. DNS is tested first because a host
	    # name may itself contain an IP-looking fragment.
	    if grep -Eqx -- "${CFG[patterns.dns]}" <<<"${m}"; then
	      match_type="DNS"
	    elif grep -Eqx -- "${CFG[patterns.ip]}" <<<"${m}"; then
	      match_type="IP"
	    else
	      match_type="?"
	    fi

	    printf "${CFG[display.result.printf]}" "${filename}" "${project}" "${line}" "${match_type}" "${m}" "${filepath}" "${code}"
	  done

	  #printf "\nMatch #%s\n" ${processed}
	  #printf "%10s : %s\n" "File" "${file}"
	  #printf "%10s : %s\n" "Line" "${line}"
	  #printf "%10s : %s\n" "Code" "${code}"
	  #printf "%10s : %s\n" "Match #" "${matchCount}"
	  #printf "%10s : %s\n" "Matches" "${matches}"
	done < "${CFG[tmp.dir]}/${CFG[tmp.file.pruned-grep-results]}"
	echo
	# Summary: every distinct value with the number of times it was referenced.
	for k in "${!referenceCount[@]}"; do
	  printf "%s : %s\n" "${k}" "${referenceCount[${k}]}"
	done

	# Remove the scratch directory. The status format lives under
	# display.processing; patterns.processing is never defined, so using it
	# printed an empty string instead of the message.
	printf "${CFG[display.processing]}" "Deleting temporary directory"
	# :? aborts instead of running rm -rf on an empty path.
	rm -rf -- "${CFG[tmp.dir]:?}"
	echo "Done"