Last active
June 11, 2019 19:36
-
-
Save Altoidnerd/1904373e0dec393f48b3f04da242d95d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Kept for any other part of the script that still expands $e; the code below
# uses printf instead, because `echo -e` rewrites backslash sequences that
# occur in the data itself.
e='echo -e'

# log_msg MESSAGE
# Appends "<date>:<TAB>MESSAGE" to the file named by $logfile.
log_msg() {
  printf '%s:\t%s\n' "$(date)" "$1" >> "$logfile"
}

# column_stats
# Reads the values of one column from stdin (one value per line) and, in a
# single pass, prints exactly two lines:
#   1. every distinct character in order of first appearance, with each
#      double quote doubled so the string can sit inside a quoted CSV field
#   2. "<min length> <max length> <min periods> <max periods>" per line
# With no input at all the first line is empty and the second is "0 0 0 0".
column_stats() {
  awk '
    {
      len = length($0)

      # Walk the line with substr() rather than FS="": splitting on an empty
      # field separator is not specified by POSIX.
      for (i = 1; i <= len; i++) {
        ch = substr($0, i, 1)
        if (!(ch in seen)) {
          seen[ch] = 1
          charset = charset (ch == "\"" ? "\"\"" : ch)
        }
      }

      # gsub() returns the number of replacements, i.e. the period count.
      probe = $0
      dots = gsub(/\./, "", probe)

      # Seed min/max from the first record instead of a sentinel such as
      # 1000, which would cap the minimum for columns of longer values.
      if (NR == 1) {
        len_min = len_max = len
        dot_min = dot_max = dots
      } else {
        if (len < len_min) len_min = len
        if (len > len_max) len_max = len
        if (dots < dot_min) dot_min = dots
        if (dots > dot_max) dot_max = dots
      }
    }
    END {
      # The charset is passed as an argument, never as the format string, so
      # a "%" in the data cannot be taken for a conversion specification.
      printf "%s\n%d %d %d %d\n", charset, len_min, len_max, dot_min, dot_max
    }
  '
}

# profile_column NAME
# Fetches column NAME of `$db`.`$table` through the command held in $sqlget,
# profiles it with column_stats, and appends one CSV row to $columnfile:
#   db,table,column,"charset",len_min,len_max,periods_min,periods_max
# Progress is logged to $logfile.
# Globals read:    sqlget db table logfile columnfile
# Globals written: column charset charcount_min charcount_max
#                  num_period_min num_period_max (left global on purpose, as
#                  in the original flat loop)
profile_column() {
  local name=$1
  local stats

  # `sed 1d` suppresses the column-name header line of the query output.
  log_msg "getting column $name ... "
  # $sqlget is left unquoted on purpose: it holds a command plus its options.
  # NOTE(review): the exit status of $sqlget is hidden by the pipeline, so a
  # failed query still yields a row of empty statistics - confirm that this
  # is acceptable.
  column=$($sqlget "select \`$name\` from \`$db\`.\`$table\`" | sed 1d)

  # The three measurements used to take six pipelines over the same data.
  # They are independent, so they could be backgrounded with `&` and joined
  # with `wait`, but a single awk pass makes that unnecessary. The three log
  # messages are kept so the log shows the same messages as before.
  log_msg "extracting charset from \`$db\`.\`$table\` column $name"
  log_msg "extracting number of characters from \`$db\`.\`$table\` column $name"
  log_msg "extracting number of periods from \`$db\`.\`$table\` column $name"
  stats=$(printf '%s\n' "$column" | column_stats)

  # Line 1 is the charset (read verbatim, whitespace included); line 2 holds
  # the four numbers.
  {
    IFS= read -r charset
    read -r charcount_min charcount_max num_period_min num_period_max
  } <<EOF
$stats
EOF

  printf '%s,%s,%s,"%s",%s,%s,%s,%s\n' \
    "$db" "$table" "$name" "$charset" \
    "$charcount_min" "$charcount_max" \
    "$num_period_min" "$num_period_max" >> "$columnfile"
}

# $cols is a whitespace-separated list of column names, so it is split here
# deliberately.
# shellcheck disable=SC2086
for col in $cols; do
  profile_column "$col"
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment