Last active
February 5, 2022 12:53
-
-
Save iancover/e4d0d1801c280f2ac74ab26b218c9bf5 to your computer and use it in GitHub Desktop.
Text Analyzer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html>
<!-- Text analyzer page: a form that collects text and a report list that
     script.js fills in and un-hides after the form is submitted. -->
<html lang="en">
  <head>
    <title>Thinkful | Text analyzer example solution</title>
    <meta charset="utf-8" />
    <!-- Let the layout scale to the device width on phones and tablets. -->
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <meta
      name="description"
      content="Exemplary solution for the text analyzer project from Thinkful's Front End Web Development course"
    />
    <link
      rel="stylesheet"
      type="text/css"
      href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css"
    />
    <!-- <link href="https://fonts.googleapis.com/css?family=Roboto" rel="stylesheet"> -->
    <link rel="stylesheet" type="text/css" href="main.css" />
  </head>
  <body>
    <div class="container">
      <main>
        <h1>Text analyzer</h1>
        <p>Paste in text below, submit, and get some basic stats back.</p>
        <!-- The js-* classes are hooks used by script.js; keep them in sync. -->
        <form class="js-text-form">
          <div>
            <label for="user-text">Text to analyze</label>
            <textarea
              cols="60"
              rows="20"
              id="user-text"
              name="user-text"
              placeholder="Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
              required
            ></textarea>
          </div>
          <div>
            <button type="submit">Analyze it!</button>
          </div>
        </form>
        <!-- Starts hidden (see .hidden in main.css); the script removes the
             class once the statistics have been written into the <dd>s. -->
        <dl class="hidden text-report js-text-report">
          <dt>Word count</dt>
          <dd class="js-word-count"></dd>
          <dt>Unique word count</dt>
          <dd class="js-unique-word-count"></dd>
          <dt>Average word length</dt>
          <dd class="js-average-word-length"></dd>
        </dl>
      </main>
    </div>
    <!-- jQuery must load before script.js, which uses the global $. -->
    <script
      src="https://code.jquery.com/jquery-3.1.0.min.js"
      integrity="sha256-cCueBR6CsyA4/9szpPfrX3s49M9vUU5BgtiJj06wt/s="
      crossorigin="anonymous"
    ></script>
    <script type="text/javascript" src="script.js"></script>
  </body>
</html>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Size every element by its border box so padding never widens it. */
* {
  box-sizing: border-box;
}

body {
  font-family: 'Roboto', sans-serif;
}

.container {
  padding: 30px;
}

/* Centre the content column and cap its width. */
main {
  max-width: 960px;
  margin: 0 auto;
}

/* Put each label on its own line above its control. */
label {
  display: block;
}

/* Utility class: the script removes it from the report once results exist. */
.hidden {
  display: none;
}

/* Append a colon separator after each term in the report list. */
.text-report dt::after {
  content: ': ';
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// WHY DO WE FIRST GET THE AVERAGE, WHEN WE HAVENT DEFINED TOKENS? | |
// IN THIS FUNCTION DOES IT MATTER IF WE CALL THIS ARGUMENT 'TOKENS' OR SOMETHING ELSE, SINCE WE DEFINE THAT VARIABLE | |
// LATER IN THE FUNCTION 'reportOnText()'? | |
/**
 * Compute the mean number of characters per word.
 *
 * @param {string[]} tokens - The words to measure.
 * @returns {string} The average length formatted with two decimals;
 *   "0.00" when there are no tokens.
 */
function getAverageWordLength(tokens) {
  // Without this guard an empty list evaluates 0 / 0 and renders as "NaN".
  if (!tokens || tokens.length === 0) {
    return (0).toFixed(2);
  }
  // Joining with no separator yields one string whose length is the
  // combined character count of every word.
  var totalLength = tokens.join('').length;
  return (totalLength / tokens.length).toFixed(2);
}
// WHY THEN COUNT THE DISTINCT WORDS?
// Note: the distinct-word count counts each different word exactly once, no matter how many times it is repeated.
// It is NOT the number of words that are only mentioned once.
/**
 * Count how many different words appear in `tokens`.
 *
 * Each word is counted once regardless of how often it repeats. A Set
 * (https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Set)
 * would also work; a plain lookup object is used here to stay within the
 * ES5 features the rest of this script relies on.
 *
 * @param {string[]} tokens - The words to examine.
 * @returns {number} The number of distinct words.
 */
function countDistinctWords(tokens) {
  // A prototype-less object is a safe dictionary: words such as
  // "constructor" or "__proto__" cannot collide with inherited properties.
  var seen = Object.create(null);
  var distinctCount = 0;
  for (var i = 0; i < tokens.length; i++) {
    var word = tokens[i];
    // Property lookup is constant time, so the whole pass is linear instead
    // of re-scanning an array of earlier words for every token.
    if (seen[word] !== true) {
      seen[word] = true;
      distinctCount++;
    }
  }
  return distinctCount;
}
// this is a naive implementation of text tokenization | |
// https://en.wikipedia.org/wiki/Tokenization_(lexical_analysis). | |
// the goal is to standardize some of the differences between | |
// words in a text by converting all to lowercase, removing punctuation | |
// etc., so that, for instance, the "there" in 'it is there.' or "it is ThErE " or 'its There' | |
// would all be converted into the same value ("there") | |
// THE TWO FUNCTIONS BELOW CLEAN TEXT | |
// WHY TOKENIZE AFTER PREVIOUS FUNCTIONS? | |
/**
 * Split text into a sorted list of lower-cased words.
 *
 * A word is a run of non-whitespace characters that begins and ends on a
 * word boundary, so punctuation at either edge of a word is dropped.
 *
 * @param {string} text - The raw text to tokenize.
 * @returns {string[]} The words in default sort order; an empty array when
 *   the text contains no words.
 */
function tokenizeText(text) {
  var words = text.toLowerCase().match(/\b[^\s]+\b/g);
  // match() yields null rather than an empty array when nothing matches
  // (empty or punctuation-only input); calling .sort() on it would throw.
  if (words === null) {
    return [];
  }
  return words.sort();
}
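// .toLowerCase() lowercases every letter so that a word is counted once even if it is capitalized in some places
// .match(/\b[^\s]+\b/g) returns every run of non-whitespace characters that starts and ends on a word boundary (\b), which drops punctuation at the edges of a word
// .sort() sorts the words in the default (roughly alphabetical) order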
/**
 * Flatten text onto a single line.
 *
 * @param {string} text - Text that may contain line breaks.
 * @returns {string} The text with every line break (\r\n, \n or a lone \r)
 *   replaced by a single space.
 */
function removeReturns(text) {
  // Substitute a space rather than nothing: deleting the break outright
  // would fuse the last word of one line with the first word of the next.
  return text.replace(/\r?\n|\r/g, ' ');
}
// the .replace() method swaps every match of its first argument (the regex /\r?\n|\r/g) for its second argument.
// The regex matches the line-break characters that different systems produce: \r\n (Windows), \n (Unix) and a lone \r (old Mac).
// Those characters carry no meaning for the word statistics, so the text is flattened onto a single line before it is tokenized.
// generate and display analytics on text | |
/**
 * Compute the statistics for `text` and show them in the report list.
 *
 * @param {string} text - The text to analyze.
 */
function reportOnText(text) {
  // Derive every figure from one tokenized word list.
  var words = tokenizeText(text);
  var wordTotal = words.length;
  var distinctTotal = countDistinctWords(words);
  var meanLength = getAverageWordLength(words);

  // Write each figure into its <dd>; .end() steps back to the report
  // element after every .find() so the whole update is a single chain.
  $('.js-text-report')
    .find('.js-word-count')
    .text(wordTotal)
    .end()
    .find('.js-unique-word-count')
    .text(distinctTotal)
    .end()
    .find('.js-average-word-length')
    .text(meanLength + ' characters')
    .end()
    .removeClass('hidden');
}
// Watch for and handle form submissions | |
/**
 * Attach the submit handler that analyzes the text the user entered.
 */
function watchFormSubmission() {
  function handleSubmit(event) {
    // Stay on the page instead of letting the browser post the form.
    event.preventDefault();
    // `this` is the form element the handler is bound to.
    var enteredText = $(this).find('#user-text').val();
    var singleLineText = removeReturns(enteredText);
    reportOnText(singleLineText);
  }

  $('.js-text-form').on('submit', handleSubmit);
}
// Passing a function to $() schedules it to run once the DOM is ready;
// the handler takes no arguments, so it can be handed over directly.
$(watchFormSubmission);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// incomplete | |
/**
 * Count the words in a piece of text.
 *
 * A word is a run of non-whitespace characters that begins and ends on a
 * word boundary, so stray punctuation is not counted as a word.
 *
 * @param {string} text - The text to count words in.
 * @returns {number} The number of words; 0 for empty or missing text.
 */
function wordCounter(text) {
  if (text == null) {
    return 0;
  }
  // split("") would break the text into single characters and so report
  // the character count; matching whole words gives the word count.
  var words = String(text).match(/\b[^\s]+\b/g);
  return words === null ? 0 : words.length;
}
/**
 * Count the distinct words in a piece of text, ignoring letter case.
 *
 * @param {string} str - The text to examine.
 * @returns {number} The number of different words; 0 for empty or missing
 *   text.
 */
function getUniques(str) {
  if (str == null) {
    return 0;
  }
  // Match whole words; split("") would produce single characters instead.
  var sortedWords = String(str).toLowerCase().match(/\b[^\s]+\b/g);
  if (sortedWords === null) {
    return 0;
  }
  sortedWords.sort();
  // After sorting, equal words sit next to each other, so every change
  // between neighbours marks the start of a new distinct word.
  var uniqueCount = 1;
  for (var i = 1; i < sortedWords.length; i++) {
    if (sortedWords[i] !== sortedWords[i - 1]) {
      uniqueCount++;
    }
  }
  return uniqueCount;
}
/**
 * Compute the statistics for `text` and show them in the report list.
 *
 * @param {string} text - The text the user submitted.
 */
function analyzeResults(text) {
  // Pass the text through: calling the helpers with no argument leaves
  // them nothing to analyze.
  var totalWords = wordCounter(text);
  var uniqueWords = getUniques(text);

  // Look up the report element in the DOM; an empty string has no .find()
  // that accepts a selector, so the update would throw.
  var wordResults = $('.js-text-report');
  // Selectors match the <dd> classes declared in the page markup.
  wordResults.find('.js-word-count').text(totalWords);
  wordResults.find('.js-unique-word-count').text(uniqueWords);
  // TODO: fill '.js-average-word-length' once an average-length helper
  // exists in this script.
  wordResults.removeClass('hidden');
}
/**
 * Attach the submit handler that analyzes the text the user entered.
 *
 * Must be called once the DOM is ready so that the form element exists.
 */
function formSubmission() {
  // The form in the page markup carries the class "js-text-form"; the
  // previous selector matched nothing, so the handler was never bound.
  $('.js-text-form').submit(function(event) {
    // Stay on the page instead of letting the browser post the form.
    event.preventDefault();
    var inputText = $(this).find('#user-text').val();
    analyzeResults(inputText);
  });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment