Last active
June 10, 2020 10:28
-
-
Save hmdhk/6ef468fd4a1c8fdd2a72516def37a924 to your computer and use it in GitHub Desktop.
Detect URL pattern
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Builds a low-cardinality version of a URL by replacing path segments that
 * look like identifiers with `{ID}` and any non-empty query string with
 * `?{query}`.
 *
 * Must be invoked with `this` bound to an object exposing `parseUrl(url)`.
 * The object returned by `parseUrl` is read for `pathname`, `origin`,
 * `queryString` and `hash`.
 * NOTE(review): the result is assembled as `origin + '/' + segments`, which
 * presumably expects `pathname` without a leading slash — confirm against
 * `parseUrl`.
 *
 * A path segment is redacted when any of the following holds:
 *   - it contains more than 3 `-`/`_` separators;
 *   - it is longer than 3 characters and at least 30% digits, or it contains
 *     more than 3 digits overall;
 *   - it is longer than 5 characters and its share of upper-case or of
 *     lower-case ASCII letters lies strictly between 30% and 60%;
 *   - it contains a UUID (with or without hyphens).
 *
 * @param {string} url - URL to redact; passed as-is to `this.parseUrl`.
 * @returns {string} The URL with identifier-like path segments and the query
 *   string replaced by placeholders; the hash, when present, is appended
 *   unchanged.
 */
function redactUrlIds (url) {
  var parsed = this.parseUrl(url)
  var pathParts = parsed.pathname.split('/')
  var redactString = '{ID}'
  // source: https://www.npmjs.com/package/uuid-regexp
  var uuidRegex = /[a-f0-9]{8}-?[a-f0-9]{4}-?[1-5][a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}/i
  var separatorsRegex = /[-_]/g
  var digitsRegex = /[0-9]/g
  var lowerCaseRegex = /[a-z]/g
  var upperCaseRegex = /[A-Z]/g

  var redactedParts = pathParts.map(function (part) {
    // String#match returns null when nothing matches, hence the `|| []`.
    var numberOfSeparators = (part.match(separatorsRegex) || []).length
    if (numberOfSeparators > 3) {
      return redactString
    }

    var numberOfDigits = (part.match(digitsRegex) || []).length
    if ((part.length > 3 && numberOfDigits / part.length >= 0.3) || numberOfDigits > 3) {
      return redactString
    }

    var numberofUpperCase = (part.match(upperCaseRegex) || []).length
    var numberofLowerCase = (part.match(lowerCaseRegex) || []).length
    // For an empty segment both rates are NaN; the length guard below keeps
    // them from ever being compared in that case.
    var lowerCaseRate = numberofLowerCase / part.length
    var upperCaseRate = numberofUpperCase / part.length
    if (
      part.length > 5 &&
      ((upperCaseRate > 0.3 && upperCaseRate < 0.6) || (lowerCaseRate > 0.3 && lowerCaseRate < 0.6))
    ) {
      return redactString
    }

    // Last resort: UUIDs that slipped past the heuristics above. The regex has
    // no `g` flag, so `test` keeps no state between segments.
    if (uuidRegex.test(part)) {
      return redactString
    }

    return part
  })

  var prefix = parsed.origin ? parsed.origin + '/' : '/'
  var query = parsed.queryString ? '?{query}' : ''
  // A missing hash must contribute nothing; concatenating it directly would
  // append the literal text "undefined" to the result.
  var hash = parsed.hash || ''

  return prefix + redactedParts.join('/') + query + hash
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Table-driven spec for `utils.redactUrlIds`: each fixture pairs an input URL
// with the exact redacted string the function is expected to return.
// NOTE(review): `utils`, `describe`, `it` and `expect` are not defined in this
// snippet — presumably supplied by the surrounding test file and test runner.
describe('redactUrlIds', function () {
  it('should work', function () {
    var testUrls = [
      {
        // Hyphenated UUID as a path segment.
        url: 'https://test.com/uuid/110ec58a-a0f2-4ac4-8393-c866d813b8d1/uuid',
        expect: 'https://test.com/uuid/{ID}/uuid'
      },
      {
        // Numeric id followed by a long hyphen-separated slug.
        url: 'https://stackoverflow.com/questions/46567188/uncheck-all-the-radio-button-from-another-class-pyqt4',
        expect: 'https://stackoverflow.com/questions/{ID}/{ID}'
      },
      {
        // Mixed-case token; the bare trailing `#` is expected to be dropped.
        url: 'https://docs.google.com/document/d/1W9FtW5DnkyP3Bucng4aLvqT-BtZuX1WfV9KciwE3i9w/edit#',
        expect: 'https://docs.google.com/document/d/{ID}/edit'
      },
      {
        // Query string collapses to `?{query}`; the hash is preserved.
        url: 'https://www.google.dk/search?q=OPTIONS+request+spec&oq=OPTIONS+request+spec&aqs=chrome..69i57.6104j0j1&sourceid=chrome&ie=UTF-8#hash',
        expect: 'https://www.google.dk/search?{query}#hash'
      },
      {
        // Everything after `#` is expected to pass through untouched, even
        // though it contains `?` and UUIDs.
        url: `http://localhost:5601/app/kibana#/discover?_g=()&_a=(columns:!(_source),filters:!(('$state':(store:appState),meta:(alias:!n,disabled:!f,index:'59d38220-70b3-11e7-bc2f-f714d5049c68',key:processor.event,negate:!f,params:(query:transaction,type:phrase),type:phrase,value:transaction),query:(match:(processor.event:(query:transaction,type:phrase))))),index:'59d38220-70b3-11e7-bc2f-f714d5049c68',interval:auto,query:(language:lucene,query:''),sort:!('@timestamp',desc))`,
        expect: `http://localhost:5601/app/kibana#/discover?_g=()&_a=(columns:!(_source),filters:!(('$state':(store:appState),meta:(alias:!n,disabled:!f,index:'59d38220-70b3-11e7-bc2f-f714d5049c68',key:processor.event,negate:!f,params:(query:transaction,type:phrase),type:phrase,value:transaction),query:(match:(processor.event:(query:transaction,type:phrase))))),index:'59d38220-70b3-11e7-bc2f-f714d5049c68',interval:auto,query:(language:lucene,query:''),sort:!('@timestamp',desc))`
      },
      {
        // Two id-like segments are redacted; `ref=s9u_ri_gw_i2` is expected to stay.
        url: `https://www.amazon.com/gp/product/B00I8BIC9E/ref=s9u_ri_gw_i2/136-5951111-4548800?ie=UTF8&fpl=fresh&pd_rd_i=B00I8BIC9E&pd_rd_r=V6C03W9QA81J7H4GD8Y1&pd_rd_w=kLfjh&pd_rd_wg=sWk23&pf_rd_m=ATVPDKIKX0DER&pf_rd_s=&pf_rd_r=GBA05GKH8WG6AAJMMP12&pf_rd_t=36701&pf_rd_p=e8de777f-727a-4395-809e-49e84c65e636&pf_rd_i=desktop`,
        expect: 'https://www.amazon.com/gp/product/{ID}/ref=s9u_ri_gw_i2/{ID}?{query}'
      },
      {
        // Digit-heavy document names are redacted as well.
        url: 'https://www.w3.org/Protocols/rfc2616/rfc2616-sec9.html',
        expect: 'https://www.w3.org/Protocols/{ID}/{ID}'
      },
      {
        // Trailing slash is kept.
        url: 'https://www.dba.dk/andet-motiv-blomster-have/id-1037936287/',
        expect: 'https://www.dba.dk/andet-motiv-blomster-have/{ID}/'
      },
      {
        url: 'https://www.youtube.com/watch?v=bAb8KIhgVAI',
        expect: 'https://www.youtube.com/watch?{query}'
      },
      {
        url: 'https://www.instagram.com/p/BZ5wx5aj1a7/',
        expect: 'https://www.instagram.com/p/{ID}/'
      },
      {
        url: 'https://www.liveauctioneers.com/item/55862255_pair-of-ruby-red-sgrafitto-open-face-double-gourd-vases',
        expect: 'https://www.liveauctioneers.com/item/{ID}'
      },
      {
        url: 'https://medium.com/the-atlantic/google-and-facebook-have-failed-us-61b526beb817',
        expect: 'https://medium.com/the-atlantic/{ID}'
      },
      {
        // Relative URL: no origin, so the result starts with a single `/`.
        url: '/the-atlantic/google-and-facebook-have-failed-us-61b526beb817',
        expect: '/the-atlantic/{ID}'
      },
      {
        url: 'https://www.elastic.co/v1/test/doc/ERkrFV8BbVhEBroBxP_R',
        expect: 'https://www.elastic.co/v1/test/doc/{ID}'
      }
    ]

    // Counts the fixtures actually asserted so the final check below fails if
    // the loop did not visit every one.
    var testCount = 0
    testUrls.forEach(function (testUrl) {
      var redacted = utils.redactUrlIds(testUrl.url)
      expect(redacted).toBe(testUrl.expect)
      testCount++
    })
    expect(testCount).toBe(testUrls.length)
  })
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment