Last active
October 19, 2017 03:27
-
-
Save zackster/24f04d1e18754694cd13457f81db7f2b to your computer and use it in GitHub Desktop.
For the benefit of https://codefor.cash subscribers – automatic matching of skills to online job ads
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Expands a list of skill keywords with synonyms, spelling variants and
# the skills implied by any framework that appears in the list.
#
# The expansion runs in three passes:
# 1. Synonyms: if any member of a synonym group is present, the whole
#    group is added.
# 2. Variants: for every keyword present after pass 1,
#    - a trailing "js" / ".js" is stripped ("nodejs" => "node"),
#    - whitespace-separated words are joined both without a separator and
#      with dashes ("ruby on rails" => "rubyonrails", "ruby-on-rails"),
#    - a trailing "db" is stripped ("mongodb" => "mongo").
#    Variants generated in this pass are not themselves re-expanded.
# 3. Implied skills: if any framework of a group is present, the skills
#    implied by that group are added (e.g. "django" => "python").
#    Implied skills are not run through passes 1 and 2 again.
#
# The argument is never modified (a frozen array is accepted); callers must
# use the return value.
#
# @param keyword_list [Array<String>] keywords to expand; compared by exact
#   string equality, so casing/normalisation is the caller's responsibility
# @return [Array<String>] a new, de-duplicated array: the original keywords
#   first (in order), followed by everything that was added
def expand_keyword_list(keyword_list)
  # Work on a copy so the caller's array is left untouched.
  keywords = keyword_list.dup

  # NOTE(review): ' unity ' is padded with spaces in the data; presumably
  # so a substring matcher does not hit words such as "community" --
  # confirm against the code that consumes these keywords.
  synonym_list = {
    'rails' => ['ror', 'ruby on rails', 'rails'],
    'html' => ['html5', 'html', 'html4'],
    'css' => ['css3', 'css', 'cascading style sheets', 'cascading stylesheets'],
    'node' => ['node', 'nodejs', 'node.js', 'node js'],
    'javascript' => ['javascript', 'java script', 'js'],
    ' unity ' => [' unity ', 'unity3d', 'unity 3d']
  }

  synonym_list.each do |canonical, synonyms|
    group = synonyms + [canonical]
    keywords.concat(group) if group.any? { |syn| keywords.include?(syn) }
  end
  keywords.uniq!

  # Iterate over a snapshot: variants appended below must not be visited.
  keywords.dup.each do |kw|
    # Anything ending in "js" or ".js" => the version without that suffix.
    # Only the trailing suffix is removed; "js" elsewhere in the keyword
    # is left alone. A bare "js" / ".js" has no stem and adds nothing.
    if kw.end_with?('js')
      stem = kw.sub(/\.?js\z/, '')
      keywords << stem unless stem.empty?
    end

    # Anything with a space => remove the spaces, or join with dashes.
    if kw =~ /\s+\S+\z/
      words = kw.split(/\s+/)
      keywords << words.join << words.join('-')
    end

    # Anything ending in "db" => the word without the "db".
    # A bare "db" has no stem and adds nothing.
    if kw.end_with?('db')
      stem = kw[0..-3]
      keywords << stem unless stem.empty?
    end
  end
  keywords.uniq!

  php_frameworks = [
    'laravel', 'phalcon', 'symfony', 'codeigniter', 'cakephp', 'zend',
    'fuelphp', 'slim', 'phpixie', 'fat-free', 'aura', 'silverstripe',
    'wordpress'
  ]
  ruby_frameworks = ['sinatra', 'rails']
  python_frameworks = [
    'growler', 'cherrypy', 'morepath', 'turbogears2', 'circuits',
    'watson-framework', 'pycnic', 'webcore', 'reahl', 'django', 'flask',
    'tornado', 'falcon', 'hug', 'sanic', 'aiohttp', 'pyramid', 'tensorflow',
    'requests', 'simplejson', 'sql alchemy', 'scrapy', 'matplotlib',
    'scikit-learn', 'scikit', 'pygame', 'arrow', 'wxpython', 'beautiful soup',
    'natural language toolkit', 'nltk', 'twisted', 'pyglet', 'nose', 'peewee',
    'scapy', 'numpy', 'scipy', 'ipython', 'sympy'
  ]
  javascript_frameworks = [
    'es6', 'coffeescript', 'typescript', 'elm', 'react', 'angular', 'ember',
    'vue', 'backbone', 'redux', 'mobx', 'relay', 'firebase', 'graphql',
    'apollo', 'falcor', 'horizon', 'meteor', 'feathersjs', 'donejs', 'mern',
    'mean', 'mocha', 'jasmine', 'enzyme', 'jest', 'ava', 'aphrodite',
    'webpack', 'grunt', 'gulp', 'browserify', 'bower', 'react native',
    'cordova', 'phonegap', 'nativescript'
  ]
  java_frameworks = [
    'spring', 'struts', 'hibernate', 'jsf', 'java ee', 'vaadin',
    'google web toolkit', 'gwt', 'grails', 'flexive', 'jspx', 'openxava',
    'crawler4j', 'apache nutch', 'hk2', 'dagger', 'appfuse', 'jlisa',
    'drools', 'easy rules', 'jbpm', 'jeddict', 'actframework', 'activiti',
    'akka', 'android plot', 'apache accumulo', 'apache activemq',
    'apache avalon', 'apache avro', 'apache axis', 'apache blur',
    'apache bookkeeper', 'apache camel', 'apache cayenne', 'apache click',
    'apache cocoon', 'apache commons', 'apache crunch', 'apache cxf',
    'apache datafu', 'apache empire db', 'apache felix', 'apache flume',
    'apache geronimo', 'apache giraph', 'apache hadoop', 'apache hbase',
    'apache hive', 'apache jackrabbit', 'apache javanlp', 'apache jena',
    'apache kafka', 'apache log4j', 'apache lucene', 'apache mahout',
    'apache mesos', 'apache mina', 'apache oodt', 'apache oozie',
    'apache opennlp', 'apache pdfbox', 'apache pig', 'apache pivot',
    'apache poi', 'apache qpid', 'apache river (jini)', 'apache samza',
    'apache shiro', 'apache sling', 'apache solr', 'apache spark',
    'apache storm', 'apache struts', 'apache tapestry', 'apache tika',
    'apache tomcat', 'apache turbine', 'apache uima', 'apache usergrid',
    'apache velocity', 'apache vxquery', 'apache wink', 'apache xerces',
    'apache zookeeper', 'axon', 'barracuda', 'beads', 'birt', 'bigfaceless',
    'biojava', 'bluecove', 'bouncy castle cryptographic', 'cascading',
    'checker', 'cogcompnlp', 'codename one', 'controlsfx', 'deeplearning4j',
    'directwebremoting', 'dropwizard jersey', 'eclipselink', 'ehcache',
    'ejml', 'facebook4j', 'fmj', 'frame4j', 'freemarker template',
    'gcviewer', 'geoapi', 'geotools', 'glassfish', 'google gson',
    'google guava', 'google guice', 'google web toolkit (gwt)', 'gstreamer',
    'gxt', 'hibernate', 'hsqldb', 'ibatis', 'infinispan', 'itext',
    'jackcess', 'jackson', 'java collections framework',
    'java media framework', 'javassist', 'javers', 'jaxp', 'jboss seam',
    'jcabi', 'jdom', 'jello', 'jersey', 'jetty',
    'jfreechart', 'jidesoft', 'jmock', 'jmonkeyengine', 'joda time',
    'jogamp', 'jooby', 'jppf', 'jprofiler', 'jrockit', 'jsf', 'jsonlib',
    'jsoup', 'jsyn', 'jts topology', 'junit', 'liquibase', 'logback',
    'lombok', 'loopj', 'lwjgl', 'mapdb', 'mockito', 'mybatis', 'nd4j',
    'netty', 'neuroph', 'ninja', 'opencsv', 'oracle weblogic', 'orientdb',
    'ormlite', 'pi4j', 'play', 'primefaces', 'quartz', 'quasar', 'rabbitmq',
    'ratpack', 'reactor', 'resteasy', 'restfb', 'restlet', 'sax',
    'scribejava', 'selenide', 'selenium', 'slf4j', 'slick2d', 'smack',
    'sonarlint', 'sonarqube', 'jasper reports', 'spock', 'spring amqp',
    'spring android', 'spring aop(aspect oriented programming)',
    'spring batch', 'spring boot', 'spring core', 'spring data',
    'spring data jdbc extensions', 'spring data mongodb',
    'spring data neo4j', 'spring hateoas', 'spring integration',
    'spring ldap', 'spring mvc', 'spring roo', 'spring security',
    'spring session', 'spring social', 'spring xd', 'stormpath',
    'stringtemplate', 'suanshu', 'testng', 'thymeleaf', 'twitter4j',
    'vaadin', 'vertx', 'vraptor', 'webfirmframework', 'wildfly', 'wordcram',
    'wso2', 'wso2 api manager', 'wso2 app manager', 'wso2 esb',
    'wso2 governance registry', 'wso2 identity server',
    'wso2 machine learner', 'wso2 message broker', 'xuggler', 'zkoss',
    'prova', 'openrules', 'jruleengine', 'jess', 'accumulo', 'activemq',
    'avalon', 'avro', 'axis', 'blur', 'bookkeeper', 'camel', 'cayenne',
    'click', 'cocoon', 'commons', 'crunch', 'cxf', 'datafu', 'empire db',
    'felix', 'flume', 'geronimo', 'giraph', 'hadoop', 'hbase', 'hive',
    'jackrabbit', 'javanlp', 'jena', 'kafka', 'log4j', 'lucene', 'mahout',
    'mesos', 'mina', 'oodt', 'oozie', 'opennlp', 'pdfbox', 'pig', 'pivot',
    'poi', 'qpid', 'river (jini)', 'samza', 'shiro', 'sling', 'solr',
    'spark', 'storm', 'struts', 'tapestry', 'tika', 'tomcat', 'turbine',
    'uima', 'usergrid', 'velocity', 'vxquery', 'wink', 'xerces', 'zookeeper'
  ]
  css_frameworks = ['sass', 'scss', 'less', 'css modules']

  # Each pair is [trigger keywords, skills implied when any trigger is
  # present]. Pairs are evaluated in order against the growing list, so a
  # skill added by an earlier pair can trigger a later one.
  # NOTE(review): 'java ' carries a trailing space in the data; presumably
  # to keep a substring matcher from hitting "javascript" -- confirm.
  implied_skills_tree = [
    [['rails'], ['html', 'css', 'javascript']],
    [['node'], ['javascript']],
    [php_frameworks, ['php']],
    [ruby_frameworks, ['ruby']],
    [python_frameworks, ['python']],
    [javascript_frameworks, ['javascript', 'js']],
    [java_frameworks, ['java ']],
    [css_frameworks, ['css']],
    [['lamp'], ['linux', 'apache', 'mysql', 'php']],
    [['mean'], ['mongo', 'express', 'angular', 'node']]
  ]

  implied_skills_tree.each do |frameworks, implied_skills|
    keywords.concat(implied_skills) if frameworks.any? { |fw| keywords.include?(fw) }
  end

  keywords.uniq
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment