Created
July 13, 2018 16:26
-
-
Save angelf/df1d18864c1750bdd1880b7c8d99b1af to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| commit ca9602f6cb31ade1e5f39592eba3b8b2c2031da5 | |
| Author: root <ip-10-148-129-34.ec2.internal> | |
| Date: Fri Jul 13 18:22:22 2018 +0200 | |
| evitar inicializar el clasificador si no se va a usar | |
| diff --git a/lib/classes/clasificador.rb b/lib/classes/clasificador.rb | |
| index bcd81c3..6ed6c1e 100644 | |
| --- a/lib/classes/clasificador.rb | |
| +++ b/lib/classes/clasificador.rb | |
| @@ -42,10 +42,15 @@ class Clasificador | |
| end | |
| def initialize(pais_id) | |
| + t = Time.now | |
| + | |
| @terms_idf_score = {} | |
| @bds = {} | |
| @terms_idf_score_max = {} | |
| @stopwords = Clasificador.stopwords(pais_id) | |
| + | |
| + puts "Stopwords: #{Time.now - t}" | |
| + | |
| if File.exist?("#{RAILS_ROOT}/lib/vcite/pstore/clasification/#{pais_id}_SAMPLE_LEG") | |
| store = PStore.new("#{RAILS_ROOT}/lib/vcite/pstore/clasification/#{pais_id}_SAMPLE_LEG") | |
| store.transaction(true) do | |
| @@ -55,6 +60,7 @@ class Clasificador | |
| end | |
| end | |
| + puts "Leg: #{Time.now - t}" | |
| if File.exist?("#{RAILS_ROOT}/lib/vcite/pstore/clasification/#{pais_id}_SAMPLE_JUR") | |
| store = PStore.new("#{RAILS_ROOT}/lib/vcite/pstore/clasification/#{pais_id}_SAMPLE_JUR") | |
| store.transaction(true) do | |
| @@ -64,6 +70,8 @@ class Clasificador | |
| end | |
| end | |
| + puts "Jur: #{Time.now - t}" | |
| + | |
| if File.exist?("#{RAILS_ROOT}/lib/vcite/pstore/clasification/#{pais_id}_SAMPLE_ARTS") | |
| store = PStore.new("#{RAILS_ROOT}/lib/vcite/pstore/clasification/#{pais_id}_SAMPLE_ARTS") | |
| store.transaction(true) do | |
| @@ -73,6 +81,8 @@ class Clasificador | |
| end | |
| end | |
| + puts "Arts: #{Time.now - t}" | |
| + | |
| if File.exist?("#{RAILS_ROOT}/lib/vcite/pstore/clasification/#{pais_id}_TOP_CITED") | |
| store = PStore.new("#{RAILS_ROOT}/lib/vcite/pstore/clasification/#{pais_id}_TOP_CITED") | |
| store.transaction(true) do | |
| @@ -82,6 +92,7 @@ class Clasificador | |
| end | |
| end | |
| + puts "End (Top CIted): #{Time.now - t}" | |
| end | |
| def self.intersection(a,b) | |
| diff --git a/lib/vcite/profile.rb b/lib/vcite/profile.rb | |
| index cf42c51..84830d5 100644 | |
| --- a/lib/vcite/profile.rb | |
| +++ b/lib/vcite/profile.rb | |
| @@ -13,17 +13,9 @@ class Profile | |
| # @arr_entities #el listado de todas las entities que queremos buscar | |
| # @set_tokens #el listado de todos los tokens que son de nuestro interes, de mas largo a mas corto | |
| # @set_tokens_er #el listado de todas las expresiones regulares ordenadas por su caracter de comienzo | |
| - | |
| - def initialize(opts = {}) | |
| - @arr_entities = Array.new | |
| - @set_tokens = Hash.new | |
| - @set_tokens_er = Hash.new | |
| - @set_tokens_entities = Hash.new | |
| - @searchCache = SearchCache.new | |
| - @hash_destinos_diccionario = Hash.new | |
| - @platform = opts[:platform].any? ? opts[:platform] : 'VLEX-WEBAPP' | |
| - if st_type && !opts[:disable_clasificador] | |
| + def clasificador | |
| + if st_type && !@disable_clasificador | |
| @clasificador = $CLASIFICADOR_CACHE["clasificador-#{st_type}"] | |
| unless @clasificador | |
| $CLASIFICADOR_LOGGER.info("CREA CLASIFICADOR #{st_type}") | |
| @@ -31,10 +23,24 @@ class Profile | |
| $CLASIFICADOR_CACHE["clasificador-#{st_type}"] = @clasificador | |
| else | |
| $CLASIFICADOR_LOGGER.info("CLASIFICADOR CACHEADO#{st_type}") | |
| - | |
| end | |
| + @clasificador | |
| + else | |
| + nil | |
| end | |
| end | |
| + | |
| + def initialize(opts = {}) | |
| + @disable_clasificador = opts[:disable_clasificador] | |
| + @arr_entities = Array.new | |
| + @set_tokens = Hash.new | |
| + @set_tokens_er = Hash.new | |
| + @set_tokens_entities = Hash.new | |
| + @searchCache = SearchCache.new | |
| + @hash_destinos_diccionario = Hash.new | |
| + @platform = opts[:platform].any? ? opts[:platform] : 'VLEX-WEBAPP' | |
| + | |
| + end | |
| def st_type | |
| nil | |
| @@ -707,7 +713,7 @@ class Profile | |
| resuelveVids_por_hipotesi_documento(arr_matches,doc) | |
| resuelveVids_por_documento_origen(arr_matches,doc) | |
| - if @clasificador | |
| + unless @disable_clasificador | |
| uniq_matches = arr_matches.select{|m| m.arr_vids_candidatos && m.arr_vids_candidatos.size > 0}.group_by{|m| [m.arr_vids_candidatos, m.respond_to?(:arr_key_fragmentos) ? m.arr_key_fragmentos[0].to_i : nil] } | |
| all_scores = {} | |
| @@ -753,6 +759,7 @@ class Profile | |
| all_context_without_matched = matches.map{|m| m.context.to_s.gsub(m.st_cadena_original, "")}.join(" ").strip | |
| next if all_context_without_matched.length < 20 #no desambiguar por contexto si este es muy pequeño | |
| + next unless clasificador | |
| measure = Benchmark.measure{ | |
| new_candidatos, scores, raw_scores = clasificador.process(all_context_without_matched,candidatos,bd) | |
| if new_candidatos.length < candidatos.length | |
| @@ -814,7 +821,7 @@ class Profile | |
| abbrv = Documento.get_cache(k.split("-")[0].to_i).titulo.split("(")[0].split(" ").select{|x| x.first() =~ /[A-Z]/ || x.length > 5}.map{|x| x.first()}.join("") | |
| art = k.split("-")[1] | |
| "#{abbrv} #{art}(#{v})" | |
| - }.join("; ") ) if @clasificador | |
| + }.join("; ") ) if all_scores.keys.any? | |
| return arr_matches, all_scores | |
| end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment