Skip to content

Instantly share code, notes, and snippets.

@angelf
Created July 13, 2018 16:26
Show Gist options
  • Select an option

  • Save angelf/df1d18864c1750bdd1880b7c8d99b1af to your computer and use it in GitHub Desktop.

Select an option

Save angelf/df1d18864c1750bdd1880b7c8d99b1af to your computer and use it in GitHub Desktop.
commit ca9602f6cb31ade1e5f39592eba3b8b2c2031da5
Author: root <ip-10-148-129-34.ec2.internal>
Date: Fri Jul 13 18:22:22 2018 +0200
evitar inicializar el clasificador si no se va a usar
diff --git a/lib/classes/clasificador.rb b/lib/classes/clasificador.rb
index bcd81c3..6ed6c1e 100644
--- a/lib/classes/clasificador.rb
+++ b/lib/classes/clasificador.rb
@@ -42,10 +42,15 @@ class Clasificador
end
def initialize(pais_id)
+ t = Time.now
+
@terms_idf_score = {}
@bds = {}
@terms_idf_score_max = {}
@stopwords = Clasificador.stopwords(pais_id)
+
+ puts "Stopwords: #{Time.now - t}"
+
if File.exist?("#{RAILS_ROOT}/lib/vcite/pstore/clasification/#{pais_id}_SAMPLE_LEG")
store = PStore.new("#{RAILS_ROOT}/lib/vcite/pstore/clasification/#{pais_id}_SAMPLE_LEG")
store.transaction(true) do
@@ -55,6 +60,7 @@ class Clasificador
end
end
+ puts "Leg: #{Time.now - t}"
if File.exist?("#{RAILS_ROOT}/lib/vcite/pstore/clasification/#{pais_id}_SAMPLE_JUR")
store = PStore.new("#{RAILS_ROOT}/lib/vcite/pstore/clasification/#{pais_id}_SAMPLE_JUR")
store.transaction(true) do
@@ -64,6 +70,8 @@ class Clasificador
end
end
+ puts "Jur: #{Time.now - t}"
+
if File.exist?("#{RAILS_ROOT}/lib/vcite/pstore/clasification/#{pais_id}_SAMPLE_ARTS")
store = PStore.new("#{RAILS_ROOT}/lib/vcite/pstore/clasification/#{pais_id}_SAMPLE_ARTS")
store.transaction(true) do
@@ -73,6 +81,8 @@ class Clasificador
end
end
+ puts "Arts: #{Time.now - t}"
+
if File.exist?("#{RAILS_ROOT}/lib/vcite/pstore/clasification/#{pais_id}_TOP_CITED")
store = PStore.new("#{RAILS_ROOT}/lib/vcite/pstore/clasification/#{pais_id}_TOP_CITED")
store.transaction(true) do
@@ -82,6 +92,7 @@ class Clasificador
end
end
+ puts "End (Top CIted): #{Time.now - t}"
end
def self.intersection(a,b)
diff --git a/lib/vcite/profile.rb b/lib/vcite/profile.rb
index cf42c51..84830d5 100644
--- a/lib/vcite/profile.rb
+++ b/lib/vcite/profile.rb
@@ -13,17 +13,9 @@ class Profile
# @arr_entities #el listado de todas las entities que queremos buscar
# @set_tokens #el listado de todos los tokens que son de nuestro interes, de mas largo a mas corto
# @set_tokens_er #el listado de todas las expresiones regulares ordenadas por su caracter de comienzo
-
- def initialize(opts = {})
- @arr_entities = Array.new
- @set_tokens = Hash.new
- @set_tokens_er = Hash.new
- @set_tokens_entities = Hash.new
- @searchCache = SearchCache.new
- @hash_destinos_diccionario = Hash.new
- @platform = opts[:platform].any? ? opts[:platform] : 'VLEX-WEBAPP'
- if st_type && !opts[:disable_clasificador]
+ def clasificador
+ if st_type && !@disable_clasificador
@clasificador = $CLASIFICADOR_CACHE["clasificador-#{st_type}"]
unless @clasificador
$CLASIFICADOR_LOGGER.info("CREA CLASIFICADOR #{st_type}")
@@ -31,10 +23,24 @@ class Profile
$CLASIFICADOR_CACHE["clasificador-#{st_type}"] = @clasificador
else
$CLASIFICADOR_LOGGER.info("CLASIFICADOR CACHEADO#{st_type}")
-
end
+ @clasificador
+ else
+ nil
end
end
+
+ def initialize(opts = {})
+ @disable_clasificador = opts[:disable_clasificador]
+ @arr_entities = Array.new
+ @set_tokens = Hash.new
+ @set_tokens_er = Hash.new
+ @set_tokens_entities = Hash.new
+ @searchCache = SearchCache.new
+ @hash_destinos_diccionario = Hash.new
+ @platform = opts[:platform].any? ? opts[:platform] : 'VLEX-WEBAPP'
+
+ end
def st_type
nil
@@ -707,7 +713,7 @@ class Profile
resuelveVids_por_hipotesi_documento(arr_matches,doc)
resuelveVids_por_documento_origen(arr_matches,doc)
- if @clasificador
+ unless @disable_clasificador
uniq_matches = arr_matches.select{|m| m.arr_vids_candidatos && m.arr_vids_candidatos.size > 0}.group_by{|m| [m.arr_vids_candidatos, m.respond_to?(:arr_key_fragmentos) ? m.arr_key_fragmentos[0].to_i : nil] }
all_scores = {}
@@ -753,6 +759,7 @@ class Profile
all_context_without_matched = matches.map{|m| m.context.to_s.gsub(m.st_cadena_original, "")}.join(" ").strip
next if all_context_without_matched.length < 20 #no desambiguar por contexto si este es muy pequeño
+ next unless clasificador
measure = Benchmark.measure{
new_candidatos, scores, raw_scores = clasificador.process(all_context_without_matched,candidatos,bd)
if new_candidatos.length < candidatos.length
@@ -814,7 +821,7 @@ class Profile
abbrv = Documento.get_cache(k.split("-")[0].to_i).titulo.split("(")[0].split(" ").select{|x| x.first() =~ /[A-Z]/ || x.length > 5}.map{|x| x.first()}.join("")
art = k.split("-")[1]
"#{abbrv} #{art}(#{v})"
- }.join("; ") ) if @clasificador
+ }.join("; ") ) if all_scores.keys.any?
return arr_matches, all_scores
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment