vishnuvyas · April 3, 2020 04:29
diff --git a/ContextExtractor.java b/ContextExtractor.java
 import java.util.*;
 import java.util.stream.*;

 /**
  * Small demonstration of context extraction: finds known terms in a block of
  * text and reports the whitespace-delimited tokens surrounding each match.
  */
 class ContextExtractor {

   /** Lowercase terms that {@link #fakeTagger(String)} searches for. */
   private static final List<String> TERMS = List.of("diabetes mellitus", "cva");

   /** A matched term and its character offsets within the line it was found on. */
   public static class TagResult {
     /** The matched term, spelled as in the term list (lowercase). */
     public String token;
     /** Offset of the first character of the match (inclusive). */
     public int start;
     /** Offset just past the last character of the match (exclusive). */
     public int end;

     public TagResult(String t, int s, int e) {
       this.token = t;
       this.start = s;
       this.end = e;
     }

     /** Renders as {@code token : {start,end}}. */
     @Override
     public String toString() {
       return token + " : {" + start + "," + end + "}";
     }
   }

   /** Sample text processed by {@link #main(String[])}. */
   public static String text =
       "Patient has diabetes mellitus and shows no signs of stopping candy\n" + "\n"
       + "Family History\n"
       + "CVA and CHF"
       + "\n"
       + "Signed by Dr Strange";

   /**
    * Finds every non-overlapping occurrence of a known term in {@code line},
    * ignoring case, and returns the matches in left-to-right order.
    *
    * <p>At each step the earliest remaining occurrence of any term is taken
    * (the longest term wins a tie), so a term is never skipped just because a
    * different term was matched further to the right.
    *
    * @param line a single line of text
    * @return the matches, ordered by start offset; empty if nothing matched
    */
   public List<TagResult> fakeTagger(String line) {
     List<TagResult> taggedItems = new ArrayList<>();

     // Lowercase once with a fixed locale so matching does not depend on the
     // platform default. NOTE(review): offsets are computed on the lowercased
     // copy; this assumes lowercasing does not change the line's length.
     String haystack = line.toLowerCase(Locale.ROOT);

     int searchFrom = 0;
     while (searchFrom < haystack.length()) {
       String bestTerm = null;
       int bestStart = -1;
       for (String term : TERMS) {
         int idx = haystack.indexOf(term, searchFrom);
         if (idx < 0) {
           continue;
         }
         boolean earlier = bestTerm == null || idx < bestStart;
         boolean longerAtSameSpot =
             bestTerm != null && idx == bestStart && term.length() > bestTerm.length();
         if (earlier || longerAtSameSpot) {
           bestTerm = term;
           bestStart = idx;
         }
       }

       if (bestTerm == null) {
         // No term occurs in the rest of the line.
         break;
       }

       int end = bestStart + bestTerm.length();
       taggedItems.add(new TagResult(bestTerm, bestStart, end));
       // Terms are non-empty, so this always moves forward.
       searchFrom = end;
     }
     return taggedItems;
   }

   /** The tokens before and after one matched term. */
   public static class Context {
     /** Tokens preceding the match, in reading order. */
     public List<String> pre;
     /** Tokens following the match, in reading order. */
     public List<String> post;
     /** The matched term. */
     public String token;

     public Context(List<String> pr, List<String> po, String t) {
       this.pre = pr;
       this.post = po;
       this.token = t;
     }

     /** Renders as {@code [ pre, ... ] -- [Tok: token ] -- [ post, ... ]}. */
     @Override
     public String toString() {
       StringBuilder builder = new StringBuilder();
       builder.append("[ ");
       for (String preTok : pre) {
         builder.append(preTok).append(", ");
       }
       builder.append("] -- ");
       builder.append("[Tok: ").append(token).append(" ] -- ");
       builder.append("[ ");
       for (String postTok : post) {
         builder.append(postTok).append(", ");
       }
       builder.append("]");
       return builder.toString();
     }
   }

   /**
    * Splits {@code line} on runs of whitespace.
    *
    * @param line the text to split
    * @return a new mutable list of the non-empty tokens; empty when
    *     {@code line} is empty or contains only whitespace
    */
   public List<String> tokenize(String line) {
     List<String> tokens = new ArrayList<>();
     // Trim first: String.split keeps a leading empty string when the input
     // starts with a delimiter, and returns [""] for an empty input.
     String trimmed = line.trim();
     if (!trimmed.isEmpty()) {
       tokens.addAll(Arrays.asList(trimmed.split("\\s+")));
     }
     return tokens;
   }

   /**
    * Builds a context of at most {@code size} tokens, starting from the tokens
    * of {@code currentContext} and borrowing from neighbouring lines when the
    * current line does not supply enough.
    *
    * @param lines all lines of the document
    * @param currentContext the part of line {@code lineNum} on the relevant
    *     side of the match
    * @param lineNum index in {@code lines} of the line containing the match
    * @param size maximum number of tokens to return; values below zero are
    *     treated as zero
    * @param step direction to walk: negative collects tokens before the match
    *     (earlier lines), positive collects tokens after it (later lines);
    *     only the sign is used, and zero disables borrowing
    * @return a new mutable list of tokens in reading order; the tokens nearest
    *     the match are kept when there are more than {@code size}
    */
   public List<String> buildContext(List<String> lines,
                                    String currentContext,
                                    int lineNum,
                                    int size,
                                    int step) {
     int limit = Math.max(size, 0);
     boolean backwards = step < 0;
     List<String> contextTokens = tokenize(currentContext);

     // Keep only the tokens closest to the match: the tail of a pre-context,
     // the head of a post-context.
     if (contextTokens.size() > limit) {
       int from = backwards ? contextTokens.size() - limit : 0;
       contextTokens = new ArrayList<>(contextTokens.subList(from, from + limit));
     }

     int direction = Integer.signum(step);
     if (direction == 0) {
       return contextTokens;
     }

     int remaining = limit - contextTokens.size();
     for (int cur = lineNum + direction;
          remaining > 0 && cur >= 0 && cur < lines.size();
          cur += direction) {
       List<String> extraTokens = tokenize(lines.get(cur));
       int nToks = Math.min(extraTokens.size(), remaining);
       if (backwards) {
         // Earlier line: take its last tokens and put them in front.
         contextTokens.addAll(0,
             extraTokens.subList(extraTokens.size() - nToks, extraTokens.size()));
       } else {
         // Later line: take its first tokens and append them.
         contextTokens.addAll(extraTokens.subList(0, nToks));
       }
       remaining -= nToks;
     }
     return contextTokens;
   }

   /**
    * Tags every non-blank line of {@code text} and builds the surrounding
    * context for each match.
    *
    * @param text the document; lines are separated by {@code \n}
    * @param left maximum number of tokens to collect before each match
    * @param right maximum number of tokens to collect after each match
    * @return one {@link Context} per match, in document order
    */
   public List<Context> getContexts(String text, int left, int right) {
     List<Context> contexts = new ArrayList<>();

     // Blank lines are dropped, so contexts flow across paragraph breaks.
     List<String> lines = new ArrayList<>();
     for (String line : text.split("\n")) {
       if (!line.trim().isEmpty()) {
         lines.add(line);
       }
     }

     for (int lineNum = 0; lineNum < lines.size(); ++lineNum) {
       String currentLine = lines.get(lineNum);
       for (TagResult tagResult : fakeTagger(currentLine)) {
         List<String> preContext = buildContext(lines,
             currentLine.substring(0, tagResult.start), lineNum, left, -1);

         List<String> postContext = buildContext(lines,
             currentLine.substring(tagResult.end), lineNum, right, +1);

         contexts.add(new Context(preContext, postContext, tagResult.token));
       }
     }

     return contexts;
   }

   /** Prints the contexts (five tokens each side) found in {@link #text}. */
   public static void main(String[] args) {
     ContextExtractor m = new ContextExtractor();
     m.getContexts(text, 5, 5).forEach(System.out::println);
   }
 }
	import java.util.*;
	import java.util.stream.*;

	/**
	 * Small demonstration of context extraction: finds known terms in a block of
	 * text and reports the whitespace-delimited tokens surrounding each match.
	 */
	class ContextExtractor {

	  /** Lowercase terms that {@link #fakeTagger(String)} searches for. */
	  private static final List<String> TERMS = List.of("diabetes mellitus", "cva");

	  /** A matched term and its character offsets within the line it was found on. */
	  public static class TagResult {
	    /** The matched term, spelled as in the term list (lowercase). */
	    public String token;
	    /** Offset of the first character of the match (inclusive). */
	    public int start;
	    /** Offset just past the last character of the match (exclusive). */
	    public int end;

	    public TagResult(String t, int s, int e) {
	      this.token = t;
	      this.start = s;
	      this.end = e;
	    }

	    /** Renders as {@code token : {start,end}}. */
	    @Override
	    public String toString() {
	      return token + " : {" + start + "," + end + "}";
	    }
	  }

	  /** Sample text processed by {@link #main(String[])}. */
	  public static String text =
	      "Patient has diabetes mellitus and shows no signs of stopping candy\n" + "\n"
	      + "Family History\n"
	      + "CVA and CHF"
	      + "\n"
	      + "Signed by Dr Strange";

	  /**
	   * Finds every non-overlapping occurrence of a known term in {@code line},
	   * ignoring case, and returns the matches in left-to-right order.
	   *
	   * <p>At each step the earliest remaining occurrence of any term is taken
	   * (the longest term wins a tie), so a term is never skipped just because a
	   * different term was matched further to the right.
	   *
	   * @param line a single line of text
	   * @return the matches, ordered by start offset; empty if nothing matched
	   */
	  public List<TagResult> fakeTagger(String line) {
	    List<TagResult> taggedItems = new ArrayList<>();

	    // Lowercase once with a fixed locale so matching does not depend on the
	    // platform default. NOTE(review): offsets are computed on the lowercased
	    // copy; this assumes lowercasing does not change the line's length.
	    String haystack = line.toLowerCase(Locale.ROOT);

	    int searchFrom = 0;
	    while (searchFrom < haystack.length()) {
	      String bestTerm = null;
	      int bestStart = -1;
	      for (String term : TERMS) {
	        int idx = haystack.indexOf(term, searchFrom);
	        if (idx < 0) {
	          continue;
	        }
	        boolean earlier = bestTerm == null || idx < bestStart;
	        boolean longerAtSameSpot =
	            bestTerm != null && idx == bestStart && term.length() > bestTerm.length();
	        if (earlier || longerAtSameSpot) {
	          bestTerm = term;
	          bestStart = idx;
	        }
	      }

	      if (bestTerm == null) {
	        // No term occurs in the rest of the line.
	        break;
	      }

	      int end = bestStart + bestTerm.length();
	      taggedItems.add(new TagResult(bestTerm, bestStart, end));
	      // Terms are non-empty, so this always moves forward.
	      searchFrom = end;
	    }
	    return taggedItems;
	  }

	  /** The tokens before and after one matched term. */
	  public static class Context {
	    /** Tokens preceding the match, in reading order. */
	    public List<String> pre;
	    /** Tokens following the match, in reading order. */
	    public List<String> post;
	    /** The matched term. */
	    public String token;

	    public Context(List<String> pr, List<String> po, String t) {
	      this.pre = pr;
	      this.post = po;
	      this.token = t;
	    }

	    /** Renders as {@code [ pre, ... ] -- [Tok: token ] -- [ post, ... ]}. */
	    @Override
	    public String toString() {
	      StringBuilder builder = new StringBuilder();
	      builder.append("[ ");
	      for (String preTok : pre) {
	        builder.append(preTok).append(", ");
	      }
	      builder.append("] -- ");
	      builder.append("[Tok: ").append(token).append(" ] -- ");
	      builder.append("[ ");
	      for (String postTok : post) {
	        builder.append(postTok).append(", ");
	      }
	      builder.append("]");
	      return builder.toString();
	    }
	  }

	  /**
	   * Splits {@code line} on runs of whitespace.
	   *
	   * @param line the text to split
	   * @return a new mutable list of the non-empty tokens; empty when
	   *     {@code line} is empty or contains only whitespace
	   */
	  public List<String> tokenize(String line) {
	    List<String> tokens = new ArrayList<>();
	    // Trim first: String.split keeps a leading empty string when the input
	    // starts with a delimiter, and returns [""] for an empty input.
	    String trimmed = line.trim();
	    if (!trimmed.isEmpty()) {
	      tokens.addAll(Arrays.asList(trimmed.split("\\s+")));
	    }
	    return tokens;
	  }

	  /**
	   * Builds a context of at most {@code size} tokens, starting from the tokens
	   * of {@code currentContext} and borrowing from neighbouring lines when the
	   * current line does not supply enough.
	   *
	   * @param lines all lines of the document
	   * @param currentContext the part of line {@code lineNum} on the relevant
	   *     side of the match
	   * @param lineNum index in {@code lines} of the line containing the match
	   * @param size maximum number of tokens to return; values below zero are
	   *     treated as zero
	   * @param step direction to walk: negative collects tokens before the match
	   *     (earlier lines), positive collects tokens after it (later lines);
	   *     only the sign is used, and zero disables borrowing
	   * @return a new mutable list of tokens in reading order; the tokens nearest
	   *     the match are kept when there are more than {@code size}
	   */
	  public List<String> buildContext(List<String> lines,
	                                   String currentContext,
	                                   int lineNum,
	                                   int size,
	                                   int step) {
	    int limit = Math.max(size, 0);
	    boolean backwards = step < 0;
	    List<String> contextTokens = tokenize(currentContext);

	    // Keep only the tokens closest to the match: the tail of a pre-context,
	    // the head of a post-context.
	    if (contextTokens.size() > limit) {
	      int from = backwards ? contextTokens.size() - limit : 0;
	      contextTokens = new ArrayList<>(contextTokens.subList(from, from + limit));
	    }

	    int direction = Integer.signum(step);
	    if (direction == 0) {
	      return contextTokens;
	    }

	    int remaining = limit - contextTokens.size();
	    for (int cur = lineNum + direction;
	         remaining > 0 && cur >= 0 && cur < lines.size();
	         cur += direction) {
	      List<String> extraTokens = tokenize(lines.get(cur));
	      int nToks = Math.min(extraTokens.size(), remaining);
	      if (backwards) {
	        // Earlier line: take its last tokens and put them in front.
	        contextTokens.addAll(0,
	            extraTokens.subList(extraTokens.size() - nToks, extraTokens.size()));
	      } else {
	        // Later line: take its first tokens and append them.
	        contextTokens.addAll(extraTokens.subList(0, nToks));
	      }
	      remaining -= nToks;
	    }
	    return contextTokens;
	  }

	  /**
	   * Tags every non-blank line of {@code text} and builds the surrounding
	   * context for each match.
	   *
	   * @param text the document; lines are separated by {@code \n}
	   * @param left maximum number of tokens to collect before each match
	   * @param right maximum number of tokens to collect after each match
	   * @return one {@link Context} per match, in document order
	   */
	  public List<Context> getContexts(String text, int left, int right) {
	    List<Context> contexts = new ArrayList<>();

	    // Blank lines are dropped, so contexts flow across paragraph breaks.
	    List<String> lines = new ArrayList<>();
	    for (String line : text.split("\n")) {
	      if (!line.trim().isEmpty()) {
	        lines.add(line);
	      }
	    }

	    for (int lineNum = 0; lineNum < lines.size(); ++lineNum) {
	      String currentLine = lines.get(lineNum);
	      for (TagResult tagResult : fakeTagger(currentLine)) {
	        List<String> preContext = buildContext(lines,
	            currentLine.substring(0, tagResult.start), lineNum, left, -1);

	        List<String> postContext = buildContext(lines,
	            currentLine.substring(tagResult.end), lineNum, right, +1);

	        contexts.add(new Context(preContext, postContext, tagResult.token));
	      }
	    }

	    return contexts;
	  }

	  /** Prints the contexts (five tokens each side) found in {@link #text}. */
	  public static void main(String[] args) {
	    ContextExtractor m = new ContextExtractor();
	    m.getContexts(text, 5, 5).forEach(System.out::println);
	  }
	}