@bmwcmw · Created August 21, 2019 06:33
Find relative tags
// This is the principal idea for the question, plus some thoughts I didn't have time to explain.
// The simple version is O(C*N*M), where C is the number of queries, N the number of log lines,
// and M the average length of a log line ==> effectively O(N*M) when C is small.

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
static Set<String> findRelativeTags(Iterator<String> logStream, String[] queries) {
    if (queries == null || queries.length == 0) {
        return Collections.emptySet();
    }
    Set<String> relativeTags = new HashSet<>();
    while (logStream.hasNext()) {
        Set<String> logLineTagSet = splitTagSet(logStream.next());
        int matchingCount = 0;
        for (String query : queries) {
            if (logLineTagSet.contains(query)) {
                matchingCount++;
            }
        }
        // If the current log line's tags match every tag in the query
        if (matchingCount == queries.length) {
            relativeTags.addAll(logLineTagSet);
        }
    }
    // Remove the query tags once at the end to avoid repeating the removal for every line
    for (String query : queries) {
        relativeTags.remove(query);
    }
    return relativeTags;
}
/**
 * Parse a log line's tag section into a set.
 * O(M), where M is the length of logLine.
 */
static Set<String> splitTagSet(String logLine) {
    if (logLine == null || logLine.isEmpty()) {
        return Collections.emptySet();
    }
    String[] split = logLine.split("\\|"); // can be optimized using a char array index
    String tagStr = split[split.length - 1];
    String[] tags = tagStr.split(",");
    return new HashSet<>(Arrays.asList(tags));
}
// It felt strange to see the API findRelativeTags(log, queries): why should the logs be re-read every time we query?
// It seemed more logical to have a preprocessing & indexing phase for the tags.
// I began talking about some kind of Map-based preprocessing & indexing and we ran out of time. Perhaps it was out of scope?
// In real life, one option would be to build the index incrementally as the system receives each inserted log line,
// for example by indexing all potential search terms,
// then answer queries from that pre-built index (and perhaps update the index later according to some given logic).
// A rough sketch of that idea follows below.
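
// Below is a rough, illustrative sketch of the Map-based indexing idea (my own illustration,
// not something we wrote during the discussion). It keeps an inverted index from tag -> ids of
// the log lines containing that tag, updated as lines arrive, and answers queries by
// intersecting posting sets instead of rescanning the raw log. The name LogTagIndex is made up
// for the example; it reuses splitTagSet(...) from above.
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

class LogTagIndex {
    private final List<Set<String>> lineTags = new ArrayList<>();         // line id -> its tags
    private final Map<String, Set<Integer>> tagToLines = new HashMap<>(); // tag -> ids of lines with that tag

    // Called once per inserted log line; O(T) for a line with T tags.
    void addLogLine(String logLine) {
        Set<String> tags = splitTagSet(logLine);
        int lineId = lineTags.size();
        lineTags.add(tags);
        for (String tag : tags) {
            tagToLines.computeIfAbsent(tag, k -> new HashSet<>()).add(lineId);
        }
    }

    // Intersect the posting sets of the query tags, then collect the tags of the surviving lines.
    Set<String> findRelativeTags(String[] queries) {
        if (queries == null || queries.length == 0) {
            return Collections.emptySet();
        }
        Set<Integer> matchingLines =
                new HashSet<>(tagToLines.getOrDefault(queries[0], Collections.emptySet()));
        for (int i = 1; i < queries.length; i++) {
            matchingLines.retainAll(tagToLines.getOrDefault(queries[i], Collections.emptySet()));
        }
        Set<String> result = new HashSet<>();
        for (int lineId : matchingLines) {
            result.addAll(lineTags.get(lineId));
        }
        for (String query : queries) {
            result.remove(query);
        }
        return result;
    }
}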