This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Bulk-load the client-side file 'tfidf.csv' into table tfidf.
-- Fields are separated by "|" and rows by a newline; each row's values are
-- assigned, in order, to the columns (term, tag, count).
load local data infile 'tfidf.csv' into table tfidf fields terminated by "|" lines terminated by '\n'(term, tag, count); | |
DELIMITER // | |
CREATE PROCEDURE tfidf_applier() | |
begin | |
declare res1 INT; | |
set res1 = (select count(distinct tag) from tfidf); | |
drop table if exists log_term_table; | |
create table log_term_table(term varchar(200), logval decimal(20,5)); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Count the number of occurrences of each term for each tag | |
(or in each document) | |
Arguments : | |
Input file where each input line is of the form : | |
term1;term2;term3.. , associated_tag1;associated_tag2.. | |
Output: | |
Basically a count matrix with each line of the form |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Combine all files in a directory into the required number of files | |
''' | |
import csv | |
import sys | |
import os | |
import time | |
def fetch(num): |