Skip to content

Instantly share code, notes, and snippets.

View therohk's full-sized avatar

Rohit Kulkarni therohk

View GitHub Profile
@therohk
therohk / opted-pos-meaning.md
Last active October 13, 2024 15:16
OPTED (Online Plain Text English Dictionary) word type to Part of Speech mapping
| opted_type | desc |
|------------|------|
| n. | noun |
| a. | adjective |
| v. t. | verb transitive |
| v. i. | verb intransitive |
| adv. | adverb |
| pl. | noun plural |
| n. pl. | noun plural |
| superl. | adjective superlative |
@therohk
therohk / elements.json
Created April 10, 2022 11:07
known chemical elements and their physical attributes
[{"atomic":"1","symbol":"H","weight":"1.008","electronstring":"1s1","series":"Nonmetal","melt":"14.01","boil":"20.28","electroneg":"2.20","valence":"1","oxidation":"-1c,1c","radius":{"calculated":"53","empirical":"25","covalent":"37","vanderwaals":"120"},"discover":"1766","density":{"stp":"0.0899"},"affinity":"72.8","conductivity":{"thermal":"0.1805"},"abundance":{"universe":"75","solar":"75","meteor":"2.4","crust":"0.15","ocean":"11","human":"10"},"heat":{"specific":"14300","vaporization":"0.452","fusion":"0.558"},"ionize":{"1":"1312.0"},"expandedconfig":"1s1","electrons":[1],"quantum":{"l":0,"m":0,"n":1},"isotopes":7},
{"atomic":"2","symbol":"He","weight":"4.002602","electronstring":"1s2","series":"Noble","boil":"4.22","valence":"0","radius":{"calculated":"31","covalent":"32","vanderwaals":"140"},"discover":"1895","density":{"stp":"0.1785"},"affinity":"0","conductivity":{"thermal":"0.1513"},"abundance":{"universe":"23","solar":"23","crust":"5.5e-7","ocean":"7.2e-10"},"heat":{"specific":"5193.1","vaporizatio
@therohk
therohk / TreeOfLifeEval.java
Last active May 29, 2021 09:53
Code for converting the Tree of Life from its XML source to a CSV dataset using jsoup
public static void main(String[] args) {
DataSet linkSet = new DataSet();
linkSet.getHeaders().setHeaders("source_node_id", "target_node_id");
DataSet nodeSet = new DataSet();
nodeSet.getHeaders().setHeaders("node_id", "node_name", "child_nodes", "leaf_node", "tolorg_link", "extinct", "confidence", "phylesis");
try {
Connection connection = HttpConnection.connect("http://tolweb.org/");
@therohk
therohk / versor-1.8.1.js
Created December 30, 2020 14:14
Functions for interacting with a d3.js globe
// Version 1.8.1 Copyright 2020 Rohit Kulkarni
(function(global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() :
typeof define === 'function' && define.amd ? define(factory) :
(global.versor = factory());
}(this, (function() {'use strict';
var acos = Math.acos,
asin = Math.asin,
atan2 = Math.atan2,
@therohk
therohk / BaseCounterGist.java
Last active August 5, 2021 17:29
Counter for any base representation using a provided charset
package com.therohk.gist;
import java.math.BigInteger;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
public final class BaseCounterGist implements Iterator<String> {
//-------------------------------------------------------------------------
private static final String charset26 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
@therohk
therohk / india-city-locations.json
Created July 16, 2020 13:34
india-news-headlines~headline_category~city\.(\w+) doi.org/10.7910/DVN/DPQMQH
[{"latitude": 23.84, "longitude": 91.27, "city":"agartala" },
{ "latitude": 27.19, "longitude": 78.01, "city":"agra" },
{ "latitude": 23.03, "longitude": 72.58, "city":"ahmedabad" },
{ "latitude": 26.45, "longitude": 74.64, "city":"ajmer" },
{ "latitude": 25.45, "longitude": 81.84, "city":"allahbad" },
{ "latitude": 16.57, "longitude": 80.35, "city":"amaravati" },
{ "latitude": 31.64, "longitude": 74.87, "city":"amritsar" },
{ "latitude": 24.75, "longitude": 84.37, "city":"aurangabad" },
{ "latitude": 28.36, "longitude": 79.41, "city":"bareilly" },
{ "latitude": 12.97, "longitude": 77.56, "city":"bengaluru" },
plot_slice_freq = function(dfin, start, stend, include=c(), stem_flag=TRUE, floor_unit="halfyear", plot_name="_") {
# ---- publish-date-ungram-hertz
ungram_hz_inc <- dfin %>%
subset(select=c(ungram,publish_date)) %>%
mutate(Date = as.Date(substr(publish_date,1,8), format="%Y%m%d")) %>%
filter(Date>=start & Date<=stend)
if(stem_flag) {
ungram_hz_inc$ungram <- wordStem(ungram_hz_inc$ungram, language="english")
include <- wordStem(include, language="english")
}
plot_ungram_tfidf = function(dfin, start, stend, slice_unit="year", nmin=2, ntop=10, ncol=3) {
# ---- publish-date-ungram-tfidf
ungram_tf_idf <- dfin %>%
subset(select=c(ungram,publish_date)) %>%
mutate(Date = as.Date(substr(publish_date,1,8), format="%Y%m%d")) %>%
filter(Date>=start & Date<=stend) %>%
mutate(time_slice = floor_date(Date, unit = slice_unit)) %>%
mutate(time_slice = format(time_slice, "%Y-%m-%d")) %>%
count(time_slice, ungram) %>%
filter(between(n,nmin,max(n))) %>%
# ---- publish-date-bigram-tfidf
plot_bigram_tfidf <- function(dbin, start, stend, slice_unit="year", nmin=5, ntop=10, ncol=3) {
bigram_tf_idf <- dbin %>%
subset(select=c(bigram,publish_date)) %>%
mutate(Date = as.Date(substr(publish_date,1,8), format="%Y%m%d")) %>%
filter(Date>=start & Date<=stend) %>%
mutate(time_slice = floor_date(Date, unit = slice_unit)) %>%
mutate(time_slice = format(time_slice, "%Y-%m-%d")) %>%
count(time_slice, bigram) %>%
filter(between(n, nmin, 10000)) %>%
@therohk
therohk / stopwords-en-basic.txt
Last active December 6, 2019 20:07
Stopwords English List - Basic
a
able
about
above
across
after
again
against
all
almost