Skip to content

Instantly share code, notes, and snippets.

@yu-iskw
Created August 21, 2015 04:16
Show Gist options
  • Save yu-iskw/20d8a408f3dce5ae90e0 to your computer and use it in GitHub Desktop.
Save yu-iskw/20d8a408f3dce5ae90e0 to your computer and use it in GitHub Desktop.
functions.Rd at dcfe0c5cde953b31c5bfeb6e41d1fc9b333241eb
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/functions.R, R/generics.R
\docType{methods}
\name{lit,ANY-method}
\alias{add_months}
\alias{add_months,Column,numeric-method}
\alias{approxCountDistinct,Column-method}
\alias{ascii}
\alias{avg}
\alias{base64}
\alias{bin}
\alias{bitwiseNOT}
\alias{cbrt}
\alias{ceil}
\alias{ceiling,Column-method}
\alias{concat}
\alias{concat,Column-method}
\alias{concat_ws}
\alias{concat_ws,character,Column-method}
\alias{conv}
\alias{conv,Column,numeric,numeric-method}
\alias{count}
\alias{countDistinct}
\alias{countDistinct,Column-method}
\alias{crc32}
\alias{date_add}
\alias{date_add,Column,numeric-method}
\alias{date_format}
\alias{date_format,Column,character-method}
\alias{date_sub}
\alias{date_sub,Column,numeric-method}
\alias{datediff}
\alias{dayofmonth}
\alias{dayofyear}
\alias{explode}
\alias{expr}
\alias{expr,character-method}
\alias{format_number}
\alias{format_number,Column,numeric-method}
\alias{format_string}
\alias{format_string,character,Column-method}
\alias{from_unixtime}
\alias{from_unixtime,Column-method}
\alias{from_utc_timestamp}
\alias{from_utc_timestamp,Column,character-method}
\alias{greatest}
\alias{greatest,Column-method}
\alias{hex}
\alias{hour}
\alias{initcap}
\alias{instr}
\alias{instr,Column,character-method}
\alias{isNaN}
\alias{last_day}
\alias{least}
\alias{least,Column-method}
\alias{levenshtein}
\alias{lit}
\alias{lit,ANY-method}
\alias{locate}
\alias{locate,character,Column-method}
\alias{lower}
\alias{lpad}
\alias{lpad,Column,numeric,character-method}
\alias{ltrim}
\alias{md5}
\alias{minute}
\alias{month}
\alias{months_between}
\alias{n,Column-method}
\alias{n_distinct,Column-method}
\alias{nanvl}
\alias{negate}
\alias{next_day}
\alias{next_day,Column,character-method}
\alias{pmod}
\alias{quarter}
\alias{rand}
\alias{rand,missing-method}
\alias{randn}
\alias{randn,missing-method}
\alias{regexp_extract}
\alias{regexp_extract,Column,character,numeric-method}
\alias{regexp_replace}
\alias{regexp_replace,Column,character,character-method}
\alias{reverse}
\alias{rpad}
\alias{rpad,Column,numeric,character-method}
\alias{rtrim}
\alias{second}
\alias{sha1}
\alias{sha2}
\alias{sha2,Column,numeric-method}
\alias{shiftLeft}
\alias{shiftLeft,Column,numeric-method}
\alias{shiftRight}
\alias{shiftRight,Column,numeric-method}
\alias{shiftRightUnsigned}
\alias{shiftRightUnsigned,Column,numeric-method}
\alias{sign,Column-method}
\alias{signum}
\alias{size}
\alias{soundex}
\alias{substring_index}
\alias{substring_index,Column,character,numeric-method}
\alias{sumDistinct}
\alias{toDegrees}
\alias{toRadians}
\alias{to_date}
\alias{to_utc_timestamp}
\alias{to_utc_timestamp,Column,character-method}
\alias{translate}
\alias{translate,Column,character,character-method}
\alias{trim}
\alias{unbase64}
\alias{unhex}
\alias{unix_timestamp}
\alias{unix_timestamp,Column,character-method}
\alias{unix_timestamp,Column,missing-method}
\alias{unix_timestamp,missing,missing-method}
\alias{upper}
\alias{weekofyear}
\alias{year}
\title{Approx Count Distinct}
\usage{
\S4method{lit}{ANY}(x)
\S4method{approxCountDistinct}{Column}(x, rsd = 0.95)
\S4method{countDistinct}{Column}(x, ...)
\S4method{concat}{Column}(x, ...)
\S4method{greatest}{Column}(x, ...)
\S4method{least}{Column}(x, ...)
\S4method{ceiling}{Column}(x)
\S4method{sign}{Column}(x)
\S4method{n_distinct}{Column}(x, ...)
\S4method{n}{Column}(x)
\S4method{date_format}{Column,character}(y, x)
\S4method{from_utc_timestamp}{Column,character}(y, x)
\S4method{instr}{Column,character}(y, x)
\S4method{next_day}{Column,character}(y, x)
\S4method{to_utc_timestamp}{Column,character}(y, x)
\S4method{add_months}{Column,numeric}(y, x)
\S4method{date_add}{Column,numeric}(y, x)
\S4method{date_sub}{Column,numeric}(y, x)
\S4method{format_number}{Column,numeric}(y, x)
\S4method{sha2}{Column,numeric}(y, x)
\S4method{shiftLeft}{Column,numeric}(y, x)
\S4method{shiftRight}{Column,numeric}(y, x)
\S4method{shiftRightUnsigned}{Column,numeric}(y, x)
\S4method{concat_ws}{character,Column}(sep, x, ...)
\S4method{conv}{Column,numeric,numeric}(x, fromBase, toBase)
\S4method{expr}{character}(x)
\S4method{format_string}{character,Column}(format, x, ...)
\S4method{from_unixtime}{Column}(x, format = "yyyy-MM-dd HH:mm:ss")
\S4method{locate}{character,Column}(substr, str, pos = 0)
\S4method{lpad}{Column,numeric,character}(x, len, pad)
\S4method{rand}{missing}(seed)
\S4method{randn}{missing}(seed)
\S4method{regexp_extract}{Column,character,numeric}(x, pattern, idx)
\S4method{regexp_replace}{Column,character,character}(x, pattern, replacement)
\S4method{rpad}{Column,numeric,character}(x, len, pad)
\S4method{substring_index}{Column,character,numeric}(x, delim, count)
\S4method{translate}{Column,character,character}(x, matchingString,
replaceString)
\S4method{unix_timestamp}{missing,missing}(x, format)
\S4method{unix_timestamp}{Column,missing}(x, format)
\S4method{unix_timestamp}{Column,character}(x, format = "yyyy-MM-dd HH:mm:ss")
add_months(y, x)
ascii(x)
avg(x, ...)
base64(x)
bin(x)
bitwiseNOT(x)
cbrt(x)
ceil(x)
concat(x, ...)
concat_ws(sep, x, ...)
conv(x, fromBase, toBase)
crc32(x)
datediff(y, x)
date_add(y, x)
date_format(y, x)
date_sub(y, x)
dayofmonth(x)
dayofyear(x)
explode(x)
expr(x)
from_utc_timestamp(y, x)
format_number(y, x)
format_string(format, x, ...)
from_unixtime(x, ...)
greatest(x, ...)
hex(x)
hour(x)
initcap(x)
instr(y, x)
isNaN(x)
last_day(x)
least(x, ...)
levenshtein(y, x)
lit(x)
locate(substr, str, ...)
lower(x)
lpad(x, len, pad)
ltrim(x)
md5(x)
minute(x)
month(x)
months_between(y, x)
nanvl(y, x)
negate(x)
next_day(y, x)
pmod(y, x)
quarter(x)
rand(seed)
randn(seed)
regexp_extract(x, pattern, idx)
regexp_replace(x, pattern, replacement)
reverse(x)
rpad(x, len, pad)
rtrim(x)
second(x)
sha1(x)
sha2(y, x)
shiftLeft(y, x)
shiftRight(y, x)
shiftRightUnsigned(y, x)
signum(x)
size(x)
soundex(x)
substring_index(x, delim, count)
sumDistinct(x)
toDegrees(x)
toRadians(x)
to_date(x)
to_utc_timestamp(y, x)
translate(x, matchingString, replaceString)
trim(x)
unbase64(x)
unhex(x)
unix_timestamp(x, format)
upper(x)
weekofyear(x)
year(x)
}
\arguments{
\item{x}{one of 224, 256, 384, or 512.}
\item{y}{column to compute SHA-2 on.}
}
\value{
Creates a Column class of literal value.
the approximate number of distinct items in a group.
the number of distinct items in a group.
Concatenates multiple input string columns together into a single string column.
Returns the greatest value of the list of column names, skipping null values.
This function takes at least 2 parameters. It will return null if all parameters are null.
Returns the least value of the list of column names, skipping null values.
This function takes at least 2 parameters. It will return null iff all parameters are null.
}
\description{
Approx Count Distinct
Count Distinct
Converts a date/timestamp/string to a value of string in the format specified by the date
format given by the second argument.
Assumes given timestamp is UTC and converts to given timezone.
Locate the position of the first occurrence of substr column in the given string.
Returns null if either of the arguments is null.
Given a date column, returns the first date which is later than the value of the date column
that is on the specified day of the week.
Assumes given timestamp is in given timezone and converts to UTC.
Returns the date that is numMonths after startDate.
Returns the date that is `days` days after `start`
Returns the date that is `days` days before `start`
Formats numeric column x to a format like '#,###,###.##', rounded to d decimal places,
and returns the result as a string column.
Calculates the SHA-2 family of hash functions of a binary column and
returns the value as a hex string.
Shift the given value numBits left. If the given value is a long value, this function
will return a long value else it will return an integer value.
Shift the given value numBits right. If the given value is a long value, it will return
a long value else it will return an integer value.
Unsigned shift the given value numBits right. If the given value is a long value,
it will return a long value else it will return an integer value.
Concatenates multiple input string columns together into a single string column,
using the given separator.
Convert a number in a string column from one base to another.
Parses the expression string into the column that it represents, similar to
DataFrame.selectExpr
Formats the arguments in printf-style and returns the result as a string column.
Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string
representing the timestamp of that moment in the current system time zone in the given
format.
Locate the position of the first occurrence of substr.
NOTE: The position is not zero based, but 1 based index, returns 0 if substr
could not be found in str.
Left-pad the string column with pad to a length of len.
Generate a random column with i.i.d. samples from U[0.0, 1.0].
Generate a column with i.i.d. samples from the standard normal distribution.
Extract a specific group (idx) identified by a Java regex, from the specified string column.
Replace all substrings of the specified string value that match regexp with rep.
Right-padded with pad to a length of len.
Returns the substring from string str before count occurrences of the delimiter delim.
If count is positive, everything to the left of the final delimiter (counting from left) is
returned. If count is negative, everything to the right of the final delimiter (counting from the
right) is returned. substring_index performs a case-sensitive match when searching for delim.
Translate any character in the src by a character in replaceString.
The characters in replaceString correspond to the characters in matchingString.
The translation happens when any character in the string matches a character
in the matchingString.
Gets current Unix timestamp in seconds.
Converts time string in format yyyy-MM-dd HH:mm:ss to Unix timestamp (in seconds),
using the default timezone and the default locale, return null if fail.
Convert time string with given pattern
(see \url{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html})
to Unix time stamp (in seconds), return null if fail.
}
\details{
A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All
pattern letters of `java.text.SimpleDateFormat` can be used.
NOTE: Whenever possible, use specialized functions like `year`. These benefit from a
specialized implementation.
NOTE: The position is not zero based, but 1 based index, returns 0 if substr
could not be found in str.
For example, `next_day('2015-07-27', "Sunday")` returns 2015-08-02 because that is the first
Sunday after 2015-07-27.
Day of the week parameter is case insensitive, and accepts:
"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun".
If d is 0, the result has no decimal point or fractional part.
If d < 0, the result will be null.
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment