R

File size
10.8KB
Lines of code
279

R

Domain-specific language for statistical computing, data analysis and visualization.

Comments

# ----- COMMENT -----

# this is a single-line comment

# R has no dedicated syntax
# for multi-line comments,
# but consecutive single-line comments 
# like these achieve the same effect

Printing

# ----- PRINTING -----
    # print() => receives any R object and prints its representation to the stdout, with a newline automatically included at the end of the output; a string is shown with an index prefix and quotes, e.g. [1] "text"
    # cat() => receives a string argument to be printed to the stdout and does not include a newline by default

# print() shows the string with an index prefix and surrounding quotes: [1] "this has a newline automatically included"
print("this has a newline automatically included")
# cat() writes the text as-is, so the trailing \n inside the string is what ends the line
cat("this does not have a newline included and it must be explicitly specified as such\n")

Quickstart

# ----- QUICKSTART -----
    # dynamically-typed interpreted language
    # weak type system with implicit type conversion
    # optimized for operations on vectors and matrices 
    # supports functional programming paradigms
    # <- => R uniquely uses <- as the default preferred assignment operator for assigning a value or a function to a variable name, although it is noteworthy that = is a valid alternative syntax

x <- 10

# square() returns its argument multiplied by itself
square <- function(a) {
    return(a * a)
}
print(square(x)) # this displays [1] 100 to the stdout

Types

# ----- TYPE -----
    # numeric => stores numbers with and without decimal points, covering both integers and floating point value numbers
    # integer => stores an integer number value
    # complex => stores a complex number value with real and imaginary parts
    # character => stores single character and string data, declared within "" double quotation marks
    # raw => stores char values as raw bytes
    # logical => stores boolean TRUE and FALSE values
    # Date => stores a date value in the format YYYY-MM-DD
    # POSIXct => stores a datetime timestamp value in the format YYYY-MM-DD HH:MM:SS

Operators

# ----- OPERATOR ----- 

# --- ARITHMETIC OPERATOR ---

+ # addition
- # subtraction
* # multiplication
/ # division
%% # modulo
^ # exponentiation

# --- COMPARISON OPERATOR ---

== # partial equality check for value but not type
!= # partial inequality check for value but not type
> # comparison operator
< # comparison operator
>= # comparison operator
<= # comparison operator

# --- LOGICAL OPERATOR ---

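& # element-wise logical and (&& is the single-value form used in if and while conditions)
| # element-wise logical or (|| is the single-value form used in if and while conditions)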
! # logical not

Control structures

# ----- CONTROL STRUCTURE -----

# --- CONDITIONALS ---

# IF ELSE IF ELSE 

# x is compared against zero and exactly one of the three branches runs
x <- 5
if (x > 0) {
    print("x is positive number")
} else if (x < 0) {
    print("x is negative number")
} else {
    # reached only when x is exactly 0 (neither greater than nor less than zero)
    print("x is zero")
}

# SWITCH()
    # the switch() construct allows for a degree of pattern-matching in R, the equivalent of switch case and match case statements in other programming languages
    # each comma-delimited predicate case condition listed within the switch() construct has its relationship specified with =
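    # first argument in switch() is the value to be checked, which must be a character string for the named cases below to be matched
    # final unnamed argument in switch() is the default fall-through value returned if all other specified predicate case conditions are unmet; if the first argument is a number instead, switch() ignores the names, picks the case at that position and returns NULL when the number is out of range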
    # the result of switch() constructs can be directly assigned to a variable, reminiscent of other functional languages

x <- 3
# switch() only matches the names below (and falls back to the final unnamed
# default) when its first argument is a character string; given a bare number
# it would pick the alternative at that position and yield NULL when the
# number is out of range, so x is converted to a string first
result <- switch(as.character(x),
                "1" = "one",
                "2" = "two",
                "3" = "three",
                "4" = "four",
                "invalid number")

# --- LOOPS ---

# FOR IN 
    # equivalent of foreach loops in PHP and similar to loops in Python, allowing iteration over each element within an iterable structure
    # <startingRangeValueInclusive> : <endRangeValueInclusive> => dynamic creation of an iterable range structure that includes the inclusive start and end value

# 1:5 produces the integers 1 through 5; each one is bound to i in turn and printed
for (i in 1:5) {
    print(i)
}

# WHILE 
    # operates exactly the same as in other programming languages

count <- 1 # loop counter, starts at 1
while (count <= 5) { # condition is checked before every iteration
    print(count)
    count <- count + 1 # advance the counter so the condition eventually becomes FALSE
}
# prints 1 through 5; count is 6 once the loop exits

# REPEAT
    # creates an infinite loop construct, similar to the loop keyword in Rust
    # remember to include a break condition within a conditional predicate check as below to prevent unintentional infinite loops in your R program

count <- 1 # loop counter, starts at 1
repeat {
    print(count)
    count <- count + 1
    # repeat has no condition of its own, so the exit test lives inside the body;
    # it runs after the increment, so 1 through 5 are printed before the loop stops
    if (count > 5) {
        break
    }
}

Data structures

# ----- DATA STRUCTURE -----
    # vector => one-dimensional ordered collection of elements of the same datatype
    # matrix => two-dimensional ordered collection of elements of the same datatype
    # array => multi-dimensional ordered collection of elements of the same datatype
    # list => ordered mapped collection of elements of multiple datatypes and their corresponding named fields, where elements can be accessed by their names or indices, effectively a hybrid between a hashmap and an array and the equivalent of an indexmap in Rust
    # data frame => two-dimensional literal table-like structure where named columns can be of multiple datatypes
    # factor => stores categorical data whose allowed values are kept as a fixed set of levels, R's rough equivalent of enums in Rust that allows for more expressive statistical modelling

# each constructor below builds one of the data structures described above
anExampleVector <- c(1, 2, 3, 4, 5)
anExampleMatrix <- matrix(1:9, nrow = 3) # 3 x 3, filled column by column
anExampleArray <- array(1:12, dim = c(3, 2, 2)) # 3 rows x 2 columns x 2 slices
anExampleList <- list(name = "John", age = 30, married = TRUE)
anExampleDataFrame <- data.frame(name = c("Alice", "Bob"), age = c(25, 30))
# levels are given explicitly so they keep the order low, medium, high;
# without the levels argument factor() sorts them alphabetically
# (high, low, medium)
anExampleFactorWithLevels <- factor(c("low", "medium", "high"),
                                    levels = c("low", "medium", "high"))

Functions

# ----- FUNCTION -----
    # <functionName> <- function(<functionParameters(s)>) { <functionBodyDefinition> } => function declaration and definition of a named function, despite the slightly odd syntax that appears to suggest an anonymous function
    # return() => note that R supports explicit returns by calling return(); when it is omitted, a function returns the value of its last evaluated expression

# add() returns the sum of its two arguments
add <- function(x, y) {
    total <- x + y
    return(total)
}
result <- add(5, 3) # the value stored in result is 8

Data visualisation

# ----- DATA VISUALISATION -----
    # library() => loads in the specified libraries required for data visualisation, bringing them into local scope within the present R file
    # set.seed() => fixes the seed of the random number generator to the given integer value, so that later random draws such as rnorm() are reproducible across runs
    # ggplot() => generic function to call the visualisation library, within which various augmenters can be specified to determine the type of visualisation, as well as further aesthetic specifications
    # geom_point => plots a scatter plot to visualise the initialised dataframe data
    # geom_bar => plots a bar plot to visualise the initialised dataframe data
    # geom_line => plots a line plot to visualise the initialised dataframe data
    # geom_boxplot => plots a box plot to visualise the initialised dataframe data
    # geom_histogram => plots a histogram to visualise the initialised dataframe data

library(ggplot2) # loads in the ggplot2 library for plotting data

# fixing the seed first makes rnorm() draw the same values on every run,
# so the plots built from this data frame are reproducible
set.seed(42)

# creation of a given dataframe with designated values:
# x holds the integers 1 to 10, y holds 10 draws from a standard normal distribution
data <- data.frame(
    x = 1:10,
    y = rnorm(10, mean = 0, sd = 1)
)

# plots a scatter plot: the x and y columns of data are mapped to the two axes,
# geom_point() draws one point per row and labs() sets the title and axis labels
ggplot(data, aes(x = x, y = y)) +
    geom_point() +
    labs(title = "Scatter Plot", x = "X Axis", y = "Y Axis")

# plots a bar plot: stat = "identity" makes each bar's height the y value itself
# rather than a count of rows; the bars are filled in sky blue
ggplot(data, aes(x = x, y = y)) +
    geom_bar(stat = "identity", fill = "skyblue") +
    labs(title = "Bar Plot", x = "X Axis", y = "Y Axis")

# plots a line plot: the (x, y) points are joined in order of x by a red line
ggplot(data, aes(x = x, y = y)) +
    geom_line(color = "red") +
    labs(title = "Line Plot", x = "X Axis", y = "Y Axis")

# plots a box plot: only y is mapped, so a single box summarises all y values
ggplot(data, aes(y = y)) +
    geom_boxplot() +
    labs(title = "Box Plot", y = "Y Axis")

# plots a histogram: the y column is mapped to the horizontal axis and split into
# 10 bins; each bin's row count is drawn as a light green bar with a black outline
ggplot(data, aes(x = y)) +
    geom_histogram(fill = "lightgreen", color = "black", bins = 10) +
    labs(title = "Histogram", x = "Y Axis", y = "Frequency")

More on