Einführung in R

Einführung in R - Notizen zum Kurs auf Datacamp

Arithmetische Operatoren

+
-
/
*
^
%%

Variablen

# Variable zuweisen
variable_name <- 5 + 5

# Wert von Variable ausgeben
variable_name

Datatypes

# Integer (the L suffix is required; a bare 1 is stored as numeric/double)
1L

# Numeric (decimal)
4.5

# Logical (boolean)
TRUE

# Character (string)
"asd"

Datentyp einer Variable mit class() ermitteln

a_number <- 12
a_string <- "asd"

class(a_number) # numeric
class(a_string) # character

Vectors / Eindimensionale Arrays

# Vector erstellen
# c() = combine
a_vector <- c(1, 2, 3, 4, 5)

Vector benennen

a_vector <- c(1, 2, 3, 4, 5)

# Gibt Namen basierend auf Index
names(a_vector) <- c("Eins", "Zwei", "Drei", "Vier", "Fünf")

# Ausgabe ist
# Eins   Zwei  Drei  Vier  Fünf
# 1      2     3     4     5

Summe eines Vectors berechnen

sum(a_vector)

Auf Elemente eines Vectors zugreifen

ERSTES ELEMENT HAT INDEX 1, NICHT 0

# Element an index 1
a_vector[1]

Durchschnitt eines Vectors berechnen

mean(a_vector)

Vergleich

<
>
<=
>=
==
!=

Werte aus Vector durch Vergleich filtern

# Sample data
a_vector <- c(1, 2, 3, 4, 5)

# Two-step form: build the logical mask first, then index with it
greater_than_three_bool <- a_vector > 3
# FALSE FALSE FALSE TRUE TRUE
greater_selection <- a_vector[greater_than_three_bool]
# 4, 5

# One-step form: the comparison is used directly as the index
greater_than_three <- a_vector[a_vector > 3]

greater_than_three

# 4, 5

Matrizen / Zweidimensionales Array

Matrix erstellen

matrix(1:9, byrow = TRUE, nrow = 3)

#      [,1] [,2] [,3]
# [1,]    1    2    3
# [2,]    4    5    6
# [3,]    7    8    9
# General shape of a matrix() call. WERT is placeholder data for the example;
# substitute the values the matrix should contain.
WERT <- 1:6
matrix(
  WERT,
  byrow = TRUE, # fill the matrix row by row
  nrow = 2,     # number of rows
  ncol = 3      # number of columns
)

Matrizen benennen

rownames(a_matrix) <- row_name_vector
colnames(a_matrix) <- col_name_vector

Summen in Matrizen berechnen

rowSums(a_matrix)
colSums(a_matrix)

Matrizen mit Matrizen oder Vektoren kombinieren

# cbind() für Spalten
a_bigger_matrix <- cbind(a_matrix, a_different_matrix, a_vector)

# rbind() für Zeilen
another_bigger_matrix <- rbind(a_matrix, another_diff_matrix)

Matrix Elemente auswählen

# Example matrix so the selections below can be run as-is
a_matrix <- matrix(1:9, byrow = TRUE, nrow = 3)

# Element in row 1, column 1.
# R indices start at 1; a_matrix[0, 0] selects nothing (an empty matrix).
a_matrix[1, 1]
# 1

# All elements of the 3rd column
a_matrix[, 3]
# 3 6 9

Factor

“The term factor refers to a statistical data type used to store categorical variables. The difference between a categorical variable and a continuous variable is that a categorical variable can belong to a limited number of categories. A continuous variable, on the other hand, can correspond to an infinite number of values.”

Factor erstellen

# The values must be combined into one vector with c(): factor()'s second and
# third positional arguments are `levels` and `labels`, not further values.
a_factor <- factor(c(1, 2, 3))

Geordneten Factor erstellen

# `ordered = TRUE` makes the levels comparable; `levels` fixes their ranking
# from lowest (3) to highest (1). The argument name is spelled out in full
# rather than relying on partial matching of `order`.
an_ordered_factor <- factor(
  c(1, 2, 3, 2, 2, 3),
  ordered = TRUE,
  levels = c(3, 2, 1)
)
an_ordered_factor

# Output
# [1] 1 2 3 2 2 3
# Levels: 3 < 2 < 1

Levels benennen

# Code to build factor_survey_vector
# factor() derives its levels from the sorted unique values, so the levels
# here are "F", "M" (alphabetical), regardless of the order in the data.
survey_vector <- c("M", "F", "F", "M", "M")
factor_survey_vector <- factor(survey_vector)

# Specify the levels of factor_survey_vector
# The new names are assigned by position, so they must follow the existing
# level order: "F" -> "Female", "M" -> "Male". Swapping them would silently
# relabel every observation.
levels(factor_survey_vector) <- c("Female", "Male")

# Print the relabelled factor
factor_survey_vector

# Output
# [1] Male   Female Female Male   Male  
# Levels: Female Male

Übersicht über Variable bekommen

# Build factor_survey_vector with clean levels
survey_vector <- c("M", "F", "F", "M", "M")
factor_survey_vector <- factor(survey_vector)
levels(factor_survey_vector) <- c("Female", "Male")
factor_survey_vector

# Generate summary for survey_vector
summary(survey_vector)

# Generate summary for factor_survey_vector
summary(factor_survey_vector)

# Output
# Generate summary for survey_vector
# summary(survey_vector)
#    Length     Class      Mode 
#         5 character character 
#
# Generate summary for factor_survey_vector
# summary(factor_survey_vector)
# Female   Male 
#      2      3 

Data Frame

A data frame has the variables of a dataset as columns and the observations as rows. This will be a familiar concept for those coming from different statistical software packages such as SAS or SPSS.

head() und tail()

Zeigen die ersten bzw. letzten Zeilen eines Data Frames; vergleichbar mit den Unix-Befehlen head und tail.

Structure eines Data Frames

str(a_frame)

Gibt eine Übersicht über Struktur und Daten in einem Data Frame. Beispiel:

'data.frame':	32 obs. of  11 variables:
 $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
 $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
 $ disp: num  160 160 108 258 360 ...
 $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
 $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
 $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
 $ qsec: num  16.5 17 18.6 19.4 17 ...
 $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
 $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
 $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
 $ carb: num  4 4 1 1 2 1 4 2 2 4 ...

Data Frame erstellen

# Definition of vectors
name <- c("Mercury", "Venus", "Earth", 
          "Mars", "Jupiter", "Saturn", 
          "Uranus", "Neptune")
type <- c("Terrestrial planet", 
          "Terrestrial planet", 
          "Terrestrial planet", 
          "Terrestrial planet", "Gas giant", 
          "Gas giant", "Gas giant", "Gas giant")
diameter <- c(0.382, 0.949, 1, 0.532, 
              11.209, 9.449, 4.007, 3.883)
rotation <- c(58.64, -243.02, 1, 1.03, 
              0.41, 0.43, -0.72, 0.67)
rings <- c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)

# Create a data frame from the vectors
planets_df <- data.frame(name, type, diameter, rotation, rings)
planets_df

# Output
#     name               type diameter rotation rings
# 1 Mercury Terrestrial planet    0.382    58.64 FALSE
# 2   Venus Terrestrial planet    0.949  -243.02 FALSE
# 3   Earth Terrestrial planet    1.000     1.00 FALSE
# 4    Mars Terrestrial planet    0.532     1.03 FALSE
# 5 Jupiter          Gas giant   11.209     0.41  TRUE
# 6  Saturn          Gas giant    9.449     0.43  TRUE
# 7  Uranus          Gas giant    4.007    -0.72  TRUE
# 8 Neptune          Gas giant    3.883     0.67  TRUE

Auswählen von Elementen in Data Frames

Funktioniert wie bei Matrizen, siehe “Matrix Elemente auswählen” oben.

Subsets
subset(planets_df, subset = diameter < 1) 

# Output
#      name               type diameter rotation rings
# 1 Mercury Terrestrial planet    0.382    58.64 FALSE
# 2   Venus Terrestrial planet    0.949  -243.02 FALSE
# 4    Mars Terrestrial planet    0.532     1.03 FALSE

Data Frames sortieren

a_vector <- c(1, 2, 4, 3, 5)

# order() returns the index positions that sort the vector in ascending order
order(a_vector)
# 1 2 4 3 5
# Use order() to create positions
positions <-  order(planets_df$diameter)

# Use positions to sort planets_df
planets_df[positions, ]

Lists

A list in R allows you to gather a variety of objects under one name (that is, the name of the list) in an ordered way. These objects can be matrices, vectors, data frames, even other lists, etc. It is not even required that these objects are related to each other in any way.

List erstellen

a_list <- list(1, 2, 3, 4, 5)

Elemente in List benennen

# Vector with numerics from 1 up to 10
my_vector <- 1:10 

# Matrix with numerics from 1 up to 9
my_matrix <- matrix(1:9, ncol = 3)

# First 10 elements of the built-in data frame mtcars
my_df <- mtcars[1:10,]

# Adapt list() call to give the components names
my_list <- list(vec = my_vector, mat = my_matrix, df = my_df)

Element aus Liste auswählen

a_list[[1]]
a_list[["element_name"]]
a_list$element_name
Last modified 2023.04.12