1. NUMERICAL OPERATIONS
input <- readline(prompt = "Enter numbers separated by spaces: ")
numbers <- as.numeric(unlist(strsplit(input, " ")))
max_value <- max(numbers)
print(paste("Maximum:", max_value))
min_value <- min(numbers)
print(paste("Minimum:", min_value))
avg_value <- mean(numbers)
print(paste("Average:", avg_value))
sum_value <- sum(numbers)
print(paste("Sum:", sum_value))
sqrt_values <- sqrt(numbers)
print("Square roots")
print(sqrt_values)
rounded_values <- round(numbers, 2)
print("Rounded values")
print(rounded_values)
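If any token is non-numeric, as.numeric() turns it into NA and every summary above returns NA; a minimal guard sketch (the filtering step is an addition, not part of the original exercise):
# Drop NA entries produced by non-numeric tokens before summarising
clean_numbers <- numbers[!is.na(numbers)]
if (length(clean_numbers) == 0) stop("No valid numbers were entered.")
print(paste("Maximum (valid entries only):", max(clean_numbers)))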
2. STATISTICAL OPERATIONS
# Read numeric vector from user
cat("Enter the elements:\n")
vector_elements <- as.numeric(unlist(strsplit(readline(), " ")))
# Calculate mean
mean_value <- mean(vector_elements)
cat("Mean:\n")
print(mean_value)
# Calculate median
median_value <- median(vector_elements)
cat("Median:\n")
print(median_value)
# Calculate mode (define the helper first, since base R has no mode() for this)
calculate_mode <- function(x) {
  uniqx <- unique(x)
  # Count occurrences of each unique value and return the most frequent
  uniqx[which.max(tabulate(match(x, uniqx)))]
}
mode_value <- calculate_mode(vector_elements)
cat("Mode:\n")
print(mode_value)
# Calculate standard deviation
sd_value <- sd(vector_elements)
cat("Standard deviation:\n")
print(sd_value)
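A quick sanity check of the mode helper with illustrative values (not part of the lab input):
# which.max() returns the first maximum, so ties go to the value seen first
calculate_mode(c(1, 2, 2, 3, 3))  # 2
calculate_mode(c(5, 7, 7, 7, 9))  # 7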
3. SIMPLE LINEAR REGRESSION
data <- read.csv("C:/Users/suhan/Downloads/SimpleLinearRegressionData.csv")
print(data)
model <- lm(y ~ x, data = data)
summary(model)
plot(data$x, data$y, main = "Linear Regression of x and y", xlab = "x", ylab = "y", pch = 19)
abline(model, col = "blue")
# Contents of SimpleLinearRegressionData.csv:
# x: 3, 7, 9, 6, 2
# y: 1, 9, 8, 5, 4
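If the CSV file does not exist yet, a short sketch to generate it from the values above (same path as in the read.csv call; adjust as needed):
# Create SimpleLinearRegressionData.csv with the sample values
sample_data <- data.frame(x = c(3, 7, 9, 6, 2), y = c(1, 9, 8, 5, 4))
write.csv(sample_data, "C:/Users/suhan/Downloads/SimpleLinearRegressionData.csv",
          row.names = FALSE)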
4. WEB SCRAPING
install.packages('rvest')
library(rvest)
# Load the page, then extract the title node and all paragraph text
webpage <- read_html("https://www.geeksforgeeks.org/web-scraping-using-r-language/")
heading <- html_node(webpage, '.entry-title')
text <- html_text(heading)
print(text)
paragraph <- html_nodes(webpage, 'p')
pText <- html_text(paragraph)
print(head(pText))
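The same selector/extractor pattern works for other elements; a sketch pulling hyperlinks from the page ('a' and html_attr() are standard rvest usage, though the exact URLs depend on the live page):
# Extract all anchor tags and read their href attributes
links <- html_nodes(webpage, 'a')
urls <- html_attr(links, 'href')
print(head(urls))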
5. MATRIX OPERATIONS
n <- as.integer(readline(prompt="Enter the number of rows: "))
m <- as.integer(readline(prompt="Enter the number of columns: "))
# Read matrix elements from user
cat("Enter the elements of the first matrix (row-wise), separated by spaces:\n")
matrix_elements_1 <- as.numeric(unlist(strsplit(readline(), " ")))
matrix_1 <- matrix(matrix_elements_1, nrow = n, ncol = m, byrow = TRUE)
cat("Enter the elements of the second matrix (row-wise), separated by spaces:\n")
matrix_elements_2 <- as.numeric(unlist(strsplit(readline(), " ")))
matrix_2 <- matrix(matrix_elements_2, nrow = n, ncol = m, byrow = TRUE)
# Display the matrices
cat("First Matrix:\n")
print(matrix_1)
cat("Second Matrix:\n")
print(matrix_2)
# Matrix Addition
matrix_add <- matrix_1 + matrix_2
cat("Matrix Addition:\n")
print(matrix_add)
# Matrix Subtraction
matrix_sub <- matrix_1 - matrix_2
cat("Matrix Subtraction:\n")
print(matrix_sub)
# Matrix Multiplication (needs m == n so the matrices are conformable)
if (m == n) {
  matrix_mul <- matrix_1 %*% matrix_2
  cat("Matrix Multiplication:\n")
  print(matrix_mul)
} else {
  cat("Matrix Multiplication: not defined for these dimensions (need m == n).\n")
}
# Matrix Inverse
if (n == m && det(matrix_1) != 0) {
  matrix_inv <- solve(matrix_1)
  cat("Matrix Inverse:\n")
  print(matrix_inv)
} else {
  cat("Matrix Inverse:\nThe matrix is not square or is singular, so the inverse does not exist.\n")
}
# Matrix Transpose
matrix_transpose <- t(matrix_1)
cat("Matrix Transpose:\n")
print(matrix_transpose)
# Matrix Division
# For matrix division, we'll assume element-wise division (Hadamard division)
matrix_div <- matrix_1 / matrix_2
cat("Matrix Division (element-wise):\n")
print(matrix_div)
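Element-wise division is one interpretation; if division is meant in the linear-algebra sense A %*% B^-1, a hedged sketch (valid only when both matrices are square and matrix_2 is invertible):
# "True" matrix division via the inverse of the second matrix
if (n == m && det(matrix_2) != 0) {
  matrix_div_inv <- matrix_1 %*% solve(matrix_2)
  cat("Matrix Division (via inverse):\n")
  print(matrix_div_inv)
}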
6. DATA PRE-PROCESSING
install.packages("readr")
library(readr)
# Read data from CSV file
iris_data <- read_csv("C:/Users/suhan/Downloads/archive/Iris.csv")
# Display first few rows of dataset
cat("Original Iris dataset")
head(iris_data)
# Check structure of dataset
str(iris_data)
# Summary
summary(iris_data)
# Check for missing values
cat("Checking for missing values:")
columns <- colSums(is.na(iris_data))
print(columns)
# Remove rows with missing values
data_no_na <- na.omit(iris_data)
cat("Data after removing rows with missing values:\n")
print(data_no_na)
# Normalization using min-max scaling
min_max_normalise <- function(x) {
return((x - min(x)) / (max(x) - min(x)))}
# Apply normalization to numeric columns
iris_normalized <- iris_data
iris_normalized[,1:4] <- sapply(iris_data[,1:4], min_max_normalise)
cat("Dataset after min-max normalization:")
head(iris_normalized)
# Encode the categorical Species column as integer codes
iris_encoded <- cbind(iris_normalized[, 1:4],
                      Species = as.numeric(factor(iris_data$Species)))
# Inspect encoded data
cat("Inspecting encoded data:\n")
print(head(iris_encoded))
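Min-max scaling is only one option; a sketch of z-score standardization as an alternative, using base R's scale() (assumes, like the code above, that columns 1:4 are the numeric measurements):
# Z-score standardization: each column gets mean 0 and standard deviation 1
iris_standardized <- iris_data
iris_standardized[, 1:4] <- scale(iris_data[, 1:4])
head(iris_standardized)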
7. PCA FOR HOUSING DATA
install.packages("ggplot2")
install.packages("caret")
library(ggplot2)
library(caret)
# Optionally read housing data from CSV; the sample frame below is used instead
# housing_data <- read.csv("C:/Users/suhan/Downloads/dd/Housing.csv")
data <- data.frame(
  Price = c(300000, 450000, 500000, 600000, 700000),
  SqFt = c(1500, 2000, 1800, 2200, 2500),
  Bedrooms = c(3, 4, 3, 5, 4),
  Bathrooms = c(2, 3, 2, 3, 4))
# View the first few rows of the dataset
cat("Original Data:\n")
print(head(data))
# Remove non-numeric columns if any
data_numeric <- data[, sapply(data, is.numeric)]
# Standardize the data (important for PCA)
data_scaled <- scale(data_numeric)
# Perform PCA
pca_result <- prcomp(data_scaled, center = TRUE, scale. = TRUE)
# Summary of PCA results
cat("PCA Summary:\n")
print(summary(pca_result))
# Print principal components
cat("Principal Components:\n")
print(pca_result$rotation)
# Print the scores of the principal components
cat("PCA Scores:\n")
print(pca_result$x)
# Plotting the PCA results (optional)
# Scree plot to visualize variance explained by each principal component
screeplot(pca_result, main="Scree Plot")
# Biplot to visualize the principal components
biplot(pca_result, main="PCA Biplot")
# Generate Scree Plot for the First Two Principal Components
var_explained <- pca_result$sdev^2 / sum(pca_result$sdev^2) * 100  # percentage of variance explained
barplot(var_explained[1:2],
main = "Scree Plot (First Two Principal Components)",
xlab = "Principal Component",
ylab = "Percentage of Variance Explained",
names.arg = c("PC1", "PC2"),
col = "steelblue")
# Generate Biplot for the First Two Principal Components
biplot(pca_result,
choices = c(1,2), # Select first two principal components
main = "Biplot of the First Two Principal Components",
col = c("red", "blue")) # Different colors for points and vectors
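To decide how many components to keep, the cumulative variance is usually inspected as well; a short sketch building on var_explained above:
# Cumulative percentage of variance captured by the first k components
cum_var <- cumsum(var_explained)
print(round(cum_var, 2))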
8. DECISION TREE CLASSIFICATION
install.packages("caret")
install.packages("rpart.plot")
library(caret)
library(rpart.plot)
irisdata <- datasets::iris
class <- irisdata$Species
set.seed(3033)
inTrain <- createDataPartition(y=class, p=0.8, list=FALSE)
training <- irisdata[inTrain,]
testing <- irisdata[-inTrain,]
class_test <- class[-inTrain]
dim(training)
dim(testing)
fit.tree <- rpart(Species ~ ., data = training, method = "class",
                  parms = list(split = "information"))
fit.tree
rpart.plot(fit.tree)
pred.tree <- predict(fit.tree, testing, type="class")
table(pred.tree, class_test)
printcp(fit.tree)
pruned.tree <- prune(fit.tree, cp=0.5)
rpart.plot(pruned.tree, box.palette="Blues")
pred.pruned <- predict(pruned.tree, testing, type = "class")
table(pred.pruned, class_test)
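Since caret is already loaded, the raw contingency table can be supplemented with a fuller evaluation; a sketch using caret's confusionMatrix() on the unpruned tree:
# Accuracy plus per-class sensitivity/specificity for the unpruned tree
cm <- confusionMatrix(pred.tree, class_test)
print(cm)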
9. DECISION TREE REGRESSION
install.packages("rpart")
install.packages("rpart.plot")
library(rpart)
library(rpart.plot)
library(ISLR)
carseats_data <- read.csv("C:/Users/suhan/Downloads/Carseats.csv")
head(carseats_data)
# Build the tree model using rpart
tree.model <- rpart(Sales ~ Price + Advertising + ShelveLoc,
                    data = carseats_data, method = "anova")
summary(tree.model)
printcp(tree.model)
# Plot the decision tree
rpart.plot(tree.model, type=3, extra=101, fallen.leaves=TRUE)
# Prepare new data and predict
new_data <- data.frame(Price=120, Advertising=10, ShelveLoc="Good")
predicted_sales <- predict(tree.model, new_data)
print(predicted_sales)
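A quick error check for the regression tree is a natural follow-up; a minimal sketch of in-sample RMSE (computed on the training data itself, so it will be optimistic):
# In-sample RMSE of the regression tree
fitted_sales <- predict(tree.model, carseats_data)
rmse_tree <- sqrt(mean((carseats_data$Sales - fitted_sales)^2))
cat("In-sample RMSE:", rmse_tree, "\n")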
10. MOVIE RECOMMENDATION
install.packages("recommenderlab")
install.packages("ggplot2")
library(recommenderlab)
library(ggplot2)
data("MovieLense")
ml_data <- MovieLense
print(ml_data)
summary(ml_data)
cat("Sparsity of the data:", sum(is.na(as(ml_data, "matrix"))) / prod(dim(ml_data)), "\n")
hist(getRatings(ml_data), main = "Rating Distribution", col = "blue",
     xlab = "Ratings", ylab = "Frequency")
set.seed(123)
split <- sample(x = c(TRUE, FALSE), size = nrow(ml_data), replace = TRUE,
                prob = c(0.8, 0.2))
train_data <- ml_data[split, ]
test_data <- ml_data[!split, ]
recommender <- Recommender(train_data, method = "UBCF")
predictions <- predict(recommender, newdata = test_data, n = 5)
cat("Predicted Recommendations:\n")
print(as(predictions, "list"))
evaluation_scheme <- evaluationScheme(ml_data, method = "split", train = 0.8,
                                      given = 10, goodRating = 4)
results <- evaluate(evaluation_scheme, method = "UBCF", type = "topNList", n = c(1, 3, 5))
plot(results, annotate = TRUE, main = "ROC Curve")
top_recommendations <- as(predictions, "list")
recommendation_df <- data.frame(
User = rep(names(top_recommendations), lengths(top_recommendations)),
Movie = unlist(top_recommendations))
print("Top Recommendations Data Frame:")
print(recommendation_df)
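The evaluation scheme above also supports rating-prediction error; a sketch using recommenderlab's getData() and calcPredictionAccuracy():
# Train on the scheme's training split, predict ratings for the known part
# of each test user, and score against the held-out ratings
rec <- Recommender(getData(evaluation_scheme, "train"), method = "UBCF")
pred_ratings <- predict(rec, getData(evaluation_scheme, "known"), type = "ratings")
print(calcPredictionAccuracy(pred_ratings, getData(evaluation_scheme, "unknown")))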
11. TIME SERIES ARIMA
library(zoo)
library(ggplot2)
library(forecast)
library(Metrics)
data <- c(10,23,36,40,51,63,72,84,93,102,113,125)
series_ts <- ts(data)
test_set <- c(85, 92)
nval <- length(test_set)
ar_model <- Arima(series_ts, order = c(1, 0, 0))
ma_model <- Arima(series_ts, order = c(0, 0, 1))
arma_model <- Arima(series_ts, order = c(1, 0, 1))
arima_model <- Arima(series_ts, order = c(1, 1, 1))
auto_arima_no_season_model <- auto.arima(series_ts, seasonal = FALSE)
auto_arima_season_model <- auto.arima(series_ts, seasonal = TRUE)
ar_forecasts <- forecast(ar_model, h = nval)
ma_forecasts <- forecast(ma_model, h = nval)
arma_forecasts <- forecast(arma_model, h = nval)
arima_forecasts <- forecast(arima_model, h = nval)
auto_arima_no_season_forecasts <- forecast(auto_arima_no_season_model, h = nval)
auto_arima_season_forecasts <- forecast(auto_arima_season_model, h = nval)
arima_forecast_df <- data.frame(
  AR = ar_forecasts$mean,
  MA = ma_forecasts$mean,
  ARMA = arma_forecasts$mean,
  ARIMA = arima_forecasts$mean,
  AutoARIMANoSeason = auto_arima_no_season_forecasts$mean,
  AutoARIMASeason = auto_arima_season_forecasts$mean)
arima_forecast_df
mae(test_set,ar_forecasts$mean)
mape(test_set,ar_forecasts$mean)
rmse(test_set,ar_forecasts$mean)
ar_forecasts$model$coef
#ar_forecasts$mean
summary(arima_forecasts$model)
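The error metrics computed above for the AR model can be applied to every candidate at once; a sketch looping over the forecast columns assembled in arima_forecast_df:
# MAE, MAPE and RMSE for each model against the test set
metrics <- sapply(arima_forecast_df, function(f)
  c(MAE = mae(test_set, f), MAPE = mape(test_set, f), RMSE = rmse(test_set, f)))
print(round(metrics, 3))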
12. DATA VISUALIZATION
#Histograms
# load the data
data(iris)
# create histograms for each attribute
par(mfrow=c(1,4))
for(i in 1:4) {
hist(iris[,i], main=names(iris)[i])}
#Density Plots
# load packages
library(lattice)
# load dataset
data(iris)
# create a panel of simpler density plots by attribute
par(mfrow=c(1,4))
for(i in 1:4) {
plot(density(iris[,i]), main=names(iris)[i])}
#Box And Whisker Plots
data(iris)
# Create separate boxplots for each attribute
par(mfrow=c(1,4))
for(i in 1:4) {
boxplot(iris[,i], main=names(iris)[i])}
#Bar Plots
data(iris)
# Create separate bar plots for each attribute
par(mfrow=c(1,4))
for(i in 1:4) {
barplot(iris[,i], main=names(iris)[i])}
#Missing Plot
install.packages("Amelia")
library(Amelia)
# create a missing map
missmap(iris, col=c("black", "grey"), legend=FALSE)
#Multivariate Visualization
#Correlation Plot
install.packages("corrplot")
library(corrplot)
# calculate correlations
correlations <- cor(iris[,1:4])
# create correlation plot
corrplot(correlations, method="circle")
#Scatterplot Matrix
# pair-wise scatterplots of all 4 attributes
pairs(iris)
#Scatterplot Matrix By Class
# pair-wise scatterplots colored by class
pairs(Species~., data=iris, col=iris$Species)
#Density Plots By Class
install.packages("caret")
# load the package
library(caret)
x <- iris[,1:4]
y <- iris[,5]
scales <- list(x=list(relation="free"), y=list(relation="free"))
featurePlot(x=x, y=y, plot="density", scales=scales)
#Box And Whisker Plots By Class
# box and whisker plots for each attribute by class value
# load the package
library(caret)
x <- iris[,1:4]
y <- iris[,5]
featurePlot(x=x, y=y, plot="box")
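For completeness, the same class-wise view can be drawn with ggplot2 (installed for earlier exercises); a minimal sketch:
library(ggplot2)
# Scatter of petal dimensions coloured by species
ggplot(iris, aes(x = Petal.Length, y = Petal.Width, color = Species)) +
  geom_point() +
  labs(title = "Petal Dimensions by Species")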
13. K-MEANS
# Define the data points
data <- data.frame(
x = c(2, 2, 8, 5, 7, 6, 1, 4),
y = c(10, 5, 4, 8, 5, 4, 2, 9))
rownames(data) <- c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2")
# Initial cluster centers (A1, B1, C1)
initial_centers <- matrix(
c(2, 10, # A1
5, 8, # B1
1, 2), # C1
nrow = 3,
byrow = TRUE)
# Perform K-Means with given initial centers
set.seed(123) # For reproducibility
kmeans_result <- kmeans(data, centers = initial_centers, iter.max = 10, nstart = 1)
# Output the results
cat("Cluster Assignments:\n")
print(kmeans_result$cluster)
cat("\nCluster Centers:\n")
print(kmeans_result$centers)
# Visualize the clusters
library(ggplot2)
data$Cluster <- factor(kmeans_result$cluster)
ggplot(data, aes(x = x, y = y, color = Cluster)) +
geom_point(size = 4) +
geom_point(data = as.data.frame(kmeans_result$centers),
aes(x = x, y = y, color = factor(1:3)),
shape = 4, size = 6, stroke = 2) +
labs(title = "K-Means Clustering", x = "X", y = "Y") +
theme_minimal()
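Choosing k via the elbow method is a common companion to this exercise; a sketch of total within-cluster sum of squares over a range of k (uses only the original x/y columns, not the Cluster column added above):
# Total within-cluster SS for k = 1..6
wss <- sapply(1:6, function(k)
  kmeans(data[, c("x", "y")], centers = k, nstart = 10)$tot.withinss)
plot(1:6, wss, type = "b", xlab = "Number of clusters k",
     ylab = "Total within-cluster SS", main = "Elbow Method")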
14. IMPORT/EXPORT
IMPORT
# Importing CSV using base R
data <- read.csv("path/to/your/file.csv")
# Importing CSV using readr
install.packages("readr")
library(readr)
data <- read_csv("path/to/your/file.csv")
# Importing Excel files using readxl
install.packages("readxl")
library(readxl)
data <- read_excel("path/to/your/file.xlsx", sheet = "Sheet1")
# Importing TXT using base R
data <- read.table("path/to/your/file.txt", header = TRUE, sep = "\t")
# Importing TXT using readr
library(readr)
data <- read_tsv("path/to/your/file.txt")
EXPORT
# Exporting CSV using base R
write.csv(data, "path/to/save/your/file.csv", row.names = FALSE)
# Exporting CSV using readr
library(readr)
write_csv(data, "path/to/save/your/file.csv")
# Exporting Excel files using writexl
install.packages("writexl")
library(writexl)
write_xlsx(data, "path/to/save/your/file.xlsx")
# Exporting TXT using base R
write.table(data, "path/to/save/your/file.txt", sep = "\t", row.names = FALSE)
# Exporting TXT using readr
library(readr)
write_tsv(data, "path/to/save/your/file.txt")
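R's native serialized format is also worth knowing, since it round-trips any R object exactly; a short sketch (placeholder paths, as above):
# Exporting/importing any R object using RDS
saveRDS(data, "path/to/save/your/file.rds")
data <- readRDS("path/to/save/your/file.rds")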
15. MULTIPLE LINEAR REGRESSION (MLR)
install.packages("MASS")
install.packages("ggplot2")
install.packages("dplyr")
install.packages("caret")
library(MASS)
library(ggplot2)
library(dplyr)
library(caret)
data <- Boston
head(data)
# Check for missing values
colSums(is.na(data))
# Visualize relationships
pairs(data)
# Select features and target (for example, "medv" is the median value of owner-occupied homes)
target <- "medv"  # Median value of owner-occupied homes in $1000s
predictors <- data %>% select(-medv)
# Split the data
set.seed(42)
train_index <- createDataPartition(data[[target]], p = 0.8, list = FALSE)
train_data <- data[train_index, ]
test_data <- data[-train_index, ]
# Build the linear regression model
model <- lm(as.formula(paste(target, "~ .")), data = train_data)
summary(model)
# Make predictions
predictions <- predict(model, newdata = test_data)
# Evaluate the model
mse <- mean((test_data[[target]] - predictions)^2)
r_squared <- summary(model)$r.squared
cat("Mean Squared Error:", mse, "\n")
cat("R^2 Score:", r_squared, "\n")
# Visualize the results
results_df <- data.frame(actual = test_data[[target]], predicted = predictions)
ggplot(results_df, aes(x = actual, y = predicted)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0, color = "red") +
  labs(x = "Actual Prices", y = "Predicted Prices", title = "Actual vs Predicted Prices")
# Interpret the coefficients
coef(summary(model))
# Save the model
saveRDS(model, "boston_housing_model.rds")
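Note that summary(model)$r.squared is the R^2 on the training data; a sketch of test-set RMSE and R^2, which better reflect out-of-sample fit:
# Test-set RMSE and R^2 from the held-out predictions
rmse_test <- sqrt(mse)
ss_res <- sum((test_data[[target]] - predictions)^2)
ss_tot <- sum((test_data[[target]] - mean(test_data[[target]]))^2)
cat("Test RMSE:", rmse_test, "\n")
cat("Test R^2:", 1 - ss_res / ss_tot, "\n")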