<- function(n = 100, k = 4, lambda = 4) {
fun1 <- NULL
x
for (i in 1:n)
<- rbind(x, rpois(k, lambda))
x
return(x)
}
<- function(n = 100, k = 4, lambda = 4) {
fun1alt # YOUR CODE HERE
}
# Benchmarking
::mark(
benchfun1(),
fun1alt(), relative = TRUE, check = FALSE
)
Homework 03 - Functions, Profiling, and Rcpp
Due date
Thursday, October 31
Background
For this assignment, you’ll be quested with speeding up some code using what you have learned about vectorization and Rcpp.
Part 1: Vectorizing code
The following functions can be written to be more efficient without using parallel computing:
- This function generates a
n x k
dataset with all its entries distributed Poisson with meanlambda
.
- Like before, speed up the following functions (it is OK to use StackOverflow)
# Total row sums
<- function(mat) {
fun1 <- nrow(mat)
n <- double(n)
ans for (i in 1:n) {
<- sum(mat[i, ])
ans[i]
}
ans
}
<- function(mat) {
fun1alt # YOUR CODE HERE
}
# Cumulative sum by row
<- function(mat) {
fun2 <- nrow(mat)
n <- ncol(mat)
k <- mat
ans for (i in 1:n) {
for (j in 2:k) {
<- mat[i, j] + ans[i, j - 1]
ans[i,j]
}
}
ans
}
<- function(mat) {
fun2alt # YOUR CODE HERE
}
# Use the data with this code
set.seed(2315)
<- matrix(rnorm(200 * 100), nrow = 200)
dat
# Test for the first
::mark(
benchfun1(dat),
fun1alt(dat), relative = TRUE
)
# Test for the second
::mark(
benchfun2(dat),
fun2alt(dat), relative = TRUE
)
- Find the column max (hint: Check out the function
max.col()
).
# Data Generating Process (10 x 10,000 matrix)
set.seed(1234)
<- matrix(rnorm(1e4), nrow=10)
x
# Find each column's max value
<- function(x) {
fun2 apply(x, 2, max)
}
<- function(x) {
fun2alt # YOUR CODE HERE
}
# Benchmarking
::mark(
benchfun2(),
fun2alt(), relative = TRUE
)
Part 2: C++
As we saw in the Rcpp week, vectorization may not be the best solution. For this part, you must write a function using Rcpp that implements the scale-free algorithm. The following code implements this in R:
## Model parameters
<- 500
n <- 2
m
## Generating the graph
set.seed(3312)
<- matrix(0, nrow = n, ncol = n)
g 1:m, 1:m] <- 1
g[diag(g) <- 0
## Adding nodes
for (i in (m + 1):n) {
# Selecting the nodes to connect to
<- sample(
ids x = 1:(i-1), # Up to i-1
size = m, # m nodes
replace = FALSE, # No replacement
# Probability proportional to the degree
prob = colSums(g[, 1:(i-1), drop = FALSE])
)
# Adding the edges
<- 1
g[i, ids] <- 1
g[ids, i]
}
## Visualizing the degree distribution
library(ggplot2)
data.frame(degree = colSums(g)) |>
ggplot(aes(degree)) +
geom_histogram() +
scale_x_log10() +
labs(
x = "Degree\n(log10 scale)",
y = "Count"
)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Rewrite the function that generates the scale-free network using Rcpp.