## ---- eval=FALSE-------------------------------------------------------------- # for (i in 1:n) { # distp[1] <- dist(rbind(p[i, ], data[1, ])) # minqi <- dist(rbind(q[i, ], data[1, ])) # for (j in 2:nrow(data)) { # distp[j] <- dist(rbind(p[i, ], data[j, ])) # error <- q[i, ] - data[j, ] # if (sum(abs(error)) != 0) { # distq <- dist(rbind(q[i, ], data[j, ])) # if (distq < minqi) # minqi <- distq # } # } ## ---- eval=FALSE-------------------------------------------------------------- # DistVecToMat <- function(x,Y){ # min(apply(Y, 1, function(yi) sqrt(sum((x-yi)^2)))) # } # # DistVecToMat(c(1,2), matrix(c(4,5,6,7), nrow=2, byrow=TRUE)) # # 4.242641 7.071068 # sqrt(c(18,50)) # # # For each ith row of sample, calculate the distance of: # # U[i,] to X # # W[i,] to X[-k[i] , ], i.e. omitting W[i,] from X # dux <- rep(NA, n) # dwx <- rep(NA, n) # for(i in 1:n) { # dux[i] <- DistVecToMat(U[i,], X) # dwx[i] <- DistVecToMat(W[i,], X[-k[i],]) # } ## ---- eval=FALSE-------------------------------------------------------------- # # pdist(W,X) computes distance between rows of W and rows of X # tmp <- as.matrix(pdist(W,X)) # dwx <- apply(tmp, 1, min) # # # pdist(U,X) computes distance between rows of U and rows of X # tmp <- as.matrix(pdist(U,X)) # dux <- apply(tmp, 1, min) ## ---- eval=FALSE-------------------------------------------------------------- # library(microbenchmark) # # # Method 1. Loop over vector 1:n # # for(i in 1:m) dwx[i,k[i]] <- Inf # microbenchmark(hopkins(X12, 500)) # ## Unit: milliseconds # ## expr min lq mean median uq max neval # ## hopkins(X12, 500) 38.9493 42.8045 50.73876 45.0668 47.69945 120.9366 100 # # # Method 2. Build a matrix of indexes to the cells that need changing # # dwx[ matrix( c(1:n, k), nrow=n) ] <- Inf # microbenchmark(hopkins(X12, 500)) # ## Unit: milliseconds # ## expr min lq mean median uq max neval # ## hopkins(X12, 500) 39.2668 42.41565 50.21628 43.74215 46.9462 126.3522 100 ## ---- eval=FALSE, echo=FALSE-------------------------------------------------- # set.seed(1) # X1 <- matrix(rnorm(1000, mean=0, sd=1), ncol=2) # X2 <- matrix(rnorm(1000, mean=5, sd=1), ncol=2) # X12 <- rbind(X1, X2) # plot(X12) # # # Old version # set.seed(42) # system.time( hop1 <- clustertend::hopkins(X12, 500) ) # 17 sec # 1-hop1$H # # .8994528 # # # New version # set.seed(42) # system.time( hop2 <- hopkins::hopkins(X12, 500, d=1) ) # .05 sec # hop2 # # .9054555