Create project folder


In [1]:
# Create the project directory (no warning if it already exists) and make it
# the working directory so packrat::init() treats it as the project root.
dir.create('/data/example-project', showWarnings = FALSE)
setwd('/data/example-project')
# Initialize packrat: creates the packrat/ subdirectory, a project-private
# library, and writes the initial packrat.lock snapshot (see output below).
packrat::init()


Initializing packrat project in directory:
- "/data/example-project"
Adding these packages to packrat:
            _        
    packrat   0.4.8-1

Fetching sources for packrat (0.4.8-1) ... OK (CRAN current)
Snapshot written to '/data/example-project/packrat/packrat.lock'
Installing packrat (0.4.8-1) ... 
	OK (built source)
Initialization complete!

Open existing project


In [2]:
# Re-enter the project directory and turn packrat mode back on; this switches
# .libPaths() to the project-private library (verified in the next cell).
setwd('/data/example-project')
packrat::on()

Install package


In [3]:
.libPaths() # Should only container folders within the example folder


  1. '/data/example-project/packrat/lib/x86_64-redhat-linux-gnu/3.4.1'
  2. '/data/example-project/packrat/lib-ext'
  3. '/data/example-project/packrat/lib-R'

In [4]:
find.package('fortunes', quiet = TRUE) # Expected to return nothing if package is not installed



In [5]:
install.packages('fortunes')


Installing package into '/data/example-project/packrat/lib/x86_64-redhat-linux-gnu/3.4.1'
(as 'lib' is unspecified)

In [6]:
find.package('fortunes') # should be a sub-folder of the './packrat/lib' wihtin the project


'/data/example-project/packrat/lib/x86_64-redhat-linux-gnu/3.4.1/fortunes'

Update snapshot


In [7]:
packrat::snapshot() # Add fortune to the packrat snapshot


Adding these packages to packrat:
             _      
    fortunes   1.5-4

Fetching sources for fortunes (1.5-4) ... OK (CRAN current)
Snapshot written to '/data/example-project/packrat/packrat.lock'

Restore a library


In [8]:
unlink(paste0(.libPaths()[1], '/fortunes'), recursive = TRUE) # Delete fortunes lib folder

In [9]:
find.package('fortunes', quiet = TRUE) # Package is missing



In [10]:
packrat::restore() # Restores the missing package


Installing fortunes (1.5-4) ... 
	OK (built source)

In [11]:
find.package('fortunes', quiet = TRUE)


'/data/example-project/packrat/lib/x86_64-redhat-linux-gnu/3.4.1/fortunes'

Install package on Spark cluster using packrat


In [12]:
# Spark preamble
library(SparkR, lib.loc=file.path(Sys.getenv('SPARK_HOME'), 'R', 'lib'))


Attaching package: 'SparkR'

The following objects are masked from 'package:stats':

    cov, filter, lag, na.omit, predict, sd, var, window

The following objects are masked from 'package:base':

    as.data.frame, colnames, colnames<-, drop, endsWith, intersect,
    rank, rbind, sample, startsWith, subset, summary, transform, union


In [13]:
sparkFunct <- function(idx) {
    # Executed on each Spark worker: attach the shared packrat project,
    # restore any packages missing from the worker's private library,
    # then load and use fortunes.
    setwd('/data/example-project')
    packrat::on(clean.search.path = FALSE)
    packrat::restore()  # installs fortunes from the snapshot if absent here
    library(fortunes)
    fortune()  # last expression is the return value
}

In [14]:
# Create Spark context
sparkR.session(master=Sys.getenv('MASTER'))
# Run function on cluster
output <- spark.lapply(seq(4), sparkFunct)
# Delete Spark context
sparkR.session.stop()


Spark package found in SPARK_HOME: /opt/spark
Launching java with spark-submit command /opt/spark/bin/spark-submit   sparkr-shell /tmp/RtmpNREJfD/backend_port5fb28a91f92 
Java ref type org.apache.spark.sql.SparkSession id 1 

In [15]:
lapply(output, function(item) { item$author })


  1. 'Douglas Bates'
  2. 'John W. Tukey'
  3. 'Duncan Temple Lang'
  4. 'Dirk Eddelbuettel'

Remove example project


In [16]:
# Move out of the project directory before removing it.
setwd('~')
unlink('/data/example-project', recursive = TRUE) # Remove the example project and its packrat library

In [ ]: