imcExperiment: containerization of IMC data

Heatmap plotting script (helper)

  • The heatmap function we do not want to include with this package, but here is the helper function.

example

  • The IMC container contains slots for intensity, coordinates, neighborhood features, networking ID, distance measures, morphology, unique cell labels, panel information, and ROIID.
  • The input to the imcExperiment object has the cells (columns) and rows are the proteins.
  • For spatial features, the rows are the cells, and columns are the spatial features. the 2-dim coordinates (x,y), neighborHood features, network assignment, unique Label, ROIID, and distance matrix. These data are required for the constructure but can be edited later.
  • The IMC container inherits many function used for SummarizedExperiment classes (assays, etc).
 library(imcExperiment)
 showClass("imcExperiment")
## Class "imcExperiment" [package "imcExperiment"]
## 
## Slots:
##                                                                 
## Name:                   coordinates                cellIntensity
## Class:                       matrix                       matrix
##                                                                 
## Name:                  neighborHood                      network
## Class:                       matrix                   data.frame
##                                                                 
## Name:                      distance                   morphology
## Class:                       matrix                       matrix
##                                                                 
## Name:                   uniqueLabel          int_elementMetadata
## Class:                    character                    DataFrame
##                                                                 
## Name:                   int_colData                 int_metadata
## Class:                    DataFrame                         list
##                                                                 
## Name:                     rowRanges                      colData
## Class: GenomicRanges_OR_GRangesList                    DataFrame
##                                                                 
## Name:                        assays                        NAMES
## Class:               Assays_OR_NULL            character_OR_NULL
##                                                                 
## Name:               elementMetadata                     metadata
## Class:                    DataFrame                         list
## 
## Extends: 
## Class "SingleCellExperiment", directly
## Class "RangedSummarizedExperiment", by class "SingleCellExperiment", distance 2
## Class "SummarizedExperiment", by class "SingleCellExperiment", distance 3
## Class "RectangularData", by class "SingleCellExperiment", distance 4
## Class "Vector", by class "SingleCellExperiment", distance 4
## Class "Annotated", by class "SingleCellExperiment", distance 5
## Class "vector_OR_Vector", by class "SingleCellExperiment", distance 5
  #10 cells with 10 proteins
  # 10 neighbors 
 # and square distance matrix
 # note that for SCE objects the columns are the cells, and rows are proteins
  x<-imcExperiment(cellIntensity=matrix(1,nrow=10,ncol=10),
    coordinates=matrix(1,nrow=10,ncol=2),
    neighborHood=matrix(1,nrow=10,ncol=10),
    network=data.frame(matrix(1,nrow=10,ncol=10)),
    distance=matrix(1,nrow=10,ncol=10),
    morphology=matrix(1,nrow=10,ncol=10),
    uniqueLabel=paste0("A",seq_len(10)),
    panel=letters[1:10],
    ROIID=data.frame(ROIID=rep("A",10)))
  

 #7 cells with 10 proteins
 # but the spatial information is 10 rows and this will fail to build.
 #x<-imcExperiment(cellIntensity=matrix(1,nrow=7,ncol=10),
#   coordinates=matrix(1,nrow=10,ncol=2),
#   neighborHood=matrix(1,nrow=10,ncol=20),
#   network=matrix(1,nrow=10,ncol=3),
#   distance=matrix(1,nrow=10,ncol=2),
#   uniqueLabel=rep("A",10),
#   ROIID=data.frame(ROIID=rep("A",10)))

Accessors, and setters

  • the cellIntensity() function accesses cell Intensity
  • getCoordinates() accesses spatial data.
  • getNeighborhood() access the histoCAT neighborhood features
  • getDistance() accesses distance matrix input.
  • getMorphology() accesses morphology features.
  • the metadata() function can store extras .
  #get cellintensities
  cellIntensity(x)
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
##  [1,]    1    1    1    1    1    1    1    1    1     1
##  [2,]    1    1    1    1    1    1    1    1    1     1
##  [3,]    1    1    1    1    1    1    1    1    1     1
##  [4,]    1    1    1    1    1    1    1    1    1     1
##  [5,]    1    1    1    1    1    1    1    1    1     1
##  [6,]    1    1    1    1    1    1    1    1    1     1
##  [7,]    1    1    1    1    1    1    1    1    1     1
##  [8,]    1    1    1    1    1    1    1    1    1     1
##  [9,]    1    1    1    1    1    1    1    1    1     1
## [10,]    1    1    1    1    1    1    1    1    1     1
  #set intensities
  newIn<-matrix(rnorm(100,2,5),nrow=10,ncol=10)
  rownames(newIn)<-rownames(x)
  colnames(newIn)<-colnames(x)
   cellIntensity(x)<-newIn
  cellIntensity(x)==newIn
##     A1   A2   A3   A4   A5   A6   A7   A8   A9  A10
## a TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## b TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## c TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## d TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## e TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## f TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## g TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## h TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## i TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## j TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 # if we want to store both the raw and normalized values in assays we can.
  assays(x,withDimnames = FALSE)$raw<-matrix(1,nrow=10,ncol=10)
  #store the normalized values.
   cellIntensity(x)<-asinh(counts(x)/0.5)
   all(cellIntensity(x)==asinh(assays(x)$counts/0.5))
## [1] TRUE
  x
## class: imcExperiment 
## dim: 10 10 
## metadata(0):
## assays(2): counts raw
## rownames(10): a b ... i j
## rowData names(0):
## colnames(10): A1 A2 ... A9 A10
## colData names(1): ROIID
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
 ## access the coordinates
  getCoordinates(x)
##       [,1] [,2]
##  [1,]    1    1
##  [2,]    1    1
##  [3,]    1    1
##  [4,]    1    1
##  [5,]    1    1
##  [6,]    1    1
##  [7,]    1    1
##  [8,]    1    1
##  [9,]    1    1
## [10,]    1    1
  getCoordinates(x)<-matrix(rnorm(20,0,10),nrow=10,ncol=2)
head(  getCoordinates(x))
##            [,1]       [,2]
## [1,]   3.034747  13.365824
## [2,]   9.539573   1.423551
## [3,]  -2.979820   2.273039
## [4,]  16.536503 -14.772502
## [5,] -32.479697  -2.861809
## [6,]  12.148118 -13.097818
 ## access the neighborhood profile.  Note each row must equal the number of cells, but the columns can be extended depending on the radius of interactions.
  ## access the coordinates
  getNeighborhood(x)
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
##  [1,]    1    1    1    1    1    1    1    1    1     1
##  [2,]    1    1    1    1    1    1    1    1    1     1
##  [3,]    1    1    1    1    1    1    1    1    1     1
##  [4,]    1    1    1    1    1    1    1    1    1     1
##  [5,]    1    1    1    1    1    1    1    1    1     1
##  [6,]    1    1    1    1    1    1    1    1    1     1
##  [7,]    1    1    1    1    1    1    1    1    1     1
##  [8,]    1    1    1    1    1    1    1    1    1     1
##  [9,]    1    1    1    1    1    1    1    1    1     1
## [10,]    1    1    1    1    1    1    1    1    1     1
  getNeighborhood(x)<-matrix(rnorm(100,1,5),nrow=10,ncol=10)
 head( getNeighborhood(x))
##            [,1]       [,2]      [,3]       [,4]      [,5]        [,6]
## [1,]  1.4470953  0.9128426  3.768759  0.8406094 -6.114329   7.8022419
## [2,] -1.2536198 -3.5086605 -4.882563  4.2606125 -5.187257   5.9874954
## [3,] -0.7306890  3.9928835  5.626202  9.7837743  9.005965   8.9816927
## [4,]  5.7565770  6.2721845  6.443988  0.6390971 -5.802902   3.0645971
## [5,]  0.8503295  4.4339637 -2.423731 -0.8901062 -8.708438  -0.1458067
## [6,]  9.2245482  5.2018657  3.629917 -3.3067429 -2.863778 -11.8413331
##            [,7]       [,8]       [,9]      [,10]
## [1,]  1.8926566 -0.2676582  6.0792030 -7.7630457
## [2,] -0.8513291  1.6204647  3.4742704 -4.5843900
## [3,] -0.3867099 -5.6282341 -1.6696772 -0.1738082
## [4,] -1.9386367 -5.7983103  0.3258066  0.8801997
## [5,]  5.6461763  5.2321197 -9.3717111  4.4848104
## [6,] -2.6028521  2.7531295  5.9016797  6.4296470
 ## get the distance usually a square matrix, or can be just first nearest etc. 
    getDistance(x)
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
##  [1,]    1    1    1    1    1    1    1    1    1     1
##  [2,]    1    1    1    1    1    1    1    1    1     1
##  [3,]    1    1    1    1    1    1    1    1    1     1
##  [4,]    1    1    1    1    1    1    1    1    1     1
##  [5,]    1    1    1    1    1    1    1    1    1     1
##  [6,]    1    1    1    1    1    1    1    1    1     1
##  [7,]    1    1    1    1    1    1    1    1    1     1
##  [8,]    1    1    1    1    1    1    1    1    1     1
##  [9,]    1    1    1    1    1    1    1    1    1     1
## [10,]    1    1    1    1    1    1    1    1    1     1
  getDistance(x)<-matrix(rnorm(100,1,5),nrow=10,ncol=10)
  head(getDistance(x))
##            [,1]      [,2]      [,3]       [,4]       [,5]       [,6]       [,7]
## [1,] -3.9890620 10.910784 0.9762887  6.5638764  0.5387551  3.6708334  3.9811188
## [2,] -0.8565689 -4.484224 1.6642903 -0.7628686  5.8483790  1.8140574  6.2603387
## [3,] -2.0591464 -5.997579 6.5732988  0.9895116 -0.4276494  0.8251664  0.7168311
## [4,]  2.0734702 -5.415851 1.9413955 -3.3587597  6.2052571  6.3753766 -4.2116313
## [5,] -4.4705299  9.841306 1.7696953 -0.5861410 -1.8071315  7.1089537 -3.4334006
## [6,] -8.4494041  1.143903 1.9083118 10.4127679  3.7919394 11.0379079  2.0298145
##           [,8]      [,9]     [,10]
## [1,] -6.080778 -4.476827 -3.824715
## [2,]  5.528822  4.213684 12.058231
## [3,] -3.036998  4.694983  3.742517
## [4,] -5.989757  5.248598 -5.998213
## [5,]  7.863350  1.963486  6.093700
## [6,]  3.919736  4.064631 -1.640773
 # get morphological features
  getMorphology(x)
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
##  [1,]    1    1    1    1    1    1    1    1    1     1
##  [2,]    1    1    1    1    1    1    1    1    1     1
##  [3,]    1    1    1    1    1    1    1    1    1     1
##  [4,]    1    1    1    1    1    1    1    1    1     1
##  [5,]    1    1    1    1    1    1    1    1    1     1
##  [6,]    1    1    1    1    1    1    1    1    1     1
##  [7,]    1    1    1    1    1    1    1    1    1     1
##  [8,]    1    1    1    1    1    1    1    1    1     1
##  [9,]    1    1    1    1    1    1    1    1    1     1
## [10,]    1    1    1    1    1    1    1    1    1     1
  getMorphology(x)<-matrix(rnorm(100,1,5),nrow=10,ncol=60)
  head(getMorphology(x))
##            [,1]      [,2]       [,3]       [,4]      [,5]       [,6]      [,7]
## [1,]  4.6569437  1.433714 -0.4252608  1.6470348  3.352775 -7.6646575  8.490474
## [2,] -2.0078822  2.631864 -6.5699433 -2.0304610 -9.288710 -2.7986700 -4.482370
## [3,] -0.9336585  4.561218 -4.4039283  4.3493663 -7.513842  2.6526410  8.318619
## [4,] -5.7994255  4.072470  4.5486594  0.2748884  5.979210  5.4626124  5.175429
## [5,] -1.5732826  1.815842  0.1832589  7.0979574 -7.611639 -0.7107370 -1.489214
## [6,]  0.1409771 -8.980565 -4.8858289  0.2783008 -2.325024 -0.9108667 -7.908210
##           [,8]       [,9]      [,10]      [,11]     [,12]      [,13]      [,14]
## [1,]  1.391862  4.1202799  -3.537828  4.6569437  1.433714 -0.4252608  1.6470348
## [2,] -3.355957 -1.3155264  -3.619407 -2.0078822  2.631864 -6.5699433 -2.0304610
## [3,]  2.056513 -0.5586920   6.170617 -0.9336585  4.561218 -4.4039283  4.3493663
## [4,]  8.586122 -3.6149533  -7.138208 -5.7994255  4.072470  4.5486594  0.2748884
## [5,] -3.422489 -0.8057942  -1.823368 -1.5732826  1.815842  0.1832589  7.0979574
## [6,]  7.919847 -0.8591075 -11.050008  0.1409771 -8.980565 -4.8858289  0.2783008
##          [,15]      [,16]     [,17]     [,18]      [,19]      [,20]      [,21]
## [1,]  3.352775 -7.6646575  8.490474  1.391862  4.1202799  -3.537828  4.6569437
## [2,] -9.288710 -2.7986700 -4.482370 -3.355957 -1.3155264  -3.619407 -2.0078822
## [3,] -7.513842  2.6526410  8.318619  2.056513 -0.5586920   6.170617 -0.9336585
## [4,]  5.979210  5.4626124  5.175429  8.586122 -3.6149533  -7.138208 -5.7994255
## [5,] -7.611639 -0.7107370 -1.489214 -3.422489 -0.8057942  -1.823368 -1.5732826
## [6,] -2.325024 -0.9108667 -7.908210  7.919847 -0.8591075 -11.050008  0.1409771
##          [,22]      [,23]      [,24]     [,25]      [,26]     [,27]     [,28]
## [1,]  1.433714 -0.4252608  1.6470348  3.352775 -7.6646575  8.490474  1.391862
## [2,]  2.631864 -6.5699433 -2.0304610 -9.288710 -2.7986700 -4.482370 -3.355957
## [3,]  4.561218 -4.4039283  4.3493663 -7.513842  2.6526410  8.318619  2.056513
## [4,]  4.072470  4.5486594  0.2748884  5.979210  5.4626124  5.175429  8.586122
## [5,]  1.815842  0.1832589  7.0979574 -7.611639 -0.7107370 -1.489214 -3.422489
## [6,] -8.980565 -4.8858289  0.2783008 -2.325024 -0.9108667 -7.908210  7.919847
##           [,29]      [,30]      [,31]     [,32]      [,33]      [,34]     [,35]
## [1,]  4.1202799  -3.537828  4.6569437  1.433714 -0.4252608  1.6470348  3.352775
## [2,] -1.3155264  -3.619407 -2.0078822  2.631864 -6.5699433 -2.0304610 -9.288710
## [3,] -0.5586920   6.170617 -0.9336585  4.561218 -4.4039283  4.3493663 -7.513842
## [4,] -3.6149533  -7.138208 -5.7994255  4.072470  4.5486594  0.2748884  5.979210
## [5,] -0.8057942  -1.823368 -1.5732826  1.815842  0.1832589  7.0979574 -7.611639
## [6,] -0.8591075 -11.050008  0.1409771 -8.980565 -4.8858289  0.2783008 -2.325024
##           [,36]     [,37]     [,38]      [,39]      [,40]      [,41]     [,42]
## [1,] -7.6646575  8.490474  1.391862  4.1202799  -3.537828  4.6569437  1.433714
## [2,] -2.7986700 -4.482370 -3.355957 -1.3155264  -3.619407 -2.0078822  2.631864
## [3,]  2.6526410  8.318619  2.056513 -0.5586920   6.170617 -0.9336585  4.561218
## [4,]  5.4626124  5.175429  8.586122 -3.6149533  -7.138208 -5.7994255  4.072470
## [5,] -0.7107370 -1.489214 -3.422489 -0.8057942  -1.823368 -1.5732826  1.815842
## [6,] -0.9108667 -7.908210  7.919847 -0.8591075 -11.050008  0.1409771 -8.980565
##           [,43]      [,44]     [,45]      [,46]     [,47]     [,48]      [,49]
## [1,] -0.4252608  1.6470348  3.352775 -7.6646575  8.490474  1.391862  4.1202799
## [2,] -6.5699433 -2.0304610 -9.288710 -2.7986700 -4.482370 -3.355957 -1.3155264
## [3,] -4.4039283  4.3493663 -7.513842  2.6526410  8.318619  2.056513 -0.5586920
## [4,]  4.5486594  0.2748884  5.979210  5.4626124  5.175429  8.586122 -3.6149533
## [5,]  0.1832589  7.0979574 -7.611639 -0.7107370 -1.489214 -3.422489 -0.8057942
## [6,] -4.8858289  0.2783008 -2.325024 -0.9108667 -7.908210  7.919847 -0.8591075
##           [,50]      [,51]     [,52]      [,53]      [,54]     [,55]      [,56]
## [1,]  -3.537828  4.6569437  1.433714 -0.4252608  1.6470348  3.352775 -7.6646575
## [2,]  -3.619407 -2.0078822  2.631864 -6.5699433 -2.0304610 -9.288710 -2.7986700
## [3,]   6.170617 -0.9336585  4.561218 -4.4039283  4.3493663 -7.513842  2.6526410
## [4,]  -7.138208 -5.7994255  4.072470  4.5486594  0.2748884  5.979210  5.4626124
## [5,]  -1.823368 -1.5732826  1.815842  0.1832589  7.0979574 -7.611639 -0.7107370
## [6,] -11.050008  0.1409771 -8.980565 -4.8858289  0.2783008 -2.325024 -0.9108667
##          [,57]     [,58]      [,59]      [,60]
## [1,]  8.490474  1.391862  4.1202799  -3.537828
## [2,] -4.482370 -3.355957 -1.3155264  -3.619407
## [3,]  8.318619  2.056513 -0.5586920   6.170617
## [4,]  5.175429  8.586122 -3.6149533  -7.138208
## [5,] -1.489214 -3.422489 -0.8057942  -1.823368
## [6,] -7.908210  7.919847 -0.8591075 -11.050008
 ## for each cell we can obtain the ROI that it belongs to
  rowData(x)
## DataFrame with 10 rows and 0 columns
 ## if we want to add patient features to each ROI we can
  metas<-data.frame(ROIID=factor(rep("A",10)),treatment=factor(rep('none',10)))
  colData(x)<-DataFrame(metas)

 ##other slots for covariates
   metadata(x)$experiment<-'test'
   metadata(x)
## $experiment
## [1] "test"

From histoCAT to R

  • We use the raw histoCAT output and containerize the data.
  ### load the data from package.
  library(imcExperiment)
 ##load the data 1000 cells from IMC experiment.
  data(data)
  dim(data)
## [1] 1000   62
  ##output from histoCAT to R
  expr<-data[,3:36]
  normExp<-percentilenormalize(data=expr,percentile=0.99)
  normExp<-as.matrix(normExp)


  ##spatial component
  spatial<-(data[,c("X_position","Y_position")])
  spatial<-as.matrix(spatial)
 ##uniqueLabel
  uniqueLabel<-paste0(data[,"ImageId"],"_",data[,"CellId"])

 phenotypes<-data[,grepl("Phenograph",colnames(data))]
 morph<-as.matrix(data[,c("Area","Eccentricity",
    "Solidity",
    "Extent",
    "Perimeter")])

  x<-imcExperiment(cellIntensity=t(normExp),
    coordinates=spatial,
    neighborHood=as.matrix(data[,grepl("neighbour_",colnames(data))]),
    network=phenotypes,
    distance=matrix(1,nrow=nrow(data),ncol=10),
    morphology=morph,
    panel=colnames(normExp),
    uniqueLabel=paste0(data$ImageId,"_",data$CellId),
    ROIID=data.frame(ROIID=data$ImageId))

 ## explore the container.
  dim(assay(x))
## [1]   34 1000
   colData(x)$treatment<-DataFrame(treatment=rep('none',1000))
   head(colData(x))
## DataFrame with 6 rows and 2 columns
##              ROIID   treatment
##          <integer> <DataFrame>
## 274864_1    274864        none
## 274864_2    274864        none
## 274864_3    274864        none
## 274864_4    274864        none
## 274864_5    274864        none
## 274864_6    274864        none
 head( colnames(x))
## [1] "274864_1" "274864_2" "274864_3" "274864_4" "274864_5" "274864_6"
  rownames(x)
##  [1] "marker1"  "marker2"  "marker3"  "marker4"  "marker5"  "marker6" 
##  [7] "marker7"  "marker8"  "marker9"  "marker10" "marker11" "marker12"
## [13] "marker13" "marker14" "marker15" "marker16" "marker17" "marker18"
## [19] "marker19" "marker20" "marker21" "marker22" "marker23" "marker24"
## [25] "marker25" "marker26" "marker27" "marker28" "marker29" "marker30"
## [31] "marker31" "marker32" "marker33" "marker34"
  ## Intensity
  all(t(cellIntensity(x))==normExp)
## [1] TRUE
  head(t(cellIntensity(x)))
##     marker1   marker2   marker3   marker4   marker5    marker6   marker7
## 1 0.4656571 0.2736824 0.7237669 0.5204605 0.6330375 0.45935533 0.2857143
## 2 0.3487101 0.2897530 0.3958101 0.3548617 0.5588399 0.11483883 0.5000000
## 3 0.8883986 0.7622286 0.8866145 0.4873408 0.2400462 0.18374213 0.0000000
## 4 0.3963552 0.1451175 0.2714126 1.0000000 0.2100260 0.19686657 0.0000000
## 5 0.5407341 0.2522549 0.4221974 0.1064634 0.8991254 0.07655922 0.3333333
## 6 1.0000000 0.7622286 0.0000000 0.7854187 0.2018387 0.82683960 0.8000000
##     marker8   marker9  marker10   marker11  marker12   marker13  marker14
## 1 0.4237137 0.1981346 0.5895631 0.38423632 0.5596034 1.00000000 0.5421737
## 2 0.2118568 0.1812504 0.3699262 0.04531335 0.1284526 0.04016094 0.1667561
## 3 0.4237137 0.6934055 0.2150245 0.69305332 1.0000000 0.11646341 0.4103197
## 4 0.7263663 0.3632249 0.1866203 1.00000000 0.9999276 0.02467097 0.9450608
## 5 0.3530947 0.1155895 0.8496593 0.55169689 0.1819642 0.06291782 0.1453909
## 6 0.2542282 0.2731756 0.4462211 0.21726828 0.6036358 0.05220870 0.1539370
##     marker15  marker16  marker17  marker18   marker19  marker20  marker21
## 1 0.46990599 0.0952381 0.4987356 0.5269829 0.04774145 0.5735263 0.7197223
## 2 0.15883470 0.1666667 0.4851215 0.4321421 0.34528973 0.3484509 0.4443348
## 3 0.45147213 0.2666667 0.6154904 0.6206074 0.30741995 0.5060037 0.8448015
## 4 0.07050581 0.5714286 0.4715074 0.4803021 0.04774145 0.4609886 0.1938211
## 5 0.60815990 0.3333333 0.3834694 0.3128651 0.12438267 0.2171569 0.4567717
## 6 0.58665373 0.4000000 1.0000000 1.0000000 0.08020126 0.6635565 0.4070243
##    marker22  marker23   marker24  marker25   marker26  marker27  marker28
## 1 0.4735184 0.9999835 0.12229278 0.2651672 0.19823739 1.0000000 0.2379700
## 2 0.3429377 0.4963629 0.08592210 0.3093618 0.06938309 0.1036510 0.2082237
## 3 0.4343442 0.6072488 0.40161962 0.3093618 0.11101294 1.0000000 0.6663160
## 4 0.4735184 0.8192498 0.04747309 0.0000000 0.23788486 0.9639418 0.0000000
## 5 0.4191098 0.3712448 0.22898012 0.3609221 0.00000000 0.4915438 0.2776317
## 6 0.7085638 0.1612178 0.33178791 0.0000000 0.49955821 1.0000000 0.0000000
##    marker29  marker30  marker31   marker32   marker33  marker34
## 1 0.5826594 0.4963881 0.4285714 0.57132469 0.32432082 0.7448313
## 2 0.1140364 0.3285300 0.1250000 0.53626613 0.44897580 0.2375474
## 3 0.9637037 0.6810320 0.5000000 0.37811307 0.02758118 0.5500343
## 4 1.0000000 0.8740688 0.8571429 0.13504038 0.07298397 0.9535425
## 5 0.4187694 0.6712403 0.3333333 0.07271405 0.44188161 0.2307836
## 6 0.3614079 0.9747837 0.3000000 1.00000000 0.54971323 0.5500343
  ##coordinate
  all(getCoordinates(x)==spatial)
## [1] TRUE
  head(getCoordinates(x))
##   X_position Y_position
## 1   508.0000   3.000000
## 2   672.1667   3.000000
## 3   154.2000   4.200000
## 4   538.5714   4.857143
## 5   913.2778   3.444444
## 6  1200.0000   4.200000
  #neighbor attraction data form histoCAT
 all(getNeighborhood(x)==as.matrix(data[,grepl("neighbour_",colnames(data))]))
## [1] TRUE
 head(getNeighborhood((x)))
##   neighbour_4_CellId1 neighbour_4_CellId2 neighbour_4_CellId3
## 1                  36                 168                   0
## 2                 144                   0                   0
## 3                   0                   0                   0
## 4                  67                  92                   0
## 5                  13                  97                 249
## 6                 135                   0                   0
##   neighbour_4_CellId4 neighbour_4_CellId5 neighbour_4_CellId6
## 1                   0                   0                   0
## 2                   0                   0                   0
## 3                   0                   0                   0
## 4                   0                   0                   0
## 5                   0                   0                   0
## 6                   0                   0                   0
##   neighbour_4_CellId7 neighbour_4_CellId8 neighbour_4_CellId9
## 1                   0                   0                   0
## 2                   0                   0                   0
## 3                   0                   0                   0
## 4                   0                   0                   0
## 5                   0                   0                   0
## 6                   0                   0                   0
##   neighbour_4_CellId10
## 1                    0
## 2                    0
## 3                    0
## 4                    0
## 5                    0
## 6                    0
 ##phenotype cluster ID
  head(getNetwork(x))
##   (network)
## 1         1
## 2         1
## 3         4
## 4         4
## 5         1
## 6         1
 all(getNetwork(x)==phenotypes)
## [1] TRUE
 ###distance calculations
  head(getDistance(x))
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,]    1    1    1    1    1    1    1    1    1     1
## [2,]    1    1    1    1    1    1    1    1    1     1
## [3,]    1    1    1    1    1    1    1    1    1     1
## [4,]    1    1    1    1    1    1    1    1    1     1
## [5,]    1    1    1    1    1    1    1    1    1     1
## [6,]    1    1    1    1    1    1    1    1    1     1
  ##morphology
   all(getMorphology(x)==morph)
## [1] TRUE
  head(getMorphology(x))
##   Area Eccentricity  Solidity    Extent Perimeter
## 1   21    0.6956573 0.9545455 0.8400000    13.844
## 2   24    0.8034622 0.9230769 0.6666667    16.565
## 3   15    0.9014174 1.0000000 0.7142857    12.918
## 4    7    0.8717717 0.8750000 0.5833333     7.683
## 5   18    0.9593719 0.7500000 0.3214286    19.766
## 6    5    0.7437865 1.0000000 0.5555556     5.814
  ##uniqueLabel
   head(getLabel(x))
## [1] "274864_1" "274864_2" "274864_3" "274864_4" "274864_5" "274864_6"
   all(getLabel(x)==paste0(data$ImageId,"_",data$CellId) )
## [1] TRUE

PCA demo analysis

  • PCA data and tSNE coordinates can be added to the container.
 ### inherited accessor.
  pca_data <- prcomp(t(counts(x)), rank=50)
tsDat<-data.frame(tsne.x=data$tSNE4148542692_1,tsne.y=data$tSNE4148542692_2,row.names=colnames(x))

reducedDims(x)<-list(PCA=pca_data$x,TSNE=tsDat)
x
## class: imcExperiment 
## dim: 34 1000 
## metadata(0):
## assays(1): counts
## rownames(34): marker1 marker2 ... marker33 marker34
## rowData names(0):
## colnames(1000): 274864_1 274864_2 ... 274864_999 274864_1000
## colData names(2): ROIID treatment
## reducedDimNames(2): PCA TSNE
## mainExpName: NULL
## altExpNames(0):
reducedDimNames(x)
## [1] "PCA"  "TSNE"
     dim(reducedDims(x)$PCA)
## [1] 1000   34
     dim(reducedDims(x)$TSNE)
## [1] 1000    2
 imc<-x
 x<-NULL

IMC container and Phenograph cluster neighborhood

  • Phenograph an IMC contrainer and heatmap result.
 ### create  phenotypes via Rphenograph
  ##run phenograph
  library(Rphenograph)
library(igraph)
  phenos<-Rphenograph(t(cellIntensity(imc)),k=35)
   pheno.labels<-as.numeric(igraph::membership(phenos[[2]]))
   getNetwork(imc)<-data.frame(cell_clustering=pheno.labels)
   head( getNetwork(imc))
  ##plot phenograph
  #plot_clustering_heatmap_wrapper(myExperiment=imc)

histoCAT analysis

  • raw input from histoCAT and creating an IMC container.
  • containerizes the intensities, neighborhood results, morphology, and computes nearest distances and stores it.
  • Can use a hyperframe to identify distances quickly.
### reading in a directory of histoCAT csv files.

 ##need to reconstruct the fold revised IMC data.
  ##merges a folder full of histoCAT csv files.
 # use morphology and the 1-10 neighbors.
  dataSet<-NULL
 for(i in c("case8.csv","case5.csv")){
  message(i)
    fpath <- system.file("extdata", i, package="imcExperiment")
    case1<-read.csv(fpath)
   proteins<-colnames(case1)[grepl("Cell_",colnames(case1))]
   neig<-colnames(case1)[grepl("neighbour_",colnames(case1))][1:10]
    case1<-case1[,c("ImageId","CellId","X_position","Y_position",proteins,
                    "Area",
                    "Eccentricity",
    "Solidity",
    "Extent",
    "Perimeter",
     neig)]
    dataSet<-rbind(dataSet,case1)
  }
## case8.csv
## case5.csv
 expr<-dataSet[,proteins]
  normExp<-percentilenormalize(data=expr,percentile=0.99)
  normExp<-as.matrix(normExp)
  boxplot(normExp)

  ##spatial component
  spatial<-(dataSet[,c("X_position","Y_position")])
  spatial<-as.matrix(spatial)
 ##uniqueLabel
  uniqueLabel<-paste0(dataSet[,"ImageId"],"_",dataSet[,"CellId"])

  ##not yet assigned
  phenotypes<-matrix(0,nrow(dataSet),1)
  ROIID=data.frame(ROIID=dataSet[,"ImageId"])
  morph<-dataSet[,c("Area",
                    "Eccentricity",
    "Solidity",
    "Extent",
    "Perimeter")]
  
  x<-imcExperiment(cellIntensity=t(normExp),
    coordinates=spatial,
    neighborHood=as.matrix(dataSet[,grepl("neighbour_",colnames(dataSet))]),
    network=phenotypes,
    distance=matrix(1,nrow=nrow(dataSet),ncol=10),
    morphology=morph,
    panel=colnames(normExp),
    uniqueLabel=uniqueLabel,
    ROIID=ROIID)
   x
## class: imcExperiment 
## dim: 34 2000 
## metadata(0):
## assays(1): counts
## rownames(34): Cell_CCR4Sm149Di_Sm149 Cell_CD15Dy164Di_Dy164 ...
##   Cell_VimentinNd143Di_Nd143 Cell_pStat3Gd158Di_Gd158
## rowData names(0):
## colnames(2000): 372149_1 372149_2 ... 274864_999 274864_1000
## colData names(1): ROIID
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
   #require(Rphenograph); library(igraph)
  #phenos<-Rphenograph(t(cellIntensity(x)),k=35)
  # pheno.labels<-as.numeric(membership(phenos[[2]]))
  # getNetwork(x)<-data.frame(cell_clustering=pheno.labels)
  #  head(getNetwork(x))
 ##plot phenograph
  #plot_clustering_heatmap_wrapper(myExperiment=x)

Access unique cell ID

  • getLabel(), we can access unique cell ID
  • subsetCase(x,ROIID) can subset to a single ROIID
  • selectCases(x,c(“ROI1”,“ROI2”,“ROI3”)) selects a subset of multiple ROIs.
 ##store the ROIID in the metadata columns.
  ##access the unique cell labels.

  tail(getLabel(x))
## [1] "274864_995"  "274864_996"  "274864_997"  "274864_998"  "274864_999" 
## [6] "274864_1000"
  roi<-subsetCase(x,372149 )
  roi
## class: imcExperiment 
## dim: 34 1000 
## metadata(0):
## assays(1): counts
## rownames(34): Cell_CCR4Sm149Di_Sm149 Cell_CD15Dy164Di_Dy164 ...
##   Cell_VimentinNd143Di_Nd143 Cell_pStat3Gd158Di_Gd158
## rowData names(0):
## colnames(1000): 372149_1 372149_2 ... 372149_999 372149_1000
## colData names(1): ROIID
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
  table(sapply(strsplit(getLabel(roi),"_"),function(x) x[1]))
## 
## 372149 
##   1000

Distance computations speedily

  • The function ’imcExperimentToHyperFrame(), creates a point pattern useful for distance measures.
  • Given a hyperframe pairwise computations can be done.
  ##you can append more clinical features to the columns of the sampleDat data.frame.
  H<-imcExperimentToHyperFrame(imcExperiment=x,phenotypeToUse = 1)
  helper<-function(pp=NULL,i=NULL,j=NULL){
  ps<-split(pp)
  nnd<-nncross(ps[[i]],ps[[j]])
  }
 ## 1-NN analysis.
  ## compare cluster 10 to cluster 9
   #eachNND<-with(H,helper(pp=point,i="10",j="8"))
 ## first choose 2 clusters to compare.
 # sapply(eachNND,function(x) mean(x[,"dist"]))

Changing class from IMC to SummarizedExperiment

  • IMC container can change to SummarizedExperiment and the FlowSOM algorithm can be used for cell classification.
  library(CATALYST)
library(cowplot)
library(flowCore)
library(ggplot2)
library(SingleCellExperiment)
library(diffcyt)

 ## from IMCexperiment to SCE
 sce<- SingleCellExperiment(x)
 class(sce)

 ## FIX ME: add the SOM algorithms for over-clustering and meta-clustering by using class inheritance. 

 #### for the cytof, the columns are sample info and the rows are cells. it uses the SummarizedExperiment format.
 
 

 ## change into a flowFrame?

 ## change into a daFrame (CATALYST)

 #data("data")

 
 rd<-DataFrame(colData(imcdata))

  marker_info<-data.frame(channel_name=sapply(strsplit(rownames(imcdata),"_"),function(x) x[3]),
              marker_name=rownames(imcdata),
              marker_class=c(rep("type",13),
                             rep("state",18),
                             rep("none",13)))

  ## Switching into Summarized Experiment class.
    dse<-SummarizedExperiment(assays=SimpleList(exprs=t(cellIntensity(imcdata))),
                           rowData=rd,
                           colData=DataFrame(marker_info)
                           )
   stopifnot(all(colnames(dse)==marker_info$marker_name))
   dse

 # Transform data recommended
 dse <- transformData(dse)
## maybe look a the normalization.
# Generate clusters
dse <- (generateClusters(dse,meta_clustering=TRUE,meta_k=30,seed_clustering=828))

 ## examine each cluster.
cluster<-rowData(dse)
 data<-data.frame(t(logcounts(imcdata)),cluster)
 #plot_matrix_heatmap_wrapper(expr=data[,rownames(imcdata)],
  #                               cell_clustering=data$cluster_id)
 #

IMC and flowSet conversions

  • utilize CATALYST package for IMC we can see scatter plots after converting to FlowSets.
  • the rownames for IMC are set to the panel names, and we can split these to create the marker and metal.
  • The ROIID is useful for quickly creating a FlowSet object as a class inheritance method.
  • The rownames of IMC container are usually in “Cell_Marker_ChannelMetal”
   ## the assay returns matrix class! required for CATALYST.
  is(assay(imcdata,'counts'),'matrix')
  rownames(imcdata)
    # for plot scatter to work need to set the rowData feature in a specific way.
   channel<-sapply(strsplit(rownames(imcdata),"_"),function(x) x[3])
      channel[34:35]<-c("Ir1911","Ir1931")

   marker<-sapply(strsplit(rownames(imcdata),"_"),function(x) x[2])
    rowData(imcdata)<-DataFrame(channel_name=channel,marker_name=marker)
      rownames(imcdata)<-marker
            plotScatter(imcdata,rownames(imcdata)[17:18],assay='counts')

  # convert to flowSet
             ## the warning has to do with duplicated Iridium channels.
   table(colData(imcdata)$ROIID)
       (fsimc <- sce2fcs(imcdata, split_by = "ROIID"))
    ## now we have a flowSet.
   pData(fsimc)
   fsApply(fsimc,nrow)
   dim(exprs(fsimc[[1]]))
   exprs(fsimc[[1]])[1:5,1:5]
    ## set up the metadata files.
   head(marker_info)
   
    exper_info<-data.frame(group_id=colData(imcdata)$Treatment[match(pData(fsimc)$name,colData(imcdata)$ROIID)],
                           patient_id=colData(imcdata)$Patient.Number[match(pData(fsimc)$name,colData(imcdata)$ROIID)],
                           sample_id=pData(fsimc)$name)
   
   ## create design
   design<-createDesignMatrix(
     exper_info,cols_design=c("group_id","patient_id"))
   
   ##set up contrast 
   contrast<-createContrast(c(0,1,0))
   nrow(contrast)==ncol(design)
   data.frame(parameters=colnames(design),contrast)
   
    ## flowSet to DiffCyt
    out_DA<-diffcyt(
      d_input=fsimc,
      experiment_info=exper_info,
      marker_info=marker_info,
      design=design,
      contrast=contrast,
      analysis_type = "DA",
      seed_clustering = 123
    )
   topTable(out_DA,format_vals = TRUE)
   
   out_DS<-diffcyt(
     d_input=fsimc,
     experiment_info=exper_info,
     marker_info=marker_info,
     design=design,
     contrast=contrast,
     analysis_type='DS',
     seed_clustering = 123,
     plot=FALSE)
   
      topTable(out_DS,format_vals = TRUE)
      
       ### from flowSet to SE.
 d_se<-prepareData(fsimc,exper_info,marker_info)

Diffcyt package tutorial (extra)

  • Given a flowSet object we can utilize the diffCyt package for measuring phenotype differences.
  • This code section is an example of diffcyt from their manual.
  • Creates a random summarized experiment.
 library(diffcyt)
# Function to create random data (one sample)
d_random <- function(n = 20000, mean = 0, sd = 1, ncol = 20, cofactor = 5) {
  d <- sinh(matrix(rnorm(n, mean, sd), ncol = ncol)) * cofactor
  colnames(d) <- paste0("marker", sprintf("%02d", 1:ncol))
  d
}
# Create random data (without differential signal)
set.seed(123)
d_input <- list(
  sample1 = d_random(), 
  sample2 = d_random(), 
  sample3 = d_random(), 
  sample4 = d_random()
)
experiment_info <- data.frame(
  sample_id = factor(paste0("sample", 1:4)), 
  group_id = factor(c("group1", "group1", "group2", "group2")), 
  stringsAsFactors = FALSE
)
marker_info <- data.frame(
  channel_name = paste0("channel", sprintf("%03d", 1:20)), 
  marker_name = paste0("marker", sprintf("%02d", 1:20)), 
  marker_class = factor(c(rep("type", 10), rep("state", 10)), 
                        levels = c("type", "state", "none")), 
  stringsAsFactors = FALSE
)
# Prepare data
d_se <- prepareData(d_input, experiment_info, marker_info)
# Transform data
d_se <- transformData(d_se)
# Generate clusters
d_se <- generateClusters(d_se)