CARD-Dataset


In [1]:
# Set the size of inline plots rendered by the notebook.
# The values currently in effect can be inspected with:
#   IRkernel::get_plot_options()
#
# Background:
# http://blog.revolutionanalytics.com/2015/09/resizing-plots-in-the-r-kernel-for-jupyter-notebooks.html
options(
  repr.plot.width = 6,
  repr.plot.height = 4
)

In [2]:
# loading libraries
library(ggplot2)

In [3]:
# Read the CARD table from CSV (strings declared as UTF-8) and preview
# the first rows.
df <- read.csv(file = "../data/CARD.csv", encoding = "UTF-8")
head(df, n = 6L)


Out[3]:
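|   | LABNR | C14AGE | C14STD | C13 | MATERIAL | SITE | COUNTRY | FEATURE | FEATURE_DESC | LAT | LONG | SOURCE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | AA-78447 | 2362 | 39 | 0 |  | Mbaere | CAF |  | Pollencore | 3.988639 | 16.92445 | Kiahtipes/Lupo et al. 2011 |
| 2 | AA-78448 | 2171 | 37 | 0 |  | Mbaere | CAF |  | Pollencore | 3.988639 | 16.92445 | Kiahtipes/Lupo et al. 2011 |
| 3 | AA-78449 | 834 | 35 | 0 |  | Mbaere | CAF |  | Pollencore | 3.988639 | 16.92445 | Kiahtipes/Lupo et al. 2011 |
| 4 | Arc-339 | 2310 | 60 | 0 | Charcoal | Mont Brazza | GAB |  |  | NA | NA | Clist 2004/05 |
| 5 | Arc-340 | 540 | 50 | 0 |  | Mont Barnier | COG |  |  | -4.18639 | 15.20028 | Pincon 1991 |
| 6 | Arc-341 | 2210 | 100 | 0 | Charcoal | Kango 5 | GAB |  |  | 0.200833 | 10.0825 | Clist 2004/05 |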

In [4]:
# Show the structure of the data frame: one line per column with its type
# and leading values.
str(object = df)


'data.frame':	1191 obs. of  12 variables:
 $ LABNR       : Factor w/ 1173 levels "","AA-78447",..: 2 3 4 5 6 7 8 9 10 11 ...
 $ C14AGE      : int  2362 2171 834 2310 540 2210 1900 2110 2390 1240 ...
 $ C14STD      : int  39 37 35 60 50 100 50 60 65 120 ...
 $ C13         : num  0 0 0 0 0 0 0 0 0 0 ...
 $ MATERIAL    : Factor w/ 14 levels "","Animal Bone",..: 1 1 1 5 1 5 1 1 1 1 ...
 $ SITE        : Factor w/ 471 levels "725","85/22",..: 267 267 267 308 307 185 401 221 390 438 ...
 $ COUNTRY     : Factor w/ 12 levels "","AGO","BDI",..: 4 4 4 8 7 8 8 7 8 6 ...
 $ FEATURE     : Factor w/ 102 levels "","ABM 06/1",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ FEATURE_DESC: Factor w/ 10 levels "","Burial","Deposit",..: 8 8 8 1 1 1 1 1 1 1 ...
 $ LAT         : num  3.99 3.99 3.99 NA -4.19 ...
 $ LONG        : num  16.9 16.9 16.9 NA 15.2 ...
 $ SOURCE      : Factor w/ 120 levels "Asombang","Asombang 1988",..: 57 57 57 9 104 9 9 30 91 14 ...

Dates


In [5]:
# Histogram of C14AGE over the full dataset, using bins 1000 units wide.
ggplot(data = df, mapping = aes(x = C14AGE)) +
  geom_histogram(binwidth = 1000) +
  theme_bw()



In [17]:
# Summary statistics (min, quartiles, mean, max) of the C14AGE column.
summary(df[["C14AGE"]])


Out[17]:
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    0.0   955.5  1870.0  2989.0  2395.0 46500.0 

In [18]:
# Keep only rows whose C14AGE is below 10000; the original note describes
# this as removing the dates older than the Holocene. Then re-check the
# distribution of the remaining ages.
df_a <- subset(df, subset = C14AGE < 10000)
summary(df_a[["C14AGE"]])


Out[18]:
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      0     915    1820    2006    2305    9730 

In [39]:
# Histogram of C14AGE for the subset df_a in bins 100 units wide, with the
# x-axis running exactly from the minimum to the maximum C14AGE (no padding).
#
# The visible range is set with coord_cartesian() instead of scale limits.
# Limits on a continuous scale censor anything that falls outside them, and
# the outermost bars have edges that extend past min/max, so those bars
# would be removed from the plot. coord_cartesian() only zooms the view and
# keeps every bar. The zero expansion stays on the x scale so the y-axis
# padding is unchanged.
ggplot(df_a, aes(x = C14AGE)) +
  geom_histogram(binwidth = 100) +
  scale_x_continuous(expand = c(0, 0)) +
  coord_cartesian(xlim = range(df_a$C14AGE)) +
  theme_bw()



In [ ]:


In [ ]: