# Import dependencies
library(rgdal)
library(raster)
library(gdalUtils)
library("readxl")
library(viridis)
library(plyr)
library(sp)
library(ggplot2)
library(rasterVis)
library(gridExtra)
library(grid)
library(geosphere)
# Define equiarea projection
moll = '+proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs'
To estimate the maximal catalytic rate of Rubisco in the terrestrial and marine environment, we first gather data on the characteristic $k_{cat}$ of Rubisco in terrestrial and marine autotrophs at 25 ℃, and then correct for the average ambient temperature at which Rubisco is working on land or in the ocean.
to estiamte the average $k_{cat}$ of Rubisco, we use a recent compilation of kinetic measurements from a wide array of Rubisco proteins in different species Flamholz et al. to calculate the mean $k_{cat}$ of Rubisco in each group of autotrophs. Here is a sample of the data:
# Load data
rubisco_data = read_excel('../data/literature_data.xlsx','kinetic_data',skip=1)
# Keep only data with kcat values
filt_rub_data = rubisco_data[!is.na(rubisco_data$vC),]
head(filt_rub_data)
First, we calculate the mean $k_{cat}$ for each taxonomic group:
group_mean = aggregate(filt_rub_data$vC, by=list(filt_rub_data$taxonomy), FUN=mean)
For the marine environment, we use data on the biomass of each plankton group (Bar-On et al.) to estimate the mean $k_{cat}$ in the marine environment:
# Load data on the biomass of each plankton group
marine_biomass = read_excel('../data/literature_data.xlsx','marine_phytoplankton_biomass',skip=1)
# Group data based on the group of Rubisco in the Rubisco data
group_biomass = aggregate(marine_biomass$'Biomass [Gt C]', by=list(marine_biomass$'Rubisco group'), FUN=sum)
# Normalize data to fraction out of the total biomass
group_biomass$x = group_biomass$x/sum(group_biomass$x)
# For the group 'Haptophyte & Dinoflagellate' use data from Haptophytes
group_mean[nrow(group_mean) + 1,] = list('Haptophyte & Dinoflagellate',group_mean[group_mean$Group.1 =='Haptophyte',2])
# Calculate the weighted mean of the kcat values based on the fraction of the total marine autotrophic biomass
mean_marine_kcat = weighted.mean(x = group_mean[group_mean$Group.1 %in% group_biomass$Group.1,'x'], w= group_biomass$x)
sprintf('We estimate the mean kcat of Rubisco in the marine environment is %.1f s^-1',mean_marine_kcat)
For the terrestrial environment, we use the mean value of all the plant species found in the groups: Bryophyte, C3 plants, C3-C4 plants, C4 plants, C4-like plants, Fern, and Gymnosperm:
# Take only land plant groups
land_rub = filt_rub_data[filt_rub_data$taxonomy %in% list('Bryophyte', 'C3 plants', 'C3-C4 plants', 'C4 plants', 'C4-like plants', 'Fern', 'Gymnosperm'),]
# Calculate the mean kcat of terrestrial plants
mean_terrestrial_kcat = mean(land_rub$vC)
sprintf('We estimate the mean kcat of Rubisco in the terrestrial environment is %.1f s^-1',mean_terrestrial_kcat)
To estimate the maximal rate of Rubisco in ambient conditions, we need to estimate the average temperature Rubisco in working in in the marine and terrestrial environment. For that purpose, we rely on remote sensing data. For the terrestrial environment, we rely on data of the a monthly mean of NPP at each location on the globe link as well as data on the monthly mean last surface temperature link. For the marine environment, we rely on chlorophyll concentration data as a proxy for productivity in the ocean link, and on reanalysis data on the sea surface temperature at different depths link.
Both for the marine and terrestrial environments, we calculate the annual average temperature, weighted by the productivity at each location (NPP in the case of terrestrial productivity, or chlorophyll in the case of marine productivity).
First, we calculate the ambient temperature at which carbon fixation occurs in the ocean. We calculate the integrated mean temperature at the first 100 m of the ocean:
# Find all data files
chl_files = list.files('../data/temperature/ocean/chl/')
sst_files = list.files('../data/temperature/ocean/SST/THETA.2015.nc')
# Load the first data files
chl_tmp = raster(paste('../data/temperature/ocean/chl/',chl_files[1],sep=''))
# Load temperature file
sst_tmp = raster('../data/temperature/ocean/SST/THETA.2015.nc', band=1,level=1)
# Integrate over the first 100 m
for(i in 2:10){
sst_tmp = sst_tmp + raster('../data/temperature/ocean/SST/THETA.2015.nc', band=1,level=i)
}
sst_tmp = sst_tmp/10
# Remove NA data
NAvalue(chl_tmp) <- 99999
# Match extent and crs between chl and sst
extent(sst_tmp) = extent(chl_tmp)
crs(sst_tmp) = crs(chl_tmp)
# Use only chl data which has temperature associated with it
chl_masked = mask(chl_tmp,!is.na(sst_tmp))
# Reproject data onto an equiarea projection to enable us to calculate means
chl_moll = projectRaster(chl_masked, crs=moll)
sst_moll = projectRaster(sst_tmp, crs=moll)
# Integrate over the entire globe for the product of the temperature and chl
nume = cellStats(chl_moll*sst_moll,'sum')
# Integrate over the entire globe for chl
deno = cellStats(chl_moll,'sum')
# Iterate for all data files
for(i in 2:length(chl_files)){
# Load data files
chl_tmp = raster(paste('../data/temperature/ocean/chl/',chl_files[i],sep=''))
# Load temperature file
sst_tmp = raster('../data/temperature/ocean/SST/THETA.2015.nc', band=i,level=1)
# Remove NA data
NAvalue(chl_tmp) <- 99999
# Integrate over the first 100 m
for(j in 2:10){
sst_tmp = sst_tmp + raster('../data/temperature/ocean/SST/THETA.2015.nc', band=i,level=j)
}
sst_tmp = sst_tmp/10
# Match extent and crs between chl and sst
extent(sst_tmp) = extent(chl_tmp)
crs(sst_tmp) = crs(chl_tmp)
# Use only chl data which has temperature associated with it
chl_masked = mask(chl_tmp,!is.na(sst_tmp))
# Reproject data onto an equiarea projection to enable us to calculate means
chl_moll = projectRaster(chl_masked, crs=moll)
sst_moll = projectRaster(sst_tmp, crs=moll)
# Integrate over the entire globe for the product of the temperature and chl and add it to the total sum of this integration
nume = nume + cellStats(chl_moll*sst_moll,'sum')
# Integrate over the entire globe for chl and add it to the total sum of this integration
deno = deno + cellStats(chl_moll,'sum')
}
# Calculate the ambient temperature in the ocean
marine_amb_temp = nume/deno
sprintf('We estimate the mean ambient temperature at which Rubisco operates in the marine environment is %.0f ℃',marine_amb_temp)
# Find all data files
npp_files = list.files('../data/temperature/land/NPP')
lst_files = list.files('../data/temperature/land/LST')
# Load the first data files
npp_tmp = raster(paste('../data/temperature/land/NPP/',npp_files[1],sep=''))
lst_tmp = raster(paste('../data/temperature/land/LST/',lst_files[1],sep=''))
# Remove NA data
NAvalue(npp_tmp) <- 99999
NAvalue(lst_tmp) <- 99999
# Use only NPP data which has temperature associated with it
npp_masked = mask(npp_tmp,!is.na(lst_tmp))
# Reproject data onto an equiarea projection to enable us to calculate means
npp_moll = projectRaster(npp_masked, crs=moll)
lst_moll = projectRaster(lst_tmp, crs=moll)
# Integrate over the entire globe for the product of the temperature and NPP
nume = cellStats(npp_moll*lst_moll,'sum')
# Integrate over the entire globe for NPP
deno = cellStats(npp_moll,'sum')
# Iterate for all data files
for(i in 2:length(lst_files)){
# Load data files
npp_tmp = raster(paste('../data/temperature/land/NPP/',npp_files[i],sep=''))
lst_tmp = raster(paste('../data/temperature/land/LST/',lst_files[i],sep=''))
# Remove NA data
NAvalue(npp_tmp) <- 99999
NAvalue(lst_tmp) <- 99999
# Use only chl data which has temperature associated with it
npp_masked = mask(npp_tmp,!is.na(lst_tmp))
# Reproject data onto an equiarea projection to enable us to calculate means
npp_moll = projectRaster(npp_masked, crs=moll)
lst_moll = projectRaster(lst_tmp, crs=moll)
# Integrate over the entire globe for the product of the temperature and NPP and add it to the total sum of this integration
nume = nume + cellStats(npp_moll*lst_moll,'sum')
# Integrate over the entire globe for NPP and add it to the total sum of this integration
deno = deno + cellStats(npp_moll,'sum')
}
# Calculate the ambient temperature on land
land_amb_temp = nume/deno
sprintf('We estimate the mean ambient temperature at which Rubisco operates in the terrestrial environment is %.0f ℃',land_amb_temp)
We can use the difference in the ambient temperature at which Rubisco is operating in the terrestrial or marine environments to estimate the maximal rate of Rubisco at these environments. For this calculation, we rely on data measuring the temperature sensitivty of different Rubiscos (Young et al.). In general, the average temperature sensitivity (Q10) is between 2.2-2.9, meaning that for every 10 ℃ decrease in the ambient temperature the rate of Rubisco drops between 2.2-2.9-fold. By using this temperature sensitivity, we now calculate the maximal rate of Rubisco in the terrestrial and marine environment under ambient temperature:
q10 = sqrt(prod(c(2.2,2.9)))
marine_max_rate = mean_marine_kcat/(q10^((25-marine_amb_temp)/10))
sprintf('We estimate the maximal rate of Rubisco in the marine environment at %.1f s^-1',marine_max_rate)
land_max_rate = mean_terrestrial_kcat/(q10^((25-land_amb_temp)/10))
sprintf('We estimate the maximal rate of Rubisco in the terrestrial environment at %.1f s^-1',land_max_rate)
As Phytoplankton are very dynamic in their global distribution, apprearing and disappearing in rapid blooms, we wondered whether they usually appear in locations with high day durations. To estimate what is the average day duration of the "average" phytoplankton cell, we calculated the duration of day at each location at each month of the year, and weighted each location at each time by the amount of chlorophyll present
# We take the middle of each month as a representative day for calculating the number of light hours
doy =c(1,32,60,91,121,152,182,213,244,274,305,335,365)
for (i in 1:length(doy)-1){
doy[i] = ((doy[i]+doy[i+1]-2)/2)
}
doy = doy[1:12]
# Find all data files
chl_files = list.files('../data/temperature/ocean/chl/')
# Load the first data files
chl_tmp = raster(paste('../data/temperature/ocean/chl/',chl_files[1],sep=''))
# Remove NA data
NAvalue(chl_tmp) <- 99999
# Calculate the length of daylight at each location
dlength = calc(init(chl_tmp,'y'), fun=function(x){daylength(x,doy[1])})
# Reproject data onto an equiarea projection to enable us to calculate means
chl_moll = projectRaster(chl_tmp, crs=moll)
dlength_moll = projectRaster(dlength, crs=moll)
# Integrate over the entire globe for the product of the temperature and chl
nume = cellStats(chl_moll*dlength_moll,'sum')
# Integrate over the entire globe for chl
deno = cellStats(chl_moll,'sum')
# Iterate for all data files
for(i in 2:length(chl_files)){
# Load data files
chl_tmp = raster(paste('../data/temperature/ocean/chl/',chl_files[i],sep=''))
# Remove NA data
NAvalue(chl_tmp) <- 99999
dlength = calc(init(chl_tmp,'y'), fun=function(x){daylength(x,doy[i])})
# Reproject data onto an equiarea projection to enable us to calculate means
chl_moll = projectRaster(chl_tmp, crs=moll)
dlength_moll = projectRaster(dlength, crs=moll)
# Integrate over the entire globe for the product of the temperature and chl and add it to the total sum of this integration
nume = nume + cellStats(chl_moll*dlength_moll,'sum')
#print(nume)
# Integrate over the entire globe for chl and add it to the total sum of this integration
deno = deno + cellStats(chl_moll,'sum')
}
# Calculate the ambient temperature in the ocean
marine_avg_light = nume/deno
sprintf('We estimate the average number of day hours an "average" phytoplankton experiences is ≈ %.0f hours.',marine_avg_light)
This means the fact Rubisco only operates during the day will cause an expected decrease in the time-averaged rate of Rubisco of 24/13 = 1.9-fold.