############################################################################################# ## Procedure to get Latest Global Temperatur Series Monthly Data ## ## R Script to read GISS, NOAA, HADCRUT3, RSS and UAH monthly global (land & SST) ## ## temperatue anomnaly files and produce CSV file ## ## GISS monthly data import script develodped by http://LearnR.wordpress.com ## ## All other code developed by D Kelly O'Day, July 10, 2009 ## ## D Kelly O'Day, http://processtrends.com & http://chartgraphs.wordpress.com ## ############################################################################################# ################### Set up ################################################################ rm(list=ls()); options(digits=6) library(ggplot2) ############################################################################################# ################### RSS Data ################################################################ link_RSS<- "http://www.remss.com/data/msu/monthly_time_series/RSS_Monthly_MSU_AMSU_Channel_TLT_Anomalies_Land_and_Ocean_v03_2.txt" mo_RSS_in <- read.table(link_RSS, skip = 3, sep = "", dec=".", row.names = NULL, header = FALSE, as.is = T, colClasses = c(rep("numeric",11)), comment.char = "#", na.strings = c("*", "-",-99.9, -999.9), col.names = c("yr", "mo", "RSS_lo", "s20_n20", "n20_n825", "s70_s20", "n60_n825", "R", "USA", "e_n825", "s70_e")) RSS_yr_frac <- mo_RSS_in$yr + (mo_RSS_in$mo-0.5)/12 # yr_frac simplifies calcs ## Create new RSS_df data frame RSS_df <- data.frame(RSS_yr_frac, mo_RSS_in[3]) ## Get last mo, yr RSS_c <- nrow(RSS_df) # Find number of data rows RSS_lmo <- RSS_df$mo[RSS_c] # Fimd last month of data RSS_lyr <- RSS_df$yr[RSS_c] # Find last year of data ## Determine min & max yr_frac RSS_yr1 <- min(RSS_yr_frac) RSS_yr2 <- max(RSS_yr_frac) RSS_num <- nrow(RSS_df) RSS_yf <- RSS_yr2 ############################################################################################ ############## GISS Data ################################################################### ## GISS monthly data import script develodped by http://LearnR.wordpress.com ## ## Download and process GISS Data File ############### url <- c("http://data.giss.nasa.gov/gistemp/tabledata/GLB.Ts+dSST.txt") file <- c("GLB.Ts+dSST.txt") download.file(url, file) ## Cleanup File ## The first 8 rows and the last 12 rows of the textfile contain instructions ## Find out the number of rows in a file, and exclude the last 12 rows <- length(readLines(file)) - 12 ## Read file as char vector, one line per row, Exclude first 7 rows lines <- readLines(file, n=rows)[9:rows] ## Data Manipulation, R vector with 143 lines. ## Use regexp to replace all the occurences of **** with NA lines2 <- gsub("\\*{3,5}", " NA", lines, perl=TRUE) ## Convert the character vector to a dataframe df <- read.table( textConnection(lines2), header=TRUE, colClasses = "character") closeAllConnections() ## We are only interested in the montly data in first 13 columns df <- df[,1:13] ## Convert all variables (columns) to numeric format df <- colwise(as.numeric) (df) ## Remove rows where Year=NA from the dataframe df <- df [!is.na(df$Year),] ## Convert from wide format to long format dfm <- melt(df, id.var="Year", variable_name="Month") GISS_mo_num <- unclass(dfm$Month) GISS_mo_frac <- as.numeric(( unclass(dfm$Month)-0.5)/12) GISS_yr_frac <- dfm$Year + GISS_mo_frac GISS_anom <- dfm$value/100 dfm <- data.frame(dfm, GISS_mo_num, GISS_yr_frac, GISS_anom) dfm <- dfm[order(dfm$GISS_yr_frac), ] dfm <- dfm[!is.na(dfm$GISS_anom),] ## Find last report month and last value GISS_last <- nrow(dfm) GISS_last_yr <- dfm$Year[GISS_last] GISS_last_mo <- dfm$Month[GISS_last] GISS_last_temp <- dfm$GISS_anom[GISS_last] GISS_post_1979 <- subset(dfm, GISS_yr_frac >=1979) GISS_df <- data.frame(GISS_post_1979[5], GISS_post_1979[6]) GISS_num <- nrow(GISS_df) GISS_yf <- GISS_df[GISS_num, 1] ######################################################################################### ################ NOAA ################################################################### link_NOAA <- "ftp://ftp.ncdc.noaa.gov/pub/data/anomalies/monthly.land_and_ocean.90S.90N.df_1901-2000mean.dat" NOAA_in <- read.table(link_NOAA, skip = 0, sep = "", dec=".", row.names = NULL, header = FALSE, as.is = T, colClasses = c(rep("numeric",3)), comment.char = "#", na.strings = c("*", "-",-99.9, -999.9), col.names = c("NOAA_yr", "NOAA_mo", "NOAA_lo")) NOAA_yr_frac <- NOAA_in$NOAA_yr + (NOAA_in$NOAA_mo-0.5)/12 # yr_frac NOAA_in <- data.frame(NOAA_yr_frac, NOAA_in) NOAA_df <- subset(NOAA_in, NOAA_yr_frac >=1979 & NOAA_lo > -999) NOAA_num <- nrow(NOAA_df) NOAA_yf <- NOAA_df[NOAA_num,1] ########################################################################################## ########## UAH ########################################################################### ## Download and process UAH Data File ############### UAH_link <- "http://vortex.nsstc.uah.edu/public/msu/t2lt/tltglhmam_5.2" UAH_file <- c("tltglhmam_5.2") download.file(UAH_link, UAH_file) ## The first 5 rows and the last row of the textfile contain text information, not data ## Find out the number of rows in a file, and exclude the last 1,it has text UAH_rows <- length(readLines(UAH_file)) - 1 ## Read file as char vector, one line per row, Exclude first 5 rows UAH_lines <- readLines(UAH_file, n=UAH_rows)[6:UAH_rows] ## Data Manipulation, R vector with data lines. ## Use regexp to replace all the occurences of **** with NA lines2 <- gsub("\\*{3,5}", " NA", UAH_lines, perl=TRUE) ##Convert the character vector to a dataframe UAH_df <- read.table( textConnection(lines2), header=F, colClasses = "character") closeAllConnections() ## We are only interested in the most recent montly data columns 1:3 UAH_df <- UAH_df[,1:3] ## Convert all variables (columns) to numeric format UAH_df <- colwise(as.numeric) (UAH_df) UAH_yr_frac <- UAH_df[1]+ +(UAH_df[2]-0.5)/12 UAH_df <- data.frame(UAH_yr_frac, UAH_df) UAH_df <- subset(UAH_df, UAH_yr_frac >= 1979) UAH_lo <- UAH_df[4] UAH_num <- nrow(UAH_df) UAH_yf <- UAH_df[UAH_num,1] ############################################################################################ ########################## HADCRUT3 ######################################################### ## Download and process HADCRUT3 Data File ############### url_HAD <- c("http://www.cru.uea.ac.uk/cru/data/temperature/hadcrut3gl.txt") file <- c("hadcrut3gl.txt") download.file(url_HAD, file) ## Find out the number of rows in a file HAD_rows <- length(readLines(file)) ## HAD file has alternating data & observation count for each year ## Want to delete the 2nd row for year; Develop vector for rows tobe deleted HAD_pts <- seq(from = 1, to = HAD_rows, by=2) ## Read HAD data file as char vector, one line per row HAD_lines <- readLines(file, n=HAD_rows) ## Create wanted data by applying HAD_pts vector as index vector HAD_liones HAD_data <- HAD_lines[HAD_pts] ##Convert the HAD_data character vector to a dataframe HAD_df <- read.table( textConnection(HAD_data), header=F, colClasses = "character") closeAllConnections() ## Convert HAD_df from text data.frame to numeric data.frame HAD_df <- colwise(as.numeric) (HAD_df) ## We are only interested in the montly data in first 13 columns; drop 1th col, annual value HAD_df <- HAD_df[,1:13] names(HAD_df) <- c("year", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec") ## Convert HAD_df from wide format to long format with melt() HAD_dfm <- melt(HAD_df, id.var="year", variable_name="Month") ## Establish month numbers for yr_frac calculation HAD_mo_num <- unclass(HAD_dfm$Month) ## Name HAD_anom HAD_anom <- HAD_dfm$value ## Create yr_frac vector HAD_yr_frac <- as.numeric(HAD_dfm$year) + ((HAD_mo_num-0.5)/12) ## Create HAD_dfm data.frame & order by yr_frac HAD_dfm <- data.frame(HAD_yr_frac, HAD_anom) HAD_dfm <- HAD_dfm[order(HAD_dfm$HAD_yr_frac), ] ## Remove all HAD_anom == 0.000 HAD_dfm <- subset(HAD_dfm, HAD_anom != 0.000) ## Subset HAD_dfm to yr_frac > 1979.0 HAD_post_1979 <- subset(HAD_dfm, HAD_yr_frac >=1979 & HAD_anom != 0.000) ## Determine number of rows and yr_frac for last actual HAD_anom HAD_num <- nrow(HAD_post_1979) HAD_yf <- HAD_post_1979[HAD_num, 1] ############################################################################################ ##################### Generate Consolidated File ##################################### ## Since timing of file updates vary, may be different number of records among 5 sources ## Find out max number of records for each df; Set consol_df to max number records num_rows <- max(c(NOAA_num, GISS_num, RSS_num, UAH_num, HAD_num)) max_yf <- max(c(RSS_yf, GISS_yf, NOAA_yf, UAH_yf)) ## Generate yr_frac vector; Data series starts Jan, 1979, monthly values yr_frac <- seq(1979.04166667, max_yf, by = 1/12) ## Check nrows in each series against mun_rows, append NA if necessary ifelse (GISS_num < num_rows, new_GISS <- append(GISS_df$GISS_anom, NA, after = GISS_num), new_GISS <- GISS_df$GISS_anom ) ifelse (NOAA_num < num_rows, new_NOAA <- append(NOAA_df$NOAA_lo, NA, after = NOAA_num), new_NOAA <- NOAA_df$NOAA_lo ) ifelse (RSS_num < num_rows, new_RSS <- append(RSS_df$RSS_lo, NA, after =RSS_num), new_RSS <- RSS_df$RSS_lo) ifelse (UAH_num < num_rows, new_UAH <- append(UAH_lo, NA, after =UAH_num ), new_UAH <- UAH_lo) ifelse(HAD_num < num_rows , new_HAD <- append(HAD_post_1979[,2], NA, after = HAD_num), new_HAD <- HAD_post_1979[,2]) consol_lo <- data.frame(yr_frac, new_GISS, new_RSS, new_NOAA,new_UAH, new_HAD) attach(consol_lo) names(consol_lo) <- c("yr", "GISS","RSS", "NOAA" , "UAH", "HAD") write.csv(consol_lo,file = "C:/R_Home/Charts & Graphs Blog/Global Temp Consol/consol_lo_data.csv", row.names=F, quote=F, append = F) ## R will automatically overwrite existing CSV file with same name ## R can not write CSV file if earlier file with same name is open ## Check plot to make visual verification that data has come through ## This script provided by Peter Gallagher in comment to Charts & Graphs post myDat<-melt(consol_lo, id="yr") # display in a faceted grid with variable scales for each facet qplot(yr, value, data=myDat, geom="line", group=variable, main = "Global Land & Ocean Temperature Anomalies - C\n GISS, NOAA, HAHCrut3, RSS, UAH")+ facet_grid(variable ~ ., scale = "free_y" ) detach(consol_lo)