r - Faster calculation of onset days for rainy season -


I have a huge set of CSV data files (ca. 4 million); each file contains climate data (temp, precip, etc.) for 30 years. I want to use these data to calculate the onset of the rainy season. Each file corresponds to one cell of a grid (2544 rows x 1928 columns). I wrote a loop to calculate the onset days for every row of the grid, resulting in one file per row with 57,840 lines (1928 x 30). The script works fine, but it takes 18 minutes per row, which means I would need 31.8 days to finish the calculations.

Does anyone have an idea on how to speed up these calculations?

Here is the code:

# Rainy-season onset per grid cell for one row of the climate grid.
#
# Every CSV under `path` holds the daily series of one cell and is expected to
# provide the columns `year`, `month`, `day` and `precip`. For each cell the
# script
#   1. builds the mean precipitation of every calendar day over all years,
#   2. accumulates its anomaly against the overall mean `q`; the position of
#      the minimum of that curve is the climatological onset day,
#   3. for every year, accumulates the daily anomaly in a window of
#      `half_window` days before / after that day and reports the day (row
#      index within its year) at which the accumulated anomaly is lowest.
# All cells of the row are written to one "results_<row>.csv" file.

start.time <- Sys.time()

# Set path to the working directory
path <- "z:/md/projects/.../climate-data/row-0"
setwd(path)
files <- list.files(
  path = path, full.names = TRUE, recursive = TRUE,
  pattern = glob2rx("*col*.csv*")
)
name <- strsplit(path, "/")[[1]][6]

# Compute the yearly onset days for one cell.
#
# asc          data.frame with columns year, month, day, precip; rows are
#              assumed to be in chronological order (the original script made
#              the same assumption when numbering the days of each year).
# years        years for which an onset is searched.
# half_window  the search window is [centre - half_window, centre + half_window).
#
# Returns a data.frame with one row per element of `years` and the columns
# `year` (year of the row holding the minimum, which may be an adjacent year
# when the window crosses a year boundary) and `onset` (its day of year).
# Years for which the climatological onset day does not exist in the data
# keep the requested year and get `onset = NA`.
cell_onsets <- function(asc, years = 1982:2011, half_window = 50L) {
  q <- mean(asc$precip)

  # Mean precipitation per calendar day in one pass. The result is a
  # day x month matrix, so as.vector() walks it month by month exactly like
  # the former nested 1:12 / 1:31 loops did.
  clim <- as.vector(tapply(
    asc$precip,
    list(factor(asc$day, levels = 1:31), factor(asc$month, levels = 1:12)),
    mean
  ))
  clim <- clim[!is.na(clim)] # drop non-existent days such as 30 February
  clim_onset <- which.min(cumsum(clim - q))

  # Running row index within each year, computed once instead of once per
  # loop iteration.
  doy <- ave(seq_along(asc$year), asc$year, FUN = seq_along)

  out_year <- years
  out_onset <- rep(NA_integer_, length(years))
  for (k in seq_along(years)) {
    yr <- years[k]
    # Rows of the previous, current and next year; `centre` is the position
    # of the climatological onset day of `yr` inside that span.
    span <- which(asc$year %in% c(yr - 1, yr, yr + 1))
    centre <- which(asc$year[span] == yr & doy[span] == clim_onset)
    if (length(centre) == 0L) {
      next
    }
    lo <- max(1L, centre[1L] - half_window)
    hi <- min(length(span), centre[1L] + half_window - 1L)
    window <- span[lo:hi]
    accum <- cumsum(asc$precip[window] - q)
    # which.min() keeps the first minimum, so ties cannot yield several rows.
    best <- window[which.min(accum)]
    if (length(best) == 0L) {
      next
    }
    out_year[k] <- asc$year[best]
    out_onset[k] <- doy[best]
  }

  data.frame(year = out_year, onset = out_onset)
}

# One result table per file, bound together once at the end instead of
# growing a data.frame with rbind() inside the loop. The files are processed
# independently of each other.
per_cell <- lapply(files, function(file) {
  asc <- read.table(file, sep = ",", skip = 0, header = TRUE)
  name2 <- strsplit(file, "/")[[1]][7]
  name2 <- substr(name2, 1, nchar(name2) - 4) # strip the 4-character extension
  onsets <- cell_onsets(asc)
  onsets$cell <- rep(name2, nrow(onsets))
  onsets
})

results <- do.call(rbind, per_cell)
if (is.null(results)) {
  results <- data.frame()
}

write.table(
  results,
  file = paste0("results_", name, ".csv"),
  row.names = FALSE, col.names = TRUE, sep = ", ", quote = FALSE
)

end.time <- Sys.time()
time <- end.time - start.time
time


Comments

Popular posts from this blog

node.js - Node js - Trying to send POST request, but it is not loading javascript content -

javascript - Replicate keyboard event with html button -

javascript - Web audio api 5.1 surround example not working in firefox -