R - Faster calculation of onset days for the rainy season
I have a huge set of CSV data files (ca. 4 million); each file contains climate data (temp, precip, etc.) for 30 years. I want to use these data to calculate the onset of the rainy season. Each file corresponds to one cell of a grid (2544 rows x 1928 columns). I wrote a loop to calculate the onset days for every row, resulting in a file with 57,840 rows (1928 x 30). The script works fine, but it takes 18 minutes per row, which means I would need 31.8 days to finish the calculations.
Does anyone have an idea on how to speed up these calculations?
Here is the code:
# Compute the onset day of the rainy season for every grid cell (one CSV file
# per cell) found under `path`, and write one row per cell and year to
# "results_<row>.csv" inside `path`.
#
# Each input file is expected to provide the columns `year`, `month`, `day`
# and `precip`, with rows in chronological order.

start.time <- Sys.time()

# set path to the working directory
path <- "z:/md/projects/.../climate-data/row-0"

files <- list.files(
  path = path, full.names = TRUE, recursive = TRUE,
  pattern = glob2rx("*col*.csv*")
)
if (length(files) == 0L) {
  stop("no input files found under ", path, call. = FALSE)
}

# Last path component (e.g. "row-0"); used in the name of the output file.
name <- basename(path)

# Find the onset day for a single year of one cell.
#
# asc:         data of one cell, with a `doy` column (row index within year).
# anomaly:     daily precipitation minus the long-term mean, one per row.
# yr:          the year to evaluate.
# clim_min:    climatological day index around which the search is centred.
# half_window: number of days searched before the centre; the window covers
#              `centre - half_window` up to `centre + half_window - 1`.
#
# Returns a one-row data frame (year, onset) or NULL when the centre day
# cannot be located or the window holds no usable values.
onset_for_year <- function(asc, anomaly, yr, clim_min, half_window) {
  # Rows of the previous, current and following year, so that the search
  # window may cross a year boundary.
  span <- which(asc$year %in% c(yr - 1, yr, yr + 1))
  centre <- which(asc$year[span] == yr & asc$doy[span] == clim_min)
  if (length(centre) != 1L) {
    warning("year ", yr, ": centre day not found, skipped", call. = FALSE)
    return(NULL)
  }

  lo <- max(1L, centre - half_window)
  hi <- min(length(span), centre + half_window - 1L)
  win <- span[lo:hi]

  # Onset = day with the lowest accumulated anomaly inside the window.
  # which.min() returns the first minimum, so ties cannot produce several
  # rows for one year.
  best <- which.min(cumsum(anomaly[win]))
  if (length(best) == 0L) {
    return(NULL)
  }
  hit <- win[best]
  data.frame(year = asc$year[hit], onset = asc$doy[hit])
}

# Compute the onset days of all `years` for the cell stored in `file`.
#
# Returns a data frame with the columns `year`, `onset` (row index within
# the year) and `cell` (file name without its last four characters), or NULL
# when no year could be evaluated.
onset_for_file <- function(file, years = 1982:2011, half_window = 50L) {
  asc <- read.table(file, sep = ",", header = TRUE)

  cell <- basename(file)
  cell <- substr(cell, 1L, nchar(cell) - 4L) # drop the ".csv" suffix

  q <- mean(asc$precip) # long-term mean daily precipitation

  # Mean precipitation per calendar day. The day x month matrix is flattened
  # column-wise, i.e. all days of January, then February, and so on;
  # non-existent dates (e.g. 31 February) are NA and dropped.
  clim <- as.vector(tapply(asc$precip, list(asc$day, asc$month), mean))
  clim <- clim[!is.na(clim)]

  # Index of the minimum of the cumulative climatological anomaly.
  # NOTE(review): `clim` contains 29 February whenever the data do, so after
  # February this index is one position ahead of the per-year row index in
  # non-leap years -- confirm this is intended.
  clim_min <- which.min(cumsum(clim - q))

  # Running row index within each year, computed once per file.
  asc$doy <- ave(seq_len(nrow(asc)), asc$year, FUN = seq_along)
  anomaly <- asc$precip - q

  per_year <- lapply(
    years, function(yr) onset_for_year(asc, anomaly, yr, clim_min, half_window)
  )
  out <- do.call(rbind, per_year)
  if (is.null(out)) {
    return(NULL)
  }
  out$cell <- cell
  out
}

# Collect the per-file results in a list and bind them once; growing a data
# frame with rbind() inside the loop copies it on every iteration.
results <- do.call(rbind, lapply(files, onset_for_file))

write.table(
  results,
  file = file.path(path, paste0("results_", name, ".csv")),
  row.names = FALSE, col.names = TRUE, sep = ", ", quote = FALSE
)

end.time <- Sys.time()
time <- end.time - start.time
time
Comments
Post a Comment