# https://www.dropbox.com/s/ipoy673qhse5gsx/ssdm3

library(LaF)
library(RCurl)
library(dplyr)
library(data.table)

# ---- Shared configuration and helpers ----

dropbox_ldf <- "https://dl.dropboxusercontent.com/s/l7rolinwojn37e2/ldf_json_2.RData"
dropbox_df <- "https://dl.dropboxusercontent.com/s/wzr19v9pyl7ah0j/df_json_2.RData"
dropbox_dt <- "https://dl.dropboxusercontent.com/s/nlwi1mmsxr5f23k/dt_json_2.RData"

# Reference point: trajectories whose last position has a squared distance
# of at most 1 from (lat_0, lon_0) are kept in the sections below.
lat_0 <- 0
lon_0 <- 0

# Download `loc` and write it to `destfile` (by default the last path
# component of the URL, in the working directory).
#
# The URLs used in this script point at .RData files that are read with
# load() right after the call, so the payload is saved to disk as raw bytes
# instead of being passed to source(), which only accepts R code.
#
# Arguments:
#   loc      URL to download.
#   ...      Unused; kept so existing calls keep working.
#   destfile Path of the file to write.
# Returns `destfile`, invisibly.
source_https <- function(loc, ..., destfile = basename(loc)) {
  curl <- getCurlHandle()
  # `curl = curl` is required: without it the options are set on a
  # throwaway handle and never reach the request below.
  curlSetOpt(
    cookiejar = "cookies.txt", useragent = "Mozilla/5.0",
    followlocation = TRUE, curl = curl
  )
  # NOTE(review): ssl.verifypeer = FALSE disables certificate verification.
  # It is kept from the original script; drop it where a CA bundle is
  # available.
  tmp <- getURLContent(
    loc,
    .opts = list(ssl.verifypeer = FALSE), curl = curl, binary = TRUE
  )
  stopifnot(is.raw(tmp))
  # as.vector() strips the attributes RCurl attaches to the payload;
  # writeBin() rejects objects that carry attributes.
  writeBin(as.vector(tmp), destfile)
  invisible(destfile)
}

# Objects that survive the clean-up between sections.
shared <- c(
  "shared", "source_https", "dropbox_ldf", "dropbox_df", "dropbox_dt",
  "lat_0", "lon_0"
)

# ---- Section 1: share of records with death day == birth day ----

# Column widths and names of the fixed-width file `ssdm3`.
cols <- c(1, 9, 20, 4, 15, 15, 1, 2, 2, 4, 2, 2, 4, 2, 5, 5, 7)
noms_col <- c(
  "code", "ssn", "last_name", "name_suffix", "first_name",
  "middle_name", "VorPCode", "date_death_m", "date_death_d",
  "date_death_y", "date_birth_m", "date_birth_d",
  "date_birth_y", "state", "zip_resid", "zip_payment", "blanks"
)

temp <- "ssdm3"
ssn <- laf_open_fwf(
  temp,
  column_widths = cols,
  column_types = rep("character", length(cols)),
  column_names = noms_col,
  trim = TRUE
)
object.size(ssn)

# One row per chunk: first and last record index. The file is processed
# chunk by chunk rather than being read in one go.
chunk_size <- 1e05
n_rows <- nrow(ssn)
chunk_start <- seq(1, n_rows, by = chunk_size)
go_through <- cbind(
  start = chunk_start,
  end = pmin(chunk_start + chunk_size - 1, n_rows)
)
go_through

pb <- txtProgressBar(min = 0, max = nrow(go_through), style = 3)

# Count the records of chunk `s` (a row index of `go_through`) whose death
# month and day equal the birth month and day. Reads the globals `ssn`,
# `go_through` and `pb`, and advances the progress bar as a side effect.
count_birthday <- function(s) {
  setTxtProgressBar(pb, s)
  rows <- go_through[s, "start"]:go_through[s, "end"]
  data <- ssn[
    rows,
    c("date_death_m", "date_death_d", "date_birth_m", "date_birth_d")
  ]
  sum(
    (data$date_death_m == data$date_birth_m) &
      (data$date_death_d == data$date_birth_d)
  )
}

system.time(
  data <- lapply(seq_len(nrow(go_through)), count_birthday)
)
close(pb)

# Proportion over all records of the file.
sum(unlist(data)) / nrow(ssn)

# -----------------
# ---- Section 2: trajectories with data.table ----

rm(list = setdiff(ls(), shared))

system.time(source_https(dropbox_dt))
load("dt_json_2.RData")

system.time(setkey(dt, Traj_Id))

# First row of each trajectory, computed two ways to compare timings.
system.time(depart <- dt[!duplicated(Traj_Id)])
# setkey(dt,Traj_Id)
system.time(depart <- dt[J(unique(Traj_Id)), mult = "first"])
# Last row of each trajectory.
system.time(arrivee <- dt[J(unique(Traj_Id)), mult = "last"])

# Squared distance from the last position to the reference point.
system.time(arrivee[, dist := (lat - lat_0)^2 + (lon - lon_0)^2])
system.time(fin <- subset(arrivee, dist <= 1))

# Drop the arrival columns so that, after the merge, `lat`/`lon` are the
# ones coming from `depart`.
system.time(fin[, Pers_Id := NULL])
system.time(fin[, lat := NULL])
system.time(fin[, lon := NULL])

system.time(setkey(fin, Traj_Id))
system.time(setkey(depart, Traj_Id))
system.time(base <- merge(fin, depart, all.x = TRUE))
tail(base)

# --------------
# ---- Section 3: same computation with dplyr ----

rm(list = setdiff(ls(), shared))

system.time(source_https(dropbox_ldf))
system.time(load("ldf_json_2.RData"))
# load("df_json_2.RData")

# First and last position of each trajectory.
system.time(
  ldepart <- ldf %>%
    group_by(Traj_Id) %>%
    summarise(first_lat = head(lat, 1), first_lon = head(lon, 1))
)
system.time(
  larrive <- ldf %>%
    group_by(Traj_Id) %>%
    summarise(last_lat = tail(lat, 1), last_lon = tail(lon, 1))
)

system.time(
  larrive <- mutate(
    larrive,
    dist = (last_lat - lat_0)^2 + (last_lon - lon_0)^2
  )
)
system.time(lfin <- filter(larrive, dist <= 1))
# `Traj_Id` is the only column the two tables share.
system.time(lbase <- left_join(lfin, ldepart, by = "Traj_Id"))
head(lbase)

# ---- Kernel density of the departure points ----

# `lbase` has no `lat`/`lon` columns; the departure coordinates are
# `first_lat`/`first_lon`. as.matrix() is needed because `X[, 1]` on a
# tibble is not a plain numeric vector.
X <- as.matrix(lbase[, c("first_lat", "first_lon")])
# NOTE(review): the original script then overwrote X with `deaths@coords`,
# but no `deaths` object is defined in this script. Left disabled until the
# intended input is confirmed.
# X <- deaths@coords
kde2d <- KernSmooth::bkde2D(
  X,
  bandwidth = c(bw.ucv(X[, 1]), bw.ucv(X[, 2]))
)