# R script used in https://remcat.hatenadiary.jp/entry/20211009/maikinold
# URL: http://tsigeto.info/maikin/maikin-monthly3.r.txt
# 2021/09/26 - 2021/10/05
# Created by TANAKA Sigeto

# ---- Input files ----

datafile.new <- "maikin-monthly.dat"
datafile.old <- "maikin-monthly-juu.dat"
datafile.kyu201801 <- "maikin201801kyu.dat"

# ---- Reference dates ----
# Dates use the numeric encoding produced by read.data.long(): a whole number
# yyyymm identifies the e0-based row of a month, and yyyymm + 0.5 identifies
# the e1-based row of the same month.

census.date <- c(200610, 200907, 201407, 201605.5, 201905.5)
reset.date0 <- c(200812.5, 201112.5, 201712.5)
reset.date1 <- c(200901, 201201, 201801)
checkpoint <- c(census.date, reset.date0, reset.date1, 202105, 202105.5)

# ---- Functions ----

# Read a tab-delimited data file and expand it to two rows per input row.
#
# Arguments:
#   filename  Path of a tab-delimited file with a header line. The columns
#             yyyymm, size, industry, e0 and e1 are required.
#             NOTE(review): e0 / e1 are presumably worker counts at the start
#             and the end of the month -- confirm against the data source.
#
# Value:
#   A data frame sorted by yyyymm holding every input row twice:
#     * once with worker = e0 and the original yyyymm,
#     * once with worker = e1 and yyyymm + 0.5.
#   Added columns: is (factor "<industry>.<size>"), year, month,
#   worker2 (mean of e0 and e1) and worker.
read.data.long <- function(filename) {
  d <- read.delim(filename, header = TRUE)

  # Sort by date, then by establishment size. order() keeps ties in their
  # original sequence, so this equals sorting by size first and by date second.
  d <- d[order(d$yyyymm, d$size), ]

  d$is <- factor(paste(d$industry, d$size, sep = "."))
  # Integer division: identical to rounding yyyymm / 100 for months 01-12,
  # but cannot round upwards on an out-of-range month field.
  d$year <- d$yyyymm %/% 100
  d$month <- floor(d$yyyymm %% 100)
  d$worker2 <- (d$e0 + d$e1) / 2

  # Row built from e0 keeps the month's own date ...
  first.half <- d
  first.half$worker <- first.half$e0

  # ... while the row built from e1 is placed half a step later.
  second.half <- d
  second.half$worker <- second.half$e1
  second.half$yyyymm <- second.half$yyyymm + 0.5

  long <- rbind(first.half, second.half)
  long[order(long$yyyymm), ]
}

# Split data by establishment size and add change-in-worker columns.
#
# Arguments:
#   data   Data frame with at least the columns size, worker and yyyymm,
#          e.g. the result of read.data.long(). Rows of each size class are
#          processed in the order in which they appear in data.
#   reset  Vector of yyyymm values at which worker.inc and worker.diff are
#          set to NA. Default: none.
#
# Value:
#   A named list (size5, size30, size100, size500, size1000) of data frames,
#   each with the added columns
#     worker.prev  worker of the preceding row (NA for the first row),
#     worker.next  worker of the following row (NA for the last row),
#     worker.inc   log(worker / worker.prev),
#     worker.diff  worker - worker.prev.
#   A size class without any rows yields a zero-row data frame carrying the
#   same columns (the former implementation stopped with an error there).
list.bysize <- function(data, reset = c()) {
  required <- c("size", "worker", "yyyymm")
  stopifnot(all(required %in% names(data)))

  size.classes <- c(5L, 30L, 100L, 500L, 1000L)

  # which() drops rows whose size is NA, exactly as subset() would.
  by.size <- lapply(size.classes, function(s) {
    data[which(data$size == s), , drop = FALSE]
  })
  names(by.size) <- paste0("size", size.classes)

  lapply(by.size, function(d) {
    idx <- seq_len(nrow(d))

    # Shift by indexing a padded vector: unlike c(NA, x[-n]) this gives a
    # vector of length nrow(d) even when d has no rows at all.
    d$worker.prev <- c(NA, d$worker)[idx]
    d$worker.next <- c(d$worker, NA)[idx + 1L]

    d$worker.inc <- log(d$worker / d$worker.prev)
    d$worker.diff <- d$worker - d$worker.prev

    # Changes measured at a reset date are blanked out.
    is.reset <- d$yyyymm %in% reset
    d$worker.inc[is.reset] <- NA
    d$worker.diff[is.reset] <- NA

    d
  })
}

# ---- Renewed data ----
# NOTE(review): the text under review ends in the middle of the second
# statement below. Both statements are kept verbatim but commented out so that
# the file still parses; restore the missing remainder of the subset() call
# before re-enabling them.
# x.long <- read.data.long( datafile.new )
# x.bysize <- subset( x.long, industry=="TL" & 0