# R script used in https://remcat.hatenadiary.jp/entry/20211009/maikinold
# URL: http://tsigeto.info/maikin/maikin-monthly3.r.txt
# 2021/09/26 - 2021/10/05
# Created by TANAKA Sigeto
# Names of the input data files.
datafile.new       <- "maikin-monthly.dat"
datafile.old       <- "maikin-monthly-juu.dat"
datafile.kyu201801 <- "maikin201801kyu.dat"

# Time points are coded as yyyymm; a trailing .5 is the half-step that
# read.data.long() adds to the rows whose worker count comes from e1.
# NOTE(review): census.date presumably marks census-based benchmark dates -- confirm.
census.date <- c(200610, 200907, 201407, 201605.5, 201905.5)

# NOTE(review): reset.date0 / reset.date1 look like the last point before and
# the first point after each break in the series -- confirm against their use.
reset.date0 <- c(200812.5, 201112.5, 201712.5)
reset.date1 <- c(200901, 201201, 201801)

# Every time point listed above, followed by 202105 and 202105.5.
checkpoint <- c(
  census.date,
  reset.date0,
  reset.date1,
  202105,
  202105.5
)
# Read a tab-delimited data file and return it in "long" form.
#
# Each input record yields two output rows:
#   * one at yyyymm       with worker = e0
#   * one at yyyymm + 0.5 with worker = e1
# NOTE(review): e0 / e1 are presumably the worker counts at the start and the
# end of the month -- confirm against the data documentation.
#
# Added columns: is (factor "industry.size"), year, month, worker2 (mean of
# e0 and e1) and worker.
#
# Args:
#   filename: path of a tab-delimited file with a header row containing at
#             least the columns yyyymm, industry, size, e0 and e1.
# Returns:
#   A data frame with twice as many rows as the file, ordered by yyyymm
#   (and by size within the same yyyymm).
# Raises:
#   An error if one of the required columns is missing.
read.data.long <- function( filename ) {
  # Spell out TRUE: T is an ordinary variable and may have been reassigned.
  d <- read.delim( filename, header = TRUE )

  # Fail early with a clear message instead of silently returning an empty
  # or malformed result.
  required <- c( "yyyymm", "industry", "size", "e0", "e1" )
  absent <- setdiff( required, names( d ) )
  if ( length( absent ) > 0 ) {
    stop(
      "read.data.long: missing column(s) in ", filename, ": ",
      paste( absent, collapse = ", " ),
      call. = FALSE
    )
  }

  # Sort by date and establishment size. order() keeps ties in their
  # original order, so one call with two keys gives the same result as
  # sorting by size first and then by yyyymm.
  d <- d[ order( d$yyyymm, d$size ) , ]

  d$is <- factor( paste( d$industry, d$size, sep = "." ) )
  # yyyymm / 100 is year + month / 100; assuming months are 01-12 the
  # fractional part is below 0.5, so round() yields the year.
  d$year <- round( d$yyyymm / 100 )
  d$month <- floor( d$yyyymm %% 100 )
  d$worker2 <- ( d$e0 + d$e1 ) / 2

  # First half of each period: worker count taken from e0.
  first.half <- d
  first.half$worker <- first.half$e0

  # Second half of each period: worker count taken from e1, placed half a
  # step later on the yyyymm axis.
  second.half <- d
  second.half$worker <- second.half$e1
  second.half$yyyymm <- second.half$yyyymm + 0.5

  # Interleave both halves chronologically.
  r <- rbind( first.half, second.half )
  r[ order( r$yyyymm ) , ]
}
# Split `data` by establishment size and add change columns to each part.
#
# Args:
#   data:  data frame with at least the columns size, yyyymm and worker,
#          already ordered the way the series should be differenced.
#   reset: yyyymm values at which the change from the previous row is not
#          meaningful; worker.inc and worker.diff are set to NA there.
# Returns:
#   A named list (size5, size30, size100, size500, size1000) of data frames,
#   one per size class, each with these extra columns:
#     worker.prev  worker of the previous row (NA in the first row)
#     worker.next  worker of the following row (NA in the last row)
#     worker.inc   log( worker / worker.prev )
#     worker.diff  worker - worker.prev
#   A size class that does not occur in `data` yields a zero-row data frame
#   with the same columns.
list.bysize <- function( data , reset=c() ){
  size.classes <- c(
    size5 = 5, size30 = 30, size100 = 100, size500 = 500, size1000 = 1000
  )

  # Add lag/lead and change columns to the rows of one size class.
  add.changes <- function( d ) {
    n <- nrow( d )
    w <- d$worker
    # Shift by indexing into a padded vector. Unlike c( NA, w[ -n ] ), this
    # has length n even when n == 0, so an empty size class does not fail.
    d$worker.prev <- c( NA, w )[ seq_len( n ) ]
    d$worker.next <- c( w, NA )[ seq_len( n ) + 1L ]
    d$worker.inc  <- log( d$worker / d$worker.prev )
    d$worker.diff <- d$worker - d$worker.prev
    # Blank out the changes that span a reset point.
    at.reset <- d$yyyymm %in% reset
    d$worker.inc[ at.reset ]  <- NA
    d$worker.diff[ at.reset ] <- NA
    d
  }

  lapply(
    size.classes,
    function( s ) {
      # which() drops rows whose size is NA, as subset() does.
      add.changes( data[ which( data$size == s ), , drop = FALSE ] )
    }
  )
}
# Renewed data: load the file named by datafile.new in long form.
x.long <- read.data.long(filename = datafile.new)
x.bysize <- subset( x.long, industry=="TL" & 0