# R script to extract figures for scheduled cash earnings from the Monthly Labour Survey's long-term data
# URL: http://tsigeto.info/maikin/maikin-longterm-earnings-sche.r.txt
# 2021/12/31 - 2023/11/26
# Created by TANAKA Sigeto <http://tsigeto.info/maikin/>
# for Figures 1 and 2 in https://remcat.hatenadiary.jp/entry/20220102/rev2019
# Result: http://tsigeto.info/maikin/maikin-earningsdiff-2004-2020.txt
# See also: https://remcat.hatenadiary.jp/entry/20220102/rev2019#appendix

# CSV files were downloaded from the Portal Site of Official Statistics of Japan (e-Stat), 
#   long-term accumulated data of the Monthly Labour Survey: National Survey 
#   from https://www.e-stat.go.jp/stat-search/files?tstat=000001011791 (2021-12-31).
# To explore the changes due to the recalculation of the data in 2019,
#   the script below compares the new and old files, focusing on scheduled cash earnings 
#   for every month (for all industries, all establishment sizes, all worker types).

# Preparation: If the Japanese characters in the first column or the first row cause errors when R reads the files, delete them beforehand.

# Input CSV files, given as paths relative to the working directory.
# "new" and "old" are the two files this script compares; presumably the
# series published after and before the 2019 recalculation -- TODO confirm
# against the e-Stat download page.
datafile.new <- "hon-maikin-k-jissu.csv"
datafile.old <- "juu-maikin-k-jissu.csv"

# Read one long-term Monthly Labour Survey CSV file and return its monthly
# scheduled cash earnings series.
#
# Columns are addressed by position. A row is kept only when all of the
# following hold:
#   column 3 does not start with "CY"  -> monthly rows
#   column 4 starts with "TL"          -> all industries
#   column 5 starts with "T"           -> all establishment sizes
#   column 6 equals 0                  -> full- and part-time workers together
# Rows for which any of these tests evaluates to NA are dropped.
#
# Args:
#   datafile: path of the CSV file, passed to read.csv().
# Returns:
#   A numeric vector holding the values of column 8 for the kept rows, named
#   "yyyymm" (column 2 followed by column 3 zero-padded to two digits).
#   The vector has length zero when no row passes the filter.
read.earnings <- function ( datafile ) {
	# Force character columns: on R < 4.0 the default turns text columns into
	# factors, and as.numeric() on the month column would then return factor
	# level codes instead of the month numbers.
	raw <- read.csv( datafile, stringsAsFactors = FALSE )
	if ( ncol(raw) < 8 ) {
		stop(
			"expected at least 8 columns in '", datafile, "', found ", ncol(raw),
			call. = FALSE
		)
	}

	keep <- (
		substring( raw[[3]], 1, 2 ) != "CY" &   # Each month
		substring( raw[[4]], 1, 2 ) == "TL" &   # All industries
		substring( raw[[5]], 1, 1 ) == "T" &    # All sizes
		raw[[6]] == 0                            # Both full- and part-time
	)
	# which() discards NA as well as FALSE, matching what subset() did here
	# while avoiding subset() in non-interactive code.
	rows <- which( keep )

	year <- raw[[2]][rows]
	# The month column also holds non-numeric codes such as "CY", so it arrives
	# as text; any non-numeric code that survives the filter becomes NA.
	month <- as.numeric( raw[[3]][rows] )

	r <- as.numeric( raw[[8]][rows] )   # Scheduled cash earnings
	names(r) <- sprintf( "%s%02d", year, month )
	r
}
# Load the monthly series from both files.
earnings.new <- read.earnings( datafile.new )
earnings.old <- read.earnings( datafile.old )

# One row per month of the old series. The new series is looked up by its
# "yyyymm" name, so a month absent from the new file yields NA in `new`.
earnings <- data.frame(
	yyyymm = as.numeric( names( earnings.old ) ),
	new    = earnings.new[ names( earnings.old ) ],
	old    = earnings.old
)

# Percentage difference of the new figure relative to the old one.
earnings[["pct"]] <- ( earnings[["new"]] / earnings[["old"]] - 1 ) * 100

# Keep only the months where the new figure exceeds the old one;
# rows whose pct is NA are dropped as well.
earnings.diff <- earnings[ which( earnings[["pct"]] > 0 ), , drop = FALSE ]