# R script to extract figures for scheduled cash earnings from the Monthly Labour Survey's long-term data
# URL: http://tsigeto.info/maikin/maikin-longterm-earnings-sche.r.txt
# 2021/12/31 - 2023/11/26
# Created by TANAKA Sigeto
# for Figures 1 and 2 in https://remcat.hatenadiary.jp/entry/20220102/rev2019
# Result: http://tsigeto.info/maikin/maikin-earningsdiff-2004-2020.txt
# See also: https://remcat.hatenadiary.jp/entry/20220102/rev2019#appendix

# CSV files were downloaded from the Portal Site of Official Statistics of Japan (e-Stat),
# long-term accumulated data of the Monthly Labour Survey: National Survey
# from https://www.e-stat.go.jp/stat-search/files?tstat=000001011791 (2021-12-31).

# To explore the changes due to the recalculation of the data in 2019,
# the script below compares the new and old files, focusing on scheduled cash earnings
# for every month (for all industries, all establishment sizes, all worker types).

# Preparation: Delete the Japanese characters in the first column and in the first row,
# if they cause errors in the data processing by R.

# Input files, read from the current working directory.
datafile.new <- "hon-maikin-k-jissu.csv"
datafile.old <- "juu-maikin-k-jissu.csv"

# Read one long-term data CSV and return the monthly earnings series.
#
# Arguments:
#   datafile  Path of the CSV file, passed to read.csv().
#
# Value:
#   A numeric vector taken from column 8 of the selected rows, named by
#   "yyyymm" strings built from columns 2 (year) and 3 (month).
#
# Columns are addressed by position, so the file must keep the column layout
# this script was written for (at least 8 columns).
read.earnings <- function(datafile) {
  x0 <- read.csv(datafile)

  # Fail early with a readable message instead of "undefined columns selected".
  if (ncol(x0) < 8) {
    stop("Expected at least 8 columns in ", datafile, ", found ", ncol(x0),
         call. = FALSE)
  }

  keep <-
    substring(x0[, 3], 1, 2) != "CY" &  # Each month
    substring(x0[, 4], 1, 2) == "TL" &  # All industries
    substring(x0[, 5], 1, 1) == "T" &   # All sizes
    x0[, 6] == 0                        # Both full- and part-time

  # Rows whose condition evaluates to NA are dropped, as subset() would do.
  x1 <- x0[!is.na(keep) & keep, , drop = FALSE]

  # Extract year, month, contract cash earnings
  # NOTE(review): the file header calls this series "scheduled cash earnings";
  # confirm which wording matches column 8.
  year <- x1[, 2]
  month <- x1[, 3]
  value <- x1[, 8]

  # Zero-pad the month so that names are always six characters ("yyyymm").
  # The earlier form did not pad:
  # yyyymm <- paste ( x2[,1] , x2[,2], sep="" )
  yyyymm <- sprintf("%s%02d", year, as.numeric(month))

  r <- as.numeric(value)
  names(r) <- yyyymm
  r
}

earnings.new <- read.earnings(datafile.new)
earnings.old <- read.earnings(datafile.old)

# Align the new series to the months present in the old series (lookup by
# "yyyymm" name; months absent from the new file become NA).
earnings <- data.frame(
  yyyymm = as.numeric(names(earnings.old)),
  new = earnings.new[names(earnings.old)],
  old = earnings.old
)

# Percentage difference of the new figure relative to the old one.
earnings$pct <- 100 * ((earnings$new / earnings$old) - 1)

# NOTE(review): the source is truncated in the middle of the next statement.
# The surviving fragment is kept verbatim below; restore the full statement
# from the original script before relying on earnings.diff.
# earnings.diff <- subset( earnings, 0