# Extract the number of workers and contractual cash earnings from
# the original tables ("maikin genhyou" in Japanese) of Monthly Labour Survey.
# Excel files are available from
#   https://www.e-stat.go.jp/stat-search/files?tstat=000001011791
# 2021/08/15 - 2021/09/06
# Created by TANAKA Sigeto
# URL: http://tsigeto.info/maikin/maikin-monthly.pl.txt
#
# Input : tab-separated text files named on the command line.
# Output: a header row, then one tab-separated row on STDOUT for the first
#         occurrence of each (file, industry, size class) combination.
#
# Option:
#   -all  (without restriction of file name pattern)
#   -na   (print lines with '-' or '*' to STDERR)

use strict;
use warnings;

# Separate option switches from file names; @ARGV is left holding only
# the non-empty file names.
my %Option = ( all => 0, na => 0 );
my @inputs;
foreach my $arg (@ARGV) {
    if    ( $arg eq '-all' ) { $Option{all} = 1; }
    elsif ( $arg eq '-na' )  { $Option{na}  = 1; }
    elsif ( $arg ne '' )     { push @inputs, $arg; }
}
@ARGV = @inputs;

# Every print below emits a tab-separated record terminated by a newline.
$\ = "\n";
$, = $" = "\t";

my %Class = (    # Size (workers in an establishment)
    'T' => 0,       # All sizes
    1   => 1000,    # 1000 and over
    3   => 500,     # 500-999
    5   => 100,     # 100-499
    7   => 30,      # 30- 99
    9   => 5,       # 5- 29
);

# $Done{file}{industry}{size} is set once a row has been printed, so that
# later rows with the same key in the same file are skipped.
my %Done;

# Print the header (yyyymm for survey year-month; e0 and e1 for N of workers)
print qw( file line yyyymm size e0 e1 wage industry );

FILE: foreach my $path (@ARGV) {
    open( my $fh, '<', $path ) or die("Cannot open file $path: $!\n");

    my $Filename = $path;
    $Filename =~ s/\.txt$//;

    # Derive the survey year-month from the file name.
    my $Ym = '';
    if ( $Filename =~ /hon\-mks(\d\d\d\d\d\d)/ ) {
        $Ym = $1;
    }
    elsif ( $Filename =~ /mks190_(\d\d\d\d\d\d)/ ) {
        $Ym = $1;
    }
    elsif ( $Filename =~ /(sai)?(\d\d)(\d\d)mks/ ) {
        # Two-digit year + 1988 gives the four-digit year
        # (presumably a Heisei-era year; confirm against the file naming).
        my $ad = 1988 + $2;
        $Ym = $ad . $3;
    }

    # Filename pattern is restricted unless the option '-all' was specified
    if ( $Ym eq '' && !$Option{all} ) {
        close $fh;
        next FILE;
    }

    my $Line = 0;
    my $Ind  = '';

    # Read the file line by line.  The raw line is kept (terminator
    # included) for the -na diagnostic output.
    while ( my $raw = <$fh> ) {
        ++$Line;

        my @field = split /\t/, $raw;
        foreach my $cell (@field) {
            $cell =~ s/^[\"\s]*//;           # strip leading quotes/blanks
            $cell =~ s/[\"\s]*$//;           # strip trailing quotes/blanks
            $cell =~ s/(\d),(\d)/$1$2/g;     # drop thousands separators
        }

        # Absent columns are treated as empty strings.
        my $label  = defined $field[0] ? $field[0] : '';
        my $second = defined $field[1] ? $field[1] : '';

        # Industry: a row whose first cell is 'TL' or starts with C..R and
        # whose second cell is empty opens a new industry section.
        if ( ( $label eq 'TL' || $label =~ /^[C-R]/ ) && $second eq '' ) {
            $Ind = $label;
            next;
        }

        # Establishment size: only rows whose first cell is a known
        # size code are data rows.
        my $class = $Class{$label};
        next unless defined $class;
        next if $Done{$Filename}{$Ind}{$class};

        # Number of workers (columns 3 and 6) and wage (column 13)
        my ( $e0, $e1, $wage ) =
            map { defined $_ ? $_ : '' } @field[ 3, 6, 13 ];

        # Missing values: a lone '-' or '*' is blanked out and counted.
        my $na = 0;
        foreach my $value ( $e0, $e1, $wage ) {
            ++$na if $value =~ s/^[\-\*]$//;
        }

        if ( $Option{na} && $na ) {
            print STDERR $Filename, $Line, $Ym, $class, $e0, $e1, $wage,
                $Ind, '||' . $raw;
        }

        print $Filename, $Line, $Ym, $class, $e0, $e1, $wage, $Ind;
        ++$Done{$Filename}{$Ind}{$class};
    }

    close $fh;
}