# Extract the number of workers and contractual cash earnings from 
#  the original tables ("maikin genhyou" in Japanese) of Monthly Labour Survey.
# Excel files are available from 
#  https://www.e-stat.go.jp/stat-search/files?tstat=000001011791
# 2021/08/15 - 2021/09/06
# Created by TANAKA Sigeto 
# URL: http://tsigeto.info/maikin/maikin-monthly.pl.txt
# Option:
#    -all (without restriction of file name pattern)
#    -na  (print lines with '-' or '*' to STDERR)
# Command-line switches, both off by default:
#   all - process every file, even when its name matches no known pattern
#   na  - echo rows containing a '-' or '*' placeholder to STDERR
%Option = ( all => 0, na => 0 );

# Recognise the switches in one pass.  The loop variable aliases the
# element of @ARGV, so a recognised switch is blanked out in place.
foreach my $arg (@ARGV) {
	if ( $arg =~ s/^\-all$// ) {
		$Option{all} = 1;
	}
	elsif ( $arg =~ s/^\-na$// ) {
		$Option{na} = 1;
	}
}

# Whatever is left non-empty is the list of input files.
@ARGV = grep { $_ ne '' } @ARGV;

# Output format: newline after every print, tab between list items
# (both for print lists and for arrays interpolated into strings).
$\ = "\n";
$" = "\t";
$, = "\t";
# Establishment-size classes.  The key is the code looked up from the
# first column of a data row; the value is the smallest number of workers
# in that class and is what gets printed in the "size" column.
%Class = (
	'T' => 0,       # all sizes combined
	'1' => 1000,    # 1000 workers and over
	'3' => 500,     # 500 to 999 workers
	'5' => 100,     # 100 to 499 workers
	'7' => 30,      #  30 to  99 workers
	'9' => 5,       #   5 to  29 workers
);
# Header row of the output table.  "yyyymm" is the survey year-month and
# "e0" / "e1" are the two numbers-of-workers columns.
print 'file', 'line', 'yyyymm', 'size', 'e0', 'e1', 'wage', 'industry';
# Derive the survey year-month ("yyyymm") from a table file name.
#   $name - file name with the ".txt" suffix already removed
# Returns the six-digit year-month, or '' when the name matches none of the
# known patterns.
sub survey_month {
	my ($name) = @_;

	# Names that carry a full six-digit year-month.
	return $1 if $name =~ /hon\-mks(\d\d\d\d\d\d)/;
	return $1 if $name =~ /mks190_(\d\d\d\d\d\d)/;

	# Names with a two-digit year and two-digit month in front of "mks"
	# (optionally preceded by "sai", which needs no special handling).
	# 1988 is added to the year -- presumably it is a Heisei-era year;
	# confirm against the file naming scheme.
	if ( $name =~ /(\d\d)(\d\d)mks/ ) {
		return ( 1988 + $1 ) . $2;
	}

	return '';
}

# Read one tab-separated table file and print one output row for each
# establishment-size row found in it.
#   $path - path of the input file
# Uses the globals %Option and %Class, and records what has been printed in
# %Done so that only the first row for each industry / size combination of
# a file is output.  Dies when the file cannot be opened.
sub extract_file {
	my ($path) = @_;

	# Three-argument open: the name is always treated as a plain file name.
	open( my $fh, '<', $path ) || die("Cannot open file $path: $!\n");

	( my $Filename = $path ) =~ s/\.txt$//;
	my $Ym = survey_month($Filename);

	# Filename pattern is restricted unless the option '-all' was specified
	if ( $Ym eq '' && !$Option{all} ) {
		close $fh;
		return;
	}

	my $Line = 0;
	my $Ind  = '';
	LINE: while ( my $line = <$fh> ) {
		++$Line;
		chomp $line;

		# Limit -1 keeps trailing empty columns, so column positions are
		# stable even when the last cells of a row are blank.
		my @field = split /\t/, $line, -1;
		foreach my $cell (@field) {
			$cell =~ s/^[\"\s]*//;            # leading quotes / blanks
			$cell =~ s/[\"\s]*$//;            # trailing quotes / blanks
			$cell =~ s/(\d),(\d)/$1$2/g;      # thousands separators
		}
		my $code = defined $field[0] ? $field[0] : '';

		# Industry: a row whose first cell is 'TL' or starts with C..R and
		# whose second cell is empty names the industry of the rows below.
		if (   ( $code eq 'TL' || $code =~ /^[C-R]/ )
			&& ( !defined $field[1] || $field[1] eq '' ) )
		{
			$Ind = $code;
			next LINE;
		}

		# Establishment size: rows with an unknown code are not data rows.
		my $class = $Class{$code};
		next LINE unless defined $class;
		next LINE if $Done{$Filename}{$Ind}{$class};

		# Number of workers (columns 3 and 6) and wage (column 13)
		my ( $e0, $e1, $wage ) = @field[ 3, 6, 13 ];

		# Missing values: a lone '-' or '*' becomes an empty cell.
		my $na = 0;
		foreach my $value ( $e0, $e1, $wage ) {
			$value = '' unless defined $value;
			++$na if $value =~ s/^[\-\*]$//;
		}
		if ( $Option{na} && $na ) {
			print STDERR $Filename, $Line, $Ym, $class, $e0, $e1, $wage, $Ind, '||' . $line;
		}

		print $Filename, $Line, $Ym, $class, $e0, $e1, $wage, $Ind;
		++$Done{$Filename}{$Ind}{$class};
	}

	close $fh;
	return;
}

# Process every file named on the command line.
FILE: foreach my $path (@ARGV) {
	extract_file($path);
}