#!/usr/local/bin/jperl ######################################################## # 賃金構造基本調査職業分類→SSM小分類・威信スコアの対応 # jperl wgpg7595.pl occtable.txt wage9590.txt ######################################################## $, = $" = "\t" ; $\ = "\n" ; ######################################################## # 順位相関ルーチン ######################################################## sub rankcorr { print "" ; print shift(@_) ; local( $N, $P, $Q, $X, $Y, $concord, $discord ) = &rank( @_ ); print "N\t$N" ; print "P\t$P" ; print "Q\t$Q" ; print "X\t$X" ; print "Y\t$Y" ; $diff= $P-$Q; $sum = $P+$Q; $GK_Gamma = $diff/$sum; $Kendall_Tau = $diff / ( sqrt($sum+$X) * sqrt($sum+$Y) ) ; printf( "Goodman-Kraskal's Gamma\t%6.3f\n" , $GK_Gamma ) ; printf( "Kendall's Tau\t%6.3f\n" , $Kendall_Tau ) ; ( $concord, $discord ); } sub rank { @_%2 && return undef ; local(@x) = @_[ 0 .. (@_/2-1) ] ; local(@y) = @_[ @_/2 .. $#_ ] ; local( $i, $j, $diff_x, $diff_y , $P, $Q, $X, $Y, $N ) = ( 0,0,0,0,0,0,0,0,0 ) ; local( @concord ) = ( '' x scalar(@x) ) ; local( @discord ) = ( '' x scalar(@x) ) ; # 変数値 '0' は欠損とみなすので注意。 for( $i=0; $i<$#x; ++$i ){ ''==$x[$i] && next; ''==$y[$i] && next; ++$N; for( $j=1+$i; $j<=$#x; ++$j ){ ''==$x[$j] && next; ''==$y[$j] && next; $diff_x = $x[$i]<=>$x[$j] ; $diff_y = $y[$i]<=>$y[$j] ; if ( 0 < $diff_x*$diff_y ) { ++$P; ++$concord[$i]; ++$concord[$j] ; } elsif(0> $diff_x*$diff_y ) { ++$Q; ++$discord[$i]; ++$discord[$j] ; } elsif( 0==$diff_x ) { ++$X; } elsif( 0==$diff_y ) { ++$Y; } } } ''==$x[$#x] || ''==$y[$#x] || ++$N; return( $N, $P, $Q, $X, $Y, join( $; , @concord ), join( $; , @discord ) ) ; } ######################################################## # SSM 職業分類・威信スコアの読み込み ######################################################## open(IN, shift) || die( "Cannot open file ." ); while( ) { next unless /^(\d\d\d)\t(\d)/ ; chop; @F = split(/\t/) ; $score75{ $F[0] } = $F[2]; $ssm_occ{ $F[0] } = $F[3]; } ######################################################## # 賃金構造基本調査データの読み込み・出力 ######################################################## open(IN, shift) || die( "Cannot open file ." ); print "wage_occ" , "95fWA" , "95mWA" , "code" , "ssm_occ" , "score75" ; while( ) { next unless /^(\d\d\d) / ; chop; @F = split(/\t/) ; ( $code, $name ) = split( / / , $F[0] , 2 ) ; print $name, $F[2], $F[7], $code, $ssm_occ{$code}, $score75{$code} ; # ( '-' eq $F[2] ) && ( $F[2]='' ) ; # ( '-' eq $F[7] ) && ( $F[7]='' ) ; push( @occ95w, $F[0] ); push( @wf, $F[2] ); push( @wm, $F[7] ); push( @score75, $score75{$code} ); } ######################################################## # 順位相関の計算・出力 ######################################################## print "\n-------------\nRank Correlations:" ; ( $fm_W_con, $fm_W_dis ) = &rankcorr( "Female wage * Male wage" , @wf, @wm ) ; ( $f_WP_con, $f_WP_dis ) = &rankcorr( "Prestige * Female wage", @score75 , @wf ) ; ( $m_WP_con, $m_WP_dis ) = &rankcorr( "Prestige * Male wage ", @score75 , @wm, ) ; print "\nNumbers of discordant pair:" ; @fm_W_dis = split( $; , $fm_W_dis ) ; @f_WP_dis = split( $; , $f_WP_dis ) ; @m_WP_dis = split( $; , $m_WP_dis ) ; print "occname\tfemale-male wage\tfemale wage-prestige\tmale wage-prestige" ; for( $i=$[; $i<=$#occ95w; ++$i ){ print $occ95w[$i] , $fm_W_dis[$i] , $f_WP_dis[$i] , $m_WP_dis[$i] ; }