#!/usr/bin/perl
# ------ extract-isa14.pl
# Make a list of sessions of 2014 XVIII ISA World Congress of Sociology
# 2013/08/15 - 2013/09/19
#
# Download the lists of sessions from ISA website:
#   wget -N -x http://www.isa-sociology.org/congress2014/rc/joint-sessions.php
#   wget -N -x -r -l1 -np http://www.isa-sociology.org/congress2014/rc/
#   wget -N -x -r -l1 -np http://www.isa-sociology.org/congress2014/wg/
#   wget -N -x -r -l1 -np http://www.isa-sociology.org/congress2014/tg/

# Script-wide I/O settings (plain globals; the script does not run under strict).
undef $/;           # no input record separator: one read returns a whole file
$\ = "\n";          # every print is terminated with a newline
$" = $, = "\t";     # tab between interpolated list elements and between print arguments
$Count = 0;         # records written so far; reported on STDERR at the end

# url( $path ) -> string
# Builds an "http://" URL from a path string: the first "@" becomes "?",
# every backslash becomes "/", and "http://" is prepended.
# Presumably the input is the local file name of a page mirrored with the
# wget commands listed in the header -- confirm against the (lost) call site.
sub url {
    my $f = shift;
    $f =~ s|\@|\?| ;        # only the first "@" is replaced (no /g)
    $f =~ s|\\|\/|g ;       # all backslashes become forward slashes
    return "http://$f";
}

# output( @fields )
# Cleans each field of markup in place (on the local copy @p).
# FIXME(review): the end of this sub is missing from this copy of the file --
# the statement that emits the cleaned fields and the closing braces are lost
# in the damaged line below. Restore from a clean copy before running.
sub output {
    my ( @p ) = @_ ;
    my $text;
    foreach $text (@p){     # $text aliases each element of @p, so edits stick
        $text =~ s/^[^>]+>// ;              # drop leading text up to and including the first ">"
        # FIXME(review): the pattern below is empty. In Perl an empty pattern
        # reuses the last successfully matched pattern, so this is not a no-op.
        # Presumably a line-break tag was stripped from the pattern -- confirm.
        $text =~ s|| // |g ;
        $text =~ s/<[^>]+>/ /g ;            # any remaining tag becomes a space
        $text =~ s/\n/ /g ;                 # newlines become spaces
        $text =~ s/&#(\d+);/ chr($1) /eg ;  # numeric character reference -> character
        # FIXME(review): the next two patterns look like decoded HTML entities
        # (an ellipsis entity and a non-breaking-space entity); as written the
        # second replaces a space with a space. Verify the bytes against a clean copy.
        $text =~ s/…/ ... /g ;
        $text =~ s/ / /g ;
        $text =~ s/\s+/ /g ;                # collapse runs of whitespace
        $text =~ s/(\[[A-Z]+\])\s*$//i;     # strip a trailing bracketed letter tag

# FIXME(review): DAMAGED SPAN. Markup-like tokens were stripped from the source
# from here on; the end of output(), the file-reading loop and several regex
# bodies are missing. The text is kept exactly as found (including the line
# breaks inside the patterns) and must be restored from a clean copy.
# $text =~ s/; s/\n/ /g ; s|(.+)|i ; # print "\n\n"; print ""; output ($h1); my @entry = split ( /(.+)|i ; next if $title =~ m |(.+)$|; my ($theme) = $organizer =~ m|

(.*)$|;
        $organizer =~ s|

(.*)$||;
        # Per organizer entry: remove an e-mail-like token, then a trailing comma/whitespace.
        my @organizer = map { s/\S+\@\S+//; s/,?\s*$//; $_ } split( /
/, $organizer );
        @organizer = grep { /\S/ } @organizer ;     # keep only non-blank entries
        my $coordinator= 0;
        # output( $committee , $id , "$baseurl\#$id" , $title );
        output( $committee , $id , "$baseurl\#$id" , $title, $e ); # 2013/09/19 changed
        # output( $committee , $id , "$baseurl\#$id" , $title, join( '; ' , @organizer) , $theme);
        ++$Count;
    }
}

# Single interpolated string: with $, set to a tab, the former two-argument
# print put a stray tab between the count and the text.
print STDERR "$Count records.";
#------------- EOF