use strict;
use warnings;

use POSIX;

# ----------------------------------------------------------------------------
# Program to read data released by the UK Met Office on 08/12/2009 and
# produce KML files showing the location of all of the temperature
# stations in the released data, and then overlaying the globe with
# temperature anomalies.  These KML file can be imported to Google
# Earth or Google Maps for display.  The program also outputs per
# station CSV files containing anomaly data on a monthly basis.  It
# then outputs northern, southern and global trend data from 1850 for
# use with GNUPLOT.  Finally it outputs an animation of the October
# anomaly grids since 1850.
#
# In reading the Met Office data the program also looks for missing
# information (such as missing fields in the files and missing data in
# the observations).  It also checks for any oddities such as
# non-numeric values in numeric fields.
#
# It also validates the Normals data for the 1961-1990 base period.
# All the calculations are based on information drawn from the paper
# "Uncertainty estimates in regional and global observed temperature
# changes: a new dataset from 1850", P. Brohan, J. J. Kennedy,
# I. Harris, S. F. B. Tett & P. D. Jones, 2005.
#
# IMPORTANT NOTE: This program does not remove outliers.  In the
# published texts removal of outliers is done by cutting off data that
# it five standard deviations away from normal.  This data is not
# removed here.
#
# Written by John Graham-Cumming (http://www.jgc.org/)
#
# Released under the General Public License, version 2
# ----------------------------------------------------------------------------

# To run this program do 'perl hadcrut.pl'

# ----------------------------------------------------------------------------
# 1. This section reads data from the files and converts it to a Perl
# hash structure for use by the rest of the program.  It performs
# integrity checks on the Met Office data.
# ----------------------------------------------------------------------------

# A directory called MetOffice08122009 is assumed to exist and contain
# a collection of two-digit subdirectories which contain the actual
# observation files.

my $root = 'MetOffice08122009';
my $top = "$root/";
my @subdirs = glob "$top*";

my @month_names = ( 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
		    'Sep', 'Oct', 'Nov', 'Dec' );

# Verify that all the subdirectories have two digits and that there's
# no extraneous data.  In doing so build up a list of all the
# observation files.  These should also be in a specific form: each
# has a six digit name where the first two digits correspond to the
# two digits in the directory name.
# 
# If this code doesn't force an error the @obs array will contain the
# names of all the observation files in the released data.  Each entry
# will be the path to that file.

my @obs;

foreach my $s (@subdirs) {
  if ( $s !~ /^$top(\d\d)$/ ) {
    die "Unexpected subdirectory $s\n";
  }

  # Captures the two-digit portion of the directory name
  my $two = $1;

  my @o = glob "$top$two/*";
  foreach my $f (@o) {
    if ( $f !~ /^$s\/$two\d{4}$/ ) {
      die "Unexpected file $f\n";
    } else {
      push @obs, ($f);
    }
  }
}

print "Found ", $#obs+1, " observation files\n";

# This hash contains the fields that are expected in the released data
# and maps to a regular expression for recognizing the data.  These
# expressions are used to look for anomalies (such as an alpha
# character in a numeric field).

my %fields = ( 
  'Number'                                => qr/^\d{6}$/,

   # The Name and Country fields contain all sorts of extra characters
   # that you might not expect e.g. SPAIN is followed by a line of -
   # characters

  'Name'                              => qr/^[A-Za-z \.\-\/\(\)\d\*',]+$/,
  'Country'                               => qr/^[A-Z \.\-\(\)']+$/,
  'Lat'                                   => qr/^-?\d{1,2}\.\d$/,
  'Long'                                  => qr/^-?\d{1,3}\.\d$/,
  'Height'                                => qr/^-?\d+$/,
  'Start year'                            => qr/^[12]\d{3}$/,
  'End year'                              => qr/^[12]\d{3}$/,
  'First Good year'                       => qr/^[12]\d{3}$/,
  'Source ID'                             => qr/^\d+$/,
  'Source file'                           => qr/^.+$/,
  'Jones data to'                         => qr/^[12]\d{3}$/,
  'Normals source'                        => qr/^.+$/,
  'Normals source start year'             => qr/^[12]\d{3}$/,
  'Normals source end year'               => qr/^[12]\d{3}$/,
  'Normals'                               => qr/^(\s*-?\d+\.\d\s*){12}$/,
  'Normals source percent availability'   => qr/^\d+$/,
  'Normals source variable code'          => qr/^\d+$/,
  'Standard deviations source'            => qr/^.+$/,
  'Standard deviations source start year' => qr/^[12]\d{3}$/,
  'Standard deviations source end year'   => qr/^[12]\d{3}$/,
  'Standard deviations'                   => qr/^(\s*-?\d+\.\d\s*){12}$/
);

# Now read through each of the observation files and verify that they
# have all the fields that are required.  Extract the latitude and
# longitude and use these to fill in hash elements named 'latitude'
# and 'longitude' in the %observations hash.  This hash is keyed by
# the observation number.  Other data extracted from the Met Office
# data is stored in this hash, see example below.
#
# For example an entry in this hash might look like
#
# $observations{988510}
# $observations{988510}{latitude}       = 6.1
# $observations{988510}{longitude}      = -125.2
# $observations{988510}{normal_start}   = 1961
# $observations{988510}{normal_end}     = 1990
# $observations{988510}{normals}        = ( array of 12 averages )
# $observations{988510}{normals_source} = 'WMO'
# $observations{988510}{good_start}     = 1908
#
# The actual observations are stored as follows:
#
# $observations{988510}{observations}{1984} = ( array of 12 averages )

my %observations;

foreach my $f (@obs) {
  if ( open F, "<$f" ) {

    # Parse out the name of the file to extract the expected Number
    # which should match the Number field inside the file

    $f =~ /$top\d{2}\/(\d{6})/;
    my $number = $1;

    # This will get set to 1 as soon as we find the Obs: marker in the
    # file and hence are done with the header portion and are on to
    # the actual observations

    my $doing_obs = 0;

    # This is used to make sure that the observations are all there,
    # i.e. that there are no gaps in the years in a single file

    my $year_obs = 0;

    while (<F>) {
      chomp;

      my $line = $_;

      if ( !$doing_obs ) {

	# The line should have an = sign in it, or it should be the
	# start of the actual observations which begins with Obs:

	if ( $line =~ /^([^=]+)\s*=\s*(.+)/ ) {
          my ( $field_name, $field_value ) = ( $1, $2 );

	  if ( exists($fields{$field_name}) ) {
	    if ( $field_value =~ /$fields{$field_name}/ ) {
	    
	      # Check that the Number field is the same as the number
	      # in the file name

	      if ( ( $field_name eq 'Number' ) && ( $field_value != $number ) ) {
		die "Number field $field_value doesn't match file name $f\n";
	      }

	      # Capture latitude and longitude, name and cut off year

	      if ( $field_name eq 'Lat' ) {
		$observations{$number}{latitude} = $field_value;
	      } elsif ( $field_name eq 'Long' ) {

		# Note that I am flipping the sign on the longitude here because
                # it appears that the Met Office is swapping east and west
                # compared to how KML thinks about longitude

		$observations{$number}{longitude} = -$field_value;
	      } elsif ( $field_name eq 'Name' ) {
		$observations{$number}{name} = $field_value;
	      } elsif ( $field_name eq 'First Good year' ) {
		$observations{$number}{good_year} = $field_value;
	      }

	      # Grab the 'normals'.  This should be the 1961-1990 mean
	      # temperature per month

	      if ( $field_name eq 'Normals' ) {
		my @normals = split(' ', $field_value);

		# Some of the Normals are just lists of -99.0 meaning
		# that there are actually no normals.  If that's the
		# case then don't add the entry.

		my @undefined = grep(/^-99\.0$/, @normals);
		if ( $#undefined != 11 ) {
		  $observations{$number}{normals} = \@normals;
		}
	      } elsif ( $field_name eq 'Normals source' ) {
		$observations{$number}{normals_source} = $field_value;
	      } elsif ( $field_name eq 'Normals source start year' ) {
		$observations{$number}{normals_start} = $field_value;
	      } elsif  ( $field_name eq 'Normals source end year' ) {
		$observations{$number}{normals_end} = $field_value;
	      }
	    } else {
	      die "Value for field $field_name in $f is incorrect ($field_value)\n";
	    }
 	  } else {

	    # Since I am not using any extraneous data I just report
	    # it and carry on.  With the released data set this
	    # message is never produced.

	    print STDERR "Unexpected header value $field_name in file $f\n";
	  }
	} elsif ( $line =~ /^Obs:$/ ) {
	  $doing_obs = 1;
	} else {
	  die "Unexpected line [$line] in $f\n";
	}
      } else {

	# Observation lines should start with a four figure year
	# followed by 12 observations.  The years should be contiguous.

	if ( $line =~ /([12]\d{3})\s+(\s*-?\d+\.\d\s*){12}/ ) {
	  if ( $year_obs == 0 ) {
	    $year_obs = $1;
	  } else {
	    $year_obs += 1;
	  }

	  if ( $year_obs != $1 ) {
	    die "Gap in observation record at year $1 in file $f\n";
	  }

	  # The first element of this array will be the year of the
	  # observation and the remaining 12 elements will be the
	  # monthly values.

	  my @o = split( ' ', $line );
	  my $year = shift @o;

	  # Some of the files have all -99.0 values for the
	  # observations, which is an entire bogus year.  Ignore those
	  # completely.  Also drop any data that's before the First
	  # Good year.

	  my @undefined = grep(/^-99\.0$/, @o);
	  if ( $#undefined != 11 ) {
	    if ( ( !defined( $observations{$number}{good_year} ) ) || 
		 ( $year >= $observations{$number}{good_year} ) ) {
	      $observations{$number}{observations}{$year} = \@o;
	    }
	  }
	} else {
	  die "Unexpected observation line [$line] in $f\n";
	}
      }
    }

    close F;

    # Verify that we have captured latitude, longitude, and name

    if ( !exists( $observations{$number}{latitude} ) ||
	 !exists( $observations{$number}{longitude} ) ||
	 !exists( $observations{$number}{name} ) ) {
      die "Incomplete information for $f\n";
    }

    # Missing normals data is interesting and worth warning about, but
    # not fatal.  If the normals data is present then verify that it
    # is correct.

    if ( exists( $observations{$number}{normals} ) &&
	 exists( $observations{$number}{normals_start} ) &&
	 exists( $observations{$number}{normals_end} ) ) {

      # To check the Normals information run through all the
      # observations counting the number of observation years
      # available between 1961 and 1990 inclusive producing the
      # average value for each month.  Because there could be
      # individual missing readings (value of -99.0) we have to keep a
      # separate count on a monthly basis for averaging.

      my @counts = ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 );
      my @months = ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 );
      my $year_count = 0;

      foreach my $y (sort keys %{$observations{$number}{observations}}) {
	if ( ( $y >= 1961 ) && ( $y <= 1990 ) ) {
	  $year_count += 1;
	  foreach my $i (0..11) {
            my $o = $observations{$number}{observations}{$y}[$i];

	    if ( $o != -99 ) {
	      $months[$i] += $o;
	      $counts[$i] += 1;
	    }
	  }
	}
      }

      # HadCRUT paper only uses data where there are 15 years of
      # data between 1961 and 1990.

      if ( $year_count >= 15 ) {
	foreach my $i (0..11) {
	  my $n = $observations{$number}{normals}[$i];

	  # The Normals data supplied has only one decimal place so
	  # perform rounding to one decimal place.  Since it's not
	  # possible to rely on Perl's rounding functionality I do it
	  # myself here, by checking that the value is within 0.05 of
	  # the expected.

	  my $avg = $months[$i]/$counts[$i];

	  # Only complain about a difference if the data says it comes
	  # from Data (i.e. from the observations).  If it comes from
	  # WMO or Extrpolated then it's from a different data set.

	  if ( ( $observations{$number}{normals_source} eq 'Data') &&
	       ( floor(abs($avg-$n)*100) > 5 ) ) {
	    my $diff = $avg-$n;
	    print STDERR "$f: Normals value $n != $avg (difference $diff) (month $i)\n";
	  }
	}
      }
    }
  } else {
    die "Failed to open observation file $f\n";
  }
}

# ----------------------------------------------------------------------------
# 2. Run through the observations and remove any observation data for
# which we do not have Normals.  Then calculate anomaly data for every
# year and month for which there is data using the Normals data
# supplied by the Met Office.
# 
# For each year for which there is data there will be a corresponding array
# of anomaly values in the hash key {anomalies}, e.g.
#
# $observations{988510}{observations}{1984} = ( array of 12 averages )
# $observations{988510}{anomalies}{1984}    = ( array of 12 averages )
# 
# The second array can be thought of as
#   $observations{988510}{observations}{1984} - $observations{988510}{normals}
# ----------------------------------------------------------------------------

my @remove;

foreach my $o (sort keys %observations) {
  if ( !exists($observations{$o}{normals}) ) {
    push @remove, ($o);
  }
}

print "Removing ", $#remove+1, " stations which are missing Normals\n";

foreach my $o (@remove) {
  delete $observations{$o};
}

my $count = keys %observations;
print "Working with remaining $count stations\n";

foreach my $o (sort keys %observations) {
  foreach my $y (sort keys %{$observations{$o}{observations}}) {
    foreach my $m (0..11) {
      if ( $observations{$o}{observations}{$y}[$m] != -99.0 ) {
	$observations{$o}{anomalies}{$y}[$m] = 
	  ($observations{$o}{observations}{$y}[$m] -
	   $observations{$o}{normals}[$m]);
      } else {
	$observations{$o}{anomalies}{$y}[$m] = undef;
      }
    }
  }
}

# ----------------------------------------------------------------------------
# 3. Perform gridding.  Assume a 5 degree square grid size and perform
# the gridding as discussed in Jones/Moberg 2003: find all the
# stations that are inside the square and average their anomaly
# values.
# ----------------------------------------------------------------------------

# The grid has a pair of subhashes for longitude and latitude
#
# e.g. $grid{}{}{170}{50} is the grid at 170 long, 50 lat to 175 long to
# 55 lat, within a year, then a month containing a count and a total
#
# $grid{1899}{0}{170}{50}{count}
# $grid{1899}{0}{170}{50}{total}
#
# Also keep track of the stations used in this grid box, this is just
# a string containing the numbers of each station used in the grid
#
# $grid{1899}{0}{170}{50}{stations}

my %grid;

# The size of the edge of a grid square in degrees

my $grid_size = 5;

foreach my $o (sort keys %observations) {
  my $lat = $observations{$o}{latitude};
  my $long = $observations{$o}{longitude};

  $lat  = int((90+$lat)/$grid_size)*$grid_size-90;
  $long = int((180+$long)/$grid_size)*$grid_size-180;

  foreach my $y (sort keys %{$observations{$o}{observations}}) {
    foreach my $m (0..11) {
      my $a = $observations{$o}{anomalies}{$y}[$m];

      if ( defined( $a ) ) {
	$grid{$y}{$m}{$long}{$lat}{count} += 1;
	$grid{$y}{$m}{$long}{$lat}{total} += $a;
	$grid{$y}{$m}{$long}{$lat}{stations} .= "$o ";
      }
    }
  }
}

# ----------------------------------------------------------------------------
# 4. Use the gridded data to calculate northern and southern
# hemisphere average temperature anomaly.  This is done by averaging
# the anomalies over all the grid squares in the two hemispheres on a
# monthly basis to produce a single figure for each.
# ----------------------------------------------------------------------------

# Both these hashes have the same shape.  They contain a total and
# count of anomalies from all the grid squares in their hemisphere for
# the specific month and year.
#
# e.g. $north{1899}{0}{count}
#      $north{1899}{0}{total}

my %north;
my %south;

foreach my $year (sort keys %grid) {
  foreach my $month (sort { $a <=> $b } keys %{$grid{$year}}) {
    foreach my $long (sort keys %{$grid{$year}{$month}}) {
      foreach my $lat (sort keys %{$grid{$year}{$month}{$long}}) {
	if ( $lat >= 0 ) {
	  $north{$year}{$month}{count} += 1;
	  $north{$year}{$month}{total} +=
	    $grid{$year}{$month}{$long}{$lat}{total} /
	      $grid{$year}{$month}{$long}{$lat}{count};
	} else {
	  $south{$year}{$month}{count} += 1;
	  $south{$year}{$month}{total} +=
	    $grid{$year}{$month}{$long}{$lat}{total} /
	      $grid{$year}{$month}{$long}{$lat}{count};
	}
      }
    }
  }
}

# ----------------------------------------------------------------------------
# 5. These are all helper functions for creating kml files that
# prevent duplication below.  They all return a string containing KML
# that can be sent to a file.
# ----------------------------------------------------------------------------

sub kml_start
{
  my ( $name ) = @_;  # Name for the <name>
  return <<EOF;
<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2">
<Document>
<name>$name</name>
EOF

}

sub kml_end
{
  return <<EOF;
</Document>
</kml>
EOF

}

sub kml_placemark
{
  my ( $name,             # Name of the placemark
       $description,      # Description
       $long,             # The longitude
       $lat,              # The latitude
       $altitude ) = @_;  # The altitude
  return <<EOF;
  <Placemark>
   <name>$name</name>
   <description>$description</description>
    <Point>
      <coordinates>$long,$lat,$altitude</coordinates>
    </Point>
  </Placemark>
EOF

}

sub kml_style
{
  my ( $id,            # The ID attribute for the style
       $colour ) = @_; # The color (AABBGGRR)

  return <<EOF;
<Style id="$id">
  <PolyStyle>
    <color>$colour</color>
    <colorMode>normal</colorMode>
  </PolyStyle>
</Style>
EOF

}

# This is the function that places a coloured square on the globe to
# show gridded anomaly data

sub kml_square
{
  my ( $name,         # Name of the grid square
       $style,        # Style from kml_style definition
       $long,         # Longitude of bottom-left corner
       $lat,          # Latitude of bottom-left corner
       $size,         # Number of degrees along an edge
       $year, $month ) = @_;  # (optional) Time when square is displayed

  my ( $lx, $ly ) = ( $long,         $lat         );
  my ( $rx, $ry ) = ( $long + $size, $lat + $size );

  my $timestamp = '';

  if ( defined( $year ) && defined( $month ) ) {
      my $begin = sprintf( "%d-%02d-01", $year, $month+1 );
      my $end = sprintf( "%d-%02d-01", $year+1, $month+1 );
      $timestamp=<<EOF;
     <TimeSpan>
       <begin>$begin</begin>
       <end>$end</end>
     </TimeSpan>
EOF

  }

  return <<EOF;
  <Placemark>
    <name>$name</name>
    <styleUrl>#$style</styleUrl>
    $timestamp
    <Polygon>
      <altitudeMode>clampToGround</altitudeMode>
      <outerBoundaryIs>
        <LinearRing>
          <coordinates>
            $lx, $ly
            $lx, $ry
            $rx, $ry
            $rx, $ly
          </coordinates>
        </LinearRing>
      </outerBoundaryIs>
    </Polygon>
  </Placemark>
EOF

  }

# ----------------------------------------------------------------------------
# 6. This section writes output files containing versions of the data
# from the %observations and %grid hashes.
# ----------------------------------------------------------------------------

# Use the %observations hash to build the output a KML file, this will
# be stored in a file called MetOffice08122009Points.kml.  This file
# contains just point information showing where the temperature
# measuring stations are located.  Since the hash has been edited to
# remove stations with no anomalies these will not appear in this KML
# file.
#
# IMPORTANT NOTE: this is currently outputting an altitude of 0 for
# each observation.

my $kml = "${root}Points.kml";

if ( open F, ">$kml" ) {
  print F kml_start($kml);

  foreach my $o (sort keys %observations) {
    print F kml_placemark( $observations{$o}{name}, $o, 
			   $observations{$o}{longitude}, 
			   $observations{$o}{latitude}, 0 );
  }

  print F kml_end();
  close F;
} else {
  die "Failed to create output KML file $kml\n";
}

# Make one CSV file per station with anomaly data showing the
# anomalies by year and month.  The CSV files have the file name
# MetOffice08122009AnomalyXXXXXX.csv where the XXXXXX is the station
# number.  To keep things tidy they are written to a directory called
# MetOffice08122009CSV which is expected to exist.
#
# Only data from 1850 onwards is plotted (in line with the HadCRUT3
# dataset).

my $csv = "${root}CSV/${root}Anomaly";

foreach my $o (sort keys %observations) {
  if ( open F, ">$csv$o.csv" ) {
    foreach my $y (sort keys %{$observations{$o}{anomalies}}) {
      if ( $y >= 1850 ) {
	foreach my $m (0..11) {
	  if ( defined( $observations{$o}{anomalies}{$y}[$m] ) ) {
	    print F "$month_names[$m]/$y,", $observations{$o}{anomalies}{$y}[$m],"\n";
	  }
	}
      }
    }
    close F;
  } else {
    die "Failed to create output CSV file $csv\n";
  }
}

# Output a KML file showing the average temperature anomaly for
# October 2009.  This file will be called
# MetOffice08122009AnomalyOct2009.kml and shows the average anomaly
# over the year in each grid area.  It's possible to change this by
# altering $year and $month below (watch out for the fact that 0 =
# Jan).

my $year = 2009;
my $month = 9;

# This table is used to colour the output polygons based on the degree
# of anomaly.  The $alpha value sets the transparency for the polygons
# (00 = transparent, ff = opaque).  The actual colour values are (B,
# G, R) tuples.  To find the colour look for the smallest key in this
# hash greater than the anomaly value.  Note that the final entry of
# 1000 is meant to catch all the temperatures greater than 5 degrees
# over.

my $alpha = 'aa';

my %anomaly_colours = (
 -5   => 'd70929', -3   => 'ff4d26', -1   => 'ffa03f',
 -0.5 => 'ffd972', -0.2 => 'fff8aa', 0    => 'ffffe0',
 0.2  => 'bfffff', 0.5  => '99e0ff', 1    => '72adff',
 3    => '5e6df8', 5    => '3226d8',1000  => '2100a5' );

$kml = "${root}Anomaly$month_names[$month]$year.kml";

if ( open F, ">$kml" ) {
  print F kml_start( $kml );

  foreach my $s (sort { $a <=> $b } keys %anomaly_colours) {
    print F kml_style( "ps$s", "$alpha$anomaly_colours{$s}" );
  }

  foreach my $long (sort keys %{$grid{$year}{$month}}) {
    foreach my $lat (sort keys %{$grid{$year}{$month}{$long}}) {
       my $name = $grid{$year}{$month}{$long}{$lat}{stations};
       my $an =  $grid{$year}{$month}{$long}{$lat}{total} / 
	 $grid{$year}{$month}{$long}{$lat}{count};
       my $style;
       foreach my $s (sort { $a <=> $b } keys %anomaly_colours) {
	 if ( $an < $s ) {
	   $style = "ps$s";
	   last;
	 }
       }

       print F kml_square( $name, $style, $long, $lat, $grid_size )
    }
  }

  foreach my $o (sort keys %observations) {
    print F kml_placemark( $observations{$o}{name}, $o, 
			   $observations{$o}{longitude},
			   $observations{$o}{latitude}, 0 );
  }

  print F kml_end();
  close F;
} else {
  die "Failed to create output KML file $kml\n";
}

# Now output GNUPLOT compatible files that shows the temperature trend
# for the northern and southern hemispheres (and the average of the
# two) since January 1850.  The files are named MetOffice08122009Trend
# followed by North, South or Global and with the '.dat' extension

my $gnu = "${root}TrendNorth.dat";

if ( open F, ">$gnu" ) {
  foreach my $year (sort keys %north) {
    if ( $year >= 1850 ) {
      foreach my $month (sort { $a <=> $b } keys %{$north{$year}}) {
	my $an = $north{$year}{$month}{total} / 
	  $north{$year}{$month}{count};
	print F sprintf( "$year.%02d\t$an\n", $month );
      }
    }
  }
  close F;
} else {
  die "Failed to open GNUPLOT file $gnu\n";
}

$gnu = "${root}TrendSouth.dat";

if ( open F, ">$gnu" ) {
  foreach my $year (sort keys %south) {
    if ( $year >= 1850 ) {
      foreach my $month (sort { $a <=> $b } keys %{$south{$year}}) {
	my $an = $south{$year}{$month}{total} / 
	  $south{$year}{$month}{count};
	print F sprintf( "$year.%02d\t$an\n", $month );
      }
    }
  }
  close F;
} else {
  die "Failed to open GNUPLOT file $gnu\n";
}

$gnu = "${root}TrendGlobal.dat";

if ( open F, ">$gnu" ) {
  foreach my $year (sort keys %north) {
    if ( $year >= 1850 ) {
      foreach my $month (sort { $a <=> $b } keys %{$north{$year}}) {

	# This loop is assuming that the Northern hemisphere has more
	# data points than the southern and that it's enough to check
	# that we have data for the same year/month in the southern
	# hemisphere.  If we don't have for both then we don't output
	# an average.

	if ( exists( $south{$year}{$month} ) ) {
	  my $an = $north{$year}{$month}{total} / 
	    $north{$year}{$month}{count};
	  my $as = $south{$year}{$month}{total} / 
	    $south{$year}{$month}{count};
	  my $avg = ( $an + $as ) / 2;
	  print F sprintf( "$year.%02d\t$avg\n", $month );
	} else {
	  print "Omitting data for global average $year/$month\n";
	}
      }
    }
  }
  close F;
} else {
  die "Failed to open GNUPLOT file $gnu\n";
}

# Finally, create an animated KML file from the gridded data that
# shows the change in temperature from 1850 for a specific month.  The
# file will be called MetOffice08122009AnimatedMON.kml

$month = 9;  # The month to animate

$kml = "${root}Animated$month_names[$month].kml";

if ( open F, ">$kml" ) {
  print F kml_start($kml);

  foreach my $s (sort { $a <=> $b } keys %anomaly_colours) {
    print F kml_style( "ps$s", "$alpha$anomaly_colours{$s}" );
  }

  foreach my $year (sort keys %grid) {
    if ( $year >= 1850 ) {
      foreach my $long (sort keys %{$grid{$year}{$month}}) {
	foreach my $lat (sort keys %{$grid{$year}{$month}{$long}}) {
	  my $name = $grid{$year}{$month}{$long}{$lat}{stations};
	  my $an =  $grid{$year}{$month}{$long}{$lat}{total} / 
	    $grid{$year}{$month}{$long}{$lat}{count};
	  my $style;
	  foreach my $s (sort { $a <=> $b } keys %anomaly_colours) {
	    if ( $an < $s ) {
	      $style = "ps$s";
	      last;
	    }
	  }
	  
	  # Pass in the year and month to enable the time dependence
	  # and hence the animation
	  
	  print F kml_square( $name, $style, $long, $lat, $grid_size,
			      $year, $month );
	}
      }
    }
  }

  print F kml_end();
  close F;
} else {
  die "Failed to open KML file $kml\n";
}

