123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536 |
- # ------------------------------------------------------------------------------
- # (C) Crown copyright Met Office. All rights reserved.
- # For further details please refer to the file COPYRIGHT.txt
- # which you should have received as part of this distribution.
- # ------------------------------------------------------------------------------
- use strict;
- use warnings;
- # ------------------------------------------------------------------------------
- package Fcm::Build::Fortran;
- use Text::Balanced qw{extract_bracketed extract_delimited};
- # Actions of this class
- my %ACTION_OF = (extract_interface => \&_extract_interface);
- # Regular expressions
- # Matches a variable attribute
- my $RE_ATTR = qr{
- allocatable|dimension|external|intent|optional|parameter|pointer|save|target
- }imsx;
- # Matches a name
- my $RE_NAME = qr{[A-Za-z]\w*}imsx;
- # Matches a specification type
- my $RE_SPEC = qr{
- character|complex|double\s*precision|integer|logical|real|type
- }imsx;
- # Matches the identifier of a program unit that does not have arguments
- my $RE_UNIT_BASE = qr{block\s*data|module|program}imsx;
- # Matches the identifier of a program unit that has arguments
- my $RE_UNIT_CALL = qr{function|subroutine}imsx;
- # Matches the identifier of any program unit
- my $RE_UNIT = qr{$RE_UNIT_BASE|$RE_UNIT_CALL}msx;
- my %RE = (
- # A comment line
- COMMENT => qr{\A\s*(?:!|\z)}msx,
- # A trailing comment, capture the expression before the comment
- COMMENT_END => qr{\A([^'"]*?)\s*!.*\z}msx,
- # A contination marker, capture the expression before the marker
- CONT => qr{\A(.*)&\s*\z}msx,
- # A contination marker at the beginning of a line, capture the marker and
- # the expression after the marker
- CONT_LEAD => qr{\A(\s*&)(.*)\z}msx,
- # Capture a variable identifier, removing any type component expression
- NAME_COMP => qr{\b($RE_NAME)(?:\s*\%\s*$RE_NAME)*\b}msx,
- # Matches the first identifier in a line
- NAME_LEAD => qr{\A\s*$RE_NAME\s*}msx,
- # Captures a name identifier after a comma, and the expression after
- NAME_LIST => qr{\A(?:.*?)\s*,\s*($RE_NAME)\b(.*)\z}msx,
- # Captures the next quote character
- QUOTE => qr{\A[^'"]*(['"])}msx,
- # Matches an attribute declaration
- TYPE_ATTR => qr{\A\s*($RE_ATTR)\b}msx,
- # Matches a type declaration
- TYPE_SPEC => qr{\A\s*($RE_SPEC)\b}msx,
- # Captures the expression after one or more program unit attributes
- UNIT_ATTR => qr{\A\s*(?:(?:elemental|recursive|pure)\s+)+(.*)\z}imsx,
- # Captures the identifier and the symbol of a program unit with no arguments
- UNIT_BASE => qr{\A\s*($RE_UNIT_BASE)\s+($RE_NAME)\s*\z}imsx,
- # Captures the identifier and the symbol of a program unit with arguments
- UNIT_CALL => qr{\A\s*($RE_UNIT_CALL)\s+($RE_NAME)\b}imsx,
- # Captures the end of a program unit, its identifier and its symbol
- UNIT_END => qr{\A\s*(end)(?:\s+($RE_NAME)(?:\s+($RE_NAME))?)?\s*\z}imsx,
- # Captures the expression after a program unit type specification
- UNIT_SPEC => qr{\A\s*$RE_SPEC\b(.*)\z}imsx,
- );
- # Keywords in type declaration statements
- my %TYPE_DECL_KEYWORD_SET = map { ($_, 1) } qw{
- allocatable
- dimension
- in
- inout
- intent
- kind
- len
- optional
- out
- parameter
- pointer
- save
- target
- };
- # Creates and returns an instance of this class.
- sub new {
- my ($class) = @_;
- bless(
- sub {
- my $key = shift();
- if (!exists($ACTION_OF{$key})) {
- return;
- }
- $ACTION_OF{$key}->(@_);
- },
- $class,
- );
- }
- # Methods.
- for my $key (keys(%ACTION_OF)) {
- no strict qw{refs};
- *{$key} = sub { my $self = shift(); $self->($key, @_) };
- }
- # Extracts the calling interfaces of top level subroutines and functions from
- # the $handle for reading Fortran sources.
- sub _extract_interface {
- my ($handle) = @_;
- map { _present_line($_) } @{_reduce_to_interface(_load($handle))};
- }
- # Reads $handle for the next Fortran statement, handling continuations.
- sub _load {
- my ($handle) = @_;
- my $ctx = {signature_token_set_of => {}, statements => []};
- my $state = {
- in_contains => undef, # in a "contains" section of a program unit
- in_interface => undef, # in an "interface" block
- in_quote => undef, # in a multi-line quote
- stack => [], # program unit stack
- };
- my $NEW_STATEMENT = sub {
- { name => q{}, # statement name, e.g. function, integer, ...
- lines => [], # original lines in the statement
- line_number => 0, # line number (start) in the original source
- symbol => q{}, # name of a program unit (signature, end)
- type => q{}, # e.g. signature, use, type, attr, end
- value => q{}, # the actual value of the statement
- };
- };
- my $statement;
- LINE:
- while (my $line = readline($handle)) {
- if (!defined($statement)) {
- $statement = $NEW_STATEMENT->();
- }
- my $value = $line;
- chomp($value);
- # Pre-processor directives and continuation
- if (!$statement->{line_number} && index($value, '#') == 0) {
- $statement->{line_number} = $.;
- $statement->{name} = 'cpp';
- }
- if ($statement->{name} eq 'cpp') {
- push(@{$statement->{lines}}, $line);
- $statement->{value} .= $value;
- if (rindex($value, '\\') != length($value) - 1) {
- $statement = undef;
- }
- next LINE;
- }
- # Normal Fortran
- if ($value =~ $RE{COMMENT}) {
- next LINE;
- }
- if (!$statement->{line_number}) {
- $statement->{line_number} = $.;
- }
- my ($cont_head, $cont_tail);
- if ($statement->{line_number} != $.) { # is a continuation
- ($cont_head, $cont_tail) = $value =~ $RE{CONT_LEAD};
- if ($cont_head) {
- $value = $cont_tail;
- }
- }
- # Correctly handle ! and & in quotes
- my ($head, $tail) = (q{}, $value);
- if ($state->{in_quote} && index($value, $state->{in_quote}) >= 0) {
- my $index = index($value, $state->{in_quote});
- $head = substr($value, 0, $index + 1);
- $tail
- = length($value) > $index + 1
- ? substr($value, $index + 2)
- : q{};
- $state->{in_quote} = undef;
- }
- if (!$state->{in_quote}) {
- while ($tail) {
- if (index($tail, q{!}) >= 0) {
- if (!($tail =~ s/$RE{COMMENT_END}/$1/)) {
- ($head, $tail, $state->{in_quote})
- = _load_extract_quote($head, $tail);
- }
- }
- else {
- while (index($tail, q{'}) > 0
- || index($tail, q{"}) > 0)
- {
- ($head, $tail, $state->{in_quote})
- = _load_extract_quote($head, $tail);
- }
- $head .= $tail;
- $tail = q{};
- }
- }
- }
- $cont_head ||= q{};
- push(@{$statement->{lines}}, $cont_head . $head . $tail . "\n");
- $statement->{value} .= $head . $tail;
- # Process a statement only if it is marked with a continuation
- if (!($statement->{value} =~ s/$RE{CONT}/$1/)) {
- $statement->{value} =~ s{\s+\z}{}msx;
- if (_process($statement, $ctx, $state)) {
- push(@{$ctx->{statements}}, $statement);
- }
- $statement = undef;
- }
- }
- return $ctx;
- }
- # Helper, removes a quoted string from $tail.
- sub _load_extract_quote {
- my ($head, $tail) = @_;
- my ($extracted, $remainder, $prefix)
- = extract_delimited($tail, q{'"}, qr{[^'"]*}msx, q{});
- if ($extracted) {
- return ($head . $prefix . $extracted, $remainder);
- }
- else {
- my ($quote) = $tail =~ $RE{QUOTE};
- return ($head . $tail, q{}, $quote);
- }
- }
- # Study statements and put attributes into array $statements
- sub _process {
- my ($statement, $ctx, $state) = @_;
- my $name;
- # End Interface
- if ($state->{in_interface}) {
- if ($statement->{value} =~ qr{\A\s*end\s*interface\b}imsx) {
- $state->{in_interface} = 0;
- }
- return;
- }
- # End Program Unit
- if (@{$state->{stack}} && $statement->{value} =~ qr{\A\s*end\b}imsx) {
- my ($end, $type, $symbol) = lc($statement->{value}) =~ $RE{UNIT_END};
- if (!$end) {
- return;
- }
- my ($top_type, $top_symbol) = @{$state->{stack}->[-1]};
- if (!$type
- || $top_type eq $type && (!$symbol || $top_symbol eq $symbol))
- {
- pop(@{$state->{stack}});
- if ($state->{in_contains} && !@{$state->{stack}}) {
- $state->{in_contains} = 0;
- }
- if (!$state->{in_contains}) {
- $statement->{name} = $top_type;
- $statement->{symbol} = $top_symbol;
- $statement->{type} = 'end';
- return $statement;
- }
- }
- return;
- }
- # Interface/Contains
- ($name) = $statement->{value} =~ qr{\A\s*(contains|interface)\b}imsx;
- if ($name) {
- $state->{'in_' . lc($name)} = 1;
- return;
- }
- # Program Unit
- my ($type, $symbol, @tokens) = _process_prog_unit($statement->{value});
- if ($type) {
- push(@{$state->{stack}}, [$type, $symbol]);
- if ($state->{in_contains}) {
- return;
- }
- $statement->{name} = lc($type);
- $statement->{type} = 'signature';
- $statement->{symbol} = lc($symbol);
- $ctx->{signature_token_set_of}{$symbol}
- = {map { (lc($_) => 1) } @tokens};
- return $statement;
- }
- if ($state->{in_contains}) {
- return;
- }
- # Use
- if ($statement->{value} =~ qr{\A\s*(use)\b}imsx) {
- $statement->{name} = 'use';
- $statement->{type} = 'use';
- return $statement;
- }
- # Type Declarations
- ($name) = $statement->{value} =~ $RE{TYPE_SPEC};
- if ($name) {
- $name =~ s{\s}{}gmsx;
- $statement->{name} = lc($name);
- $statement->{type} = 'type';
- return $statement;
- }
- # Attribute Statements
- ($name) = $statement->{value} =~ $RE{TYPE_ATTR};
- if ($name) {
- $statement->{name} = $name;
- $statement->{type} = 'attr';
- return $statement;
- }
- }
- # Parse a statement for program unit header. Returns a list containing the type,
- # the symbol and the signature tokens of the program unit.
- sub _process_prog_unit {
- my ($string) = @_;
- my ($type, $symbol, @args) = (q{}, q{});
- # Is it a blockdata, module or program?
- ($type, $symbol) = $string =~ $RE{UNIT_BASE};
- if ($type) {
- $type = lc($type);
- $type =~ s{\s*}{}gmsx;
- return ($type, $symbol);
- }
- # Remove the attribute and type declaration of a procedure
- $string =~ s/$RE{UNIT_ATTR}/$1/;
- my ($match) = $string =~ $RE{UNIT_SPEC};
- if ($match) {
- $string = $match;
- extract_bracketed($string);
- }
- # Is it a function or subroutine?
- ($type, $symbol) = lc($string) =~ $RE{UNIT_CALL};
- if (!$type) {
- return;
- }
- my $extracted = extract_bracketed($string, q{()}, qr{[^(]*}msx);
- # Get signature tokens from SUBROUTINE/FUNCTION
- if ($extracted) {
- $extracted =~ s{\s}{}gmsx;
- @args = split(q{,}, substr($extracted, 1, length($extracted) - 2));
- if ($type eq 'function') {
- my $result = extract_bracketed($string, q{()}, qr{[^(]*}msx);
- if ($result) {
- $result =~ s{\A\(\s*(.*?)\s*\)\z}{$1}msx; # remove braces
- push(@args, $result);
- }
- else {
- push(@args, $symbol);
- }
- }
- }
- return (lc($type), lc($symbol), map { lc($_) } @args);
- }
- # Reduces the list of statements to contain only the interface block.
- sub _reduce_to_interface {
- my ($ctx) = @_;
- my (%token_set, @interface_statements);
- STATEMENT:
- for my $statement (reverse(@{$ctx->{statements}})) {
- if ($statement->{type} eq 'end'
- && grep { $_ eq $statement->{name} } qw{subroutine function})
- {
- push(@interface_statements, $statement);
- %token_set
- = %{$ctx->{signature_token_set_of}{$statement->{symbol}}};
- next STATEMENT;
- }
- if ($statement->{type} eq 'signature'
- && grep { $_ eq $statement->{name} } qw{subroutine function})
- {
- push(@interface_statements, $statement);
- %token_set = ();
- next STATEMENT;
- }
- if ($statement->{type} eq 'use') {
- my ($head, $tail)
- = split(qr{\s*:\s*}msx, lc($statement->{value}), 2);
- if ($tail) {
- my @imports = map { [split(qr{\s*=>\s*}msx, $_, 2)] }
- split(qr{\s*,\s*}msx, $tail);
- my @useful_imports
- = grep { exists($token_set{$_->[0]}) } @imports;
- if (!@useful_imports) {
- next STATEMENT;
- }
- if (@imports != @useful_imports) {
- my @token_strings
- = map { $_->[0] . ($_->[1] ? ' => ' . $_->[1] : q{}) }
- @useful_imports;
- my ($last, @rest) = reverse(@token_strings);
- my @token_lines
- = (reverse(map { $_ . q{,&} } @rest), $last);
- push(
- @interface_statements,
- { lines => [
- sprintf("%s:&\n", $head),
- (map { sprintf(" & %s\n", $_) } @token_lines),
- ]
- },
- );
- next STATEMENT;
- }
- }
- push(@interface_statements, $statement);
- next STATEMENT;
- }
- if ($statement->{type} eq 'attr') {
- my ($spec, @tokens) = ($statement->{value} =~ /$RE{NAME_COMP}/g);
- if (grep { exists($token_set{$_}) } @tokens) {
- for my $token (@tokens) {
- $token_set{$token} = 1;
- }
- push(@interface_statements, $statement);
- next STATEMENT;
- }
- }
- if ($statement->{type} eq 'type') {
- my ($variable_string, $spec_string)
- = reverse(split('::', lc($statement->{value}), 2));
- if ($spec_string) {
- $spec_string =~ s{$RE{NAME_LEAD}}{}msx;
- }
- else {
- # The first expression in the statement is the type + attrib
- $variable_string =~ s{$RE{NAME_LEAD}}{}msx;
- $spec_string = extract_bracketed($variable_string, '()',
- qr{[\s\*]*}msx);
- }
- # Useful tokens are those that comes after a comma
- my $tail = q{,} . lc($variable_string);
- my @tokens;
- while ($tail) {
- if ($tail =~ qr{\A\s*['"]}msx) {
- extract_delimited($tail, q{'"}, qr{\A[^'"]*}msx, q{});
- }
- elsif ($tail =~ qr{\A\s*\(}msx) {
- extract_bracketed($tail, '()', qr{\A[^(]*}msx);
- }
- else {
- my $token;
- ($token, $tail) = $tail =~ $RE{NAME_LIST};
- if ($token && $token_set{$token}) {
- @tokens = ($variable_string =~ /$RE{NAME_COMP}/g);
- $tail = q{};
- }
- }
- }
- if (@tokens && $spec_string) {
- my @spec_tokens = (lc($spec_string) =~ /$RE{NAME_COMP}/g);
- push(
- @tokens,
- ( grep { !exists($TYPE_DECL_KEYWORD_SET{$_}) }
- @spec_tokens
- ),
- );
- }
- if (grep { exists($token_set{$_}) } @tokens) {
- for my $token (@tokens) {
- $token_set{$token} = 1;
- }
- push(@interface_statements, $statement);
- next STATEMENT;
- }
- }
- }
- if (!@interface_statements) {
- return [];
- }
- [ {lines => ["interface\n"]},
- reverse(@interface_statements),
- {lines => ["end interface\n"]},
- ];
- }
- # Processes and returns the line of the statement.
- sub _present_line {
- my ($statement) = @_;
- map {
- s{\s+}{ }gmsx; # collapse multiple spaces
- s{\s+\z}{\n}msx; # remove trailing spaces
- $_;
- } @{$statement->{lines}};
- }
- # ------------------------------------------------------------------------------
- 1;
- __END__
- =head1 NAME
- Fcm::Build::Fortran
- =head1 SYNOPSIS
- use Fcm::Build::Fortran;
- my $fortran_util = Fcm::Build::Fortran->new();
- open(my($handle), '<', $path_to_a_fortran_source_file);
- print($fortran_util->extract_interface($handle)); # prints interface
- close($handle);
- =head1 DESCRIPTION
- A class to analyse Fortran source. Currently, it has a single method to extract
- the calling interfaces of top level subroutines and functions in a Fortran
- source.
- =head1 METHODS
- =over 4
- =item $class->new()
- Creates and returns an instance of this class.
- =item $instance->extract_interface($handle)
- Extracts the calling interfaces of top level subroutines and functions in a
- Fortran source that can be read from $handle. Returns an interface block as a
- list of lines.
- =back
- =head1 ACKNOWLEDGEMENT
- This module is inspired by the logic developed by the European Centre
- for Medium-Range Weather Forecasts (ECMWF).
- =head1 COPYRIGHT
- (C) Crown copyright Met Office. All rights reserved.
- =cut
|