Fortran.pm 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536
  1. # ------------------------------------------------------------------------------
  2. # (C) Crown copyright Met Office. All rights reserved.
  3. # For further details please refer to the file COPYRIGHT.txt
  4. # which you should have received as part of this distribution.
  5. # ------------------------------------------------------------------------------
  6. use strict;
  7. use warnings;
  8. # ------------------------------------------------------------------------------
  9. package Fcm::Build::Fortran;
  10. use Text::Balanced qw{extract_bracketed extract_delimited};
  11. # Actions of this class
  12. my %ACTION_OF = (extract_interface => \&_extract_interface);
  13. # Regular expressions
  14. # Matches a variable attribute
  15. my $RE_ATTR = qr{
  16. allocatable|dimension|external|intent|optional|parameter|pointer|save|target
  17. }imsx;
  18. # Matches a name
  19. my $RE_NAME = qr{[A-Za-z]\w*}imsx;
  20. # Matches a specification type
  21. my $RE_SPEC = qr{
  22. character|complex|double\s*precision|integer|logical|real|type
  23. }imsx;
  24. # Matches the identifier of a program unit that does not have arguments
  25. my $RE_UNIT_BASE = qr{block\s*data|module|program}imsx;
  26. # Matches the identifier of a program unit that has arguments
  27. my $RE_UNIT_CALL = qr{function|subroutine}imsx;
  28. # Matches the identifier of any program unit
  29. my $RE_UNIT = qr{$RE_UNIT_BASE|$RE_UNIT_CALL}msx;
  30. my %RE = (
  31. # A comment line
  32. COMMENT => qr{\A\s*(?:!|\z)}msx,
  33. # A trailing comment, capture the expression before the comment
  34. COMMENT_END => qr{\A([^'"]*?)\s*!.*\z}msx,
  35. # A contination marker, capture the expression before the marker
  36. CONT => qr{\A(.*)&\s*\z}msx,
  37. # A contination marker at the beginning of a line, capture the marker and
  38. # the expression after the marker
  39. CONT_LEAD => qr{\A(\s*&)(.*)\z}msx,
  40. # Capture a variable identifier, removing any type component expression
  41. NAME_COMP => qr{\b($RE_NAME)(?:\s*\%\s*$RE_NAME)*\b}msx,
  42. # Matches the first identifier in a line
  43. NAME_LEAD => qr{\A\s*$RE_NAME\s*}msx,
  44. # Captures a name identifier after a comma, and the expression after
  45. NAME_LIST => qr{\A(?:.*?)\s*,\s*($RE_NAME)\b(.*)\z}msx,
  46. # Captures the next quote character
  47. QUOTE => qr{\A[^'"]*(['"])}msx,
  48. # Matches an attribute declaration
  49. TYPE_ATTR => qr{\A\s*($RE_ATTR)\b}msx,
  50. # Matches a type declaration
  51. TYPE_SPEC => qr{\A\s*($RE_SPEC)\b}msx,
  52. # Captures the expression after one or more program unit attributes
  53. UNIT_ATTR => qr{\A\s*(?:(?:elemental|recursive|pure)\s+)+(.*)\z}imsx,
  54. # Captures the identifier and the symbol of a program unit with no arguments
  55. UNIT_BASE => qr{\A\s*($RE_UNIT_BASE)\s+($RE_NAME)\s*\z}imsx,
  56. # Captures the identifier and the symbol of a program unit with arguments
  57. UNIT_CALL => qr{\A\s*($RE_UNIT_CALL)\s+($RE_NAME)\b}imsx,
  58. # Captures the end of a program unit, its identifier and its symbol
  59. UNIT_END => qr{\A\s*(end)(?:\s+($RE_NAME)(?:\s+($RE_NAME))?)?\s*\z}imsx,
  60. # Captures the expression after a program unit type specification
  61. UNIT_SPEC => qr{\A\s*$RE_SPEC\b(.*)\z}imsx,
  62. );
  63. # Keywords in type declaration statements
  64. my %TYPE_DECL_KEYWORD_SET = map { ($_, 1) } qw{
  65. allocatable
  66. dimension
  67. in
  68. inout
  69. intent
  70. kind
  71. len
  72. optional
  73. out
  74. parameter
  75. pointer
  76. save
  77. target
  78. };
  79. # Creates and returns an instance of this class.
  80. sub new {
  81. my ($class) = @_;
  82. bless(
  83. sub {
  84. my $key = shift();
  85. if (!exists($ACTION_OF{$key})) {
  86. return;
  87. }
  88. $ACTION_OF{$key}->(@_);
  89. },
  90. $class,
  91. );
  92. }
  93. # Methods.
  94. for my $key (keys(%ACTION_OF)) {
  95. no strict qw{refs};
  96. *{$key} = sub { my $self = shift(); $self->($key, @_) };
  97. }
  98. # Extracts the calling interfaces of top level subroutines and functions from
  99. # the $handle for reading Fortran sources.
  100. sub _extract_interface {
  101. my ($handle) = @_;
  102. map { _present_line($_) } @{_reduce_to_interface(_load($handle))};
  103. }
  104. # Reads $handle for the next Fortran statement, handling continuations.
  105. sub _load {
  106. my ($handle) = @_;
  107. my $ctx = {signature_token_set_of => {}, statements => []};
  108. my $state = {
  109. in_contains => undef, # in a "contains" section of a program unit
  110. in_interface => undef, # in an "interface" block
  111. in_quote => undef, # in a multi-line quote
  112. stack => [], # program unit stack
  113. };
  114. my $NEW_STATEMENT = sub {
  115. { name => q{}, # statement name, e.g. function, integer, ...
  116. lines => [], # original lines in the statement
  117. line_number => 0, # line number (start) in the original source
  118. symbol => q{}, # name of a program unit (signature, end)
  119. type => q{}, # e.g. signature, use, type, attr, end
  120. value => q{}, # the actual value of the statement
  121. };
  122. };
  123. my $statement;
  124. LINE:
  125. while (my $line = readline($handle)) {
  126. if (!defined($statement)) {
  127. $statement = $NEW_STATEMENT->();
  128. }
  129. my $value = $line;
  130. chomp($value);
  131. # Pre-processor directives and continuation
  132. if (!$statement->{line_number} && index($value, '#') == 0) {
  133. $statement->{line_number} = $.;
  134. $statement->{name} = 'cpp';
  135. }
  136. if ($statement->{name} eq 'cpp') {
  137. push(@{$statement->{lines}}, $line);
  138. $statement->{value} .= $value;
  139. if (rindex($value, '\\') != length($value) - 1) {
  140. $statement = undef;
  141. }
  142. next LINE;
  143. }
  144. # Normal Fortran
  145. if ($value =~ $RE{COMMENT}) {
  146. next LINE;
  147. }
  148. if (!$statement->{line_number}) {
  149. $statement->{line_number} = $.;
  150. }
  151. my ($cont_head, $cont_tail);
  152. if ($statement->{line_number} != $.) { # is a continuation
  153. ($cont_head, $cont_tail) = $value =~ $RE{CONT_LEAD};
  154. if ($cont_head) {
  155. $value = $cont_tail;
  156. }
  157. }
  158. # Correctly handle ! and & in quotes
  159. my ($head, $tail) = (q{}, $value);
  160. if ($state->{in_quote} && index($value, $state->{in_quote}) >= 0) {
  161. my $index = index($value, $state->{in_quote});
  162. $head = substr($value, 0, $index + 1);
  163. $tail
  164. = length($value) > $index + 1
  165. ? substr($value, $index + 2)
  166. : q{};
  167. $state->{in_quote} = undef;
  168. }
  169. if (!$state->{in_quote}) {
  170. while ($tail) {
  171. if (index($tail, q{!}) >= 0) {
  172. if (!($tail =~ s/$RE{COMMENT_END}/$1/)) {
  173. ($head, $tail, $state->{in_quote})
  174. = _load_extract_quote($head, $tail);
  175. }
  176. }
  177. else {
  178. while (index($tail, q{'}) > 0
  179. || index($tail, q{"}) > 0)
  180. {
  181. ($head, $tail, $state->{in_quote})
  182. = _load_extract_quote($head, $tail);
  183. }
  184. $head .= $tail;
  185. $tail = q{};
  186. }
  187. }
  188. }
  189. $cont_head ||= q{};
  190. push(@{$statement->{lines}}, $cont_head . $head . $tail . "\n");
  191. $statement->{value} .= $head . $tail;
  192. # Process a statement only if it is marked with a continuation
  193. if (!($statement->{value} =~ s/$RE{CONT}/$1/)) {
  194. $statement->{value} =~ s{\s+\z}{}msx;
  195. if (_process($statement, $ctx, $state)) {
  196. push(@{$ctx->{statements}}, $statement);
  197. }
  198. $statement = undef;
  199. }
  200. }
  201. return $ctx;
  202. }
  203. # Helper, removes a quoted string from $tail.
  204. sub _load_extract_quote {
  205. my ($head, $tail) = @_;
  206. my ($extracted, $remainder, $prefix)
  207. = extract_delimited($tail, q{'"}, qr{[^'"]*}msx, q{});
  208. if ($extracted) {
  209. return ($head . $prefix . $extracted, $remainder);
  210. }
  211. else {
  212. my ($quote) = $tail =~ $RE{QUOTE};
  213. return ($head . $tail, q{}, $quote);
  214. }
  215. }
  216. # Study statements and put attributes into array $statements
  217. sub _process {
  218. my ($statement, $ctx, $state) = @_;
  219. my $name;
  220. # End Interface
  221. if ($state->{in_interface}) {
  222. if ($statement->{value} =~ qr{\A\s*end\s*interface\b}imsx) {
  223. $state->{in_interface} = 0;
  224. }
  225. return;
  226. }
  227. # End Program Unit
  228. if (@{$state->{stack}} && $statement->{value} =~ qr{\A\s*end\b}imsx) {
  229. my ($end, $type, $symbol) = lc($statement->{value}) =~ $RE{UNIT_END};
  230. if (!$end) {
  231. return;
  232. }
  233. my ($top_type, $top_symbol) = @{$state->{stack}->[-1]};
  234. if (!$type
  235. || $top_type eq $type && (!$symbol || $top_symbol eq $symbol))
  236. {
  237. pop(@{$state->{stack}});
  238. if ($state->{in_contains} && !@{$state->{stack}}) {
  239. $state->{in_contains} = 0;
  240. }
  241. if (!$state->{in_contains}) {
  242. $statement->{name} = $top_type;
  243. $statement->{symbol} = $top_symbol;
  244. $statement->{type} = 'end';
  245. return $statement;
  246. }
  247. }
  248. return;
  249. }
  250. # Interface/Contains
  251. ($name) = $statement->{value} =~ qr{\A\s*(contains|interface)\b}imsx;
  252. if ($name) {
  253. $state->{'in_' . lc($name)} = 1;
  254. return;
  255. }
  256. # Program Unit
  257. my ($type, $symbol, @tokens) = _process_prog_unit($statement->{value});
  258. if ($type) {
  259. push(@{$state->{stack}}, [$type, $symbol]);
  260. if ($state->{in_contains}) {
  261. return;
  262. }
  263. $statement->{name} = lc($type);
  264. $statement->{type} = 'signature';
  265. $statement->{symbol} = lc($symbol);
  266. $ctx->{signature_token_set_of}{$symbol}
  267. = {map { (lc($_) => 1) } @tokens};
  268. return $statement;
  269. }
  270. if ($state->{in_contains}) {
  271. return;
  272. }
  273. # Use
  274. if ($statement->{value} =~ qr{\A\s*(use)\b}imsx) {
  275. $statement->{name} = 'use';
  276. $statement->{type} = 'use';
  277. return $statement;
  278. }
  279. # Type Declarations
  280. ($name) = $statement->{value} =~ $RE{TYPE_SPEC};
  281. if ($name) {
  282. $name =~ s{\s}{}gmsx;
  283. $statement->{name} = lc($name);
  284. $statement->{type} = 'type';
  285. return $statement;
  286. }
  287. # Attribute Statements
  288. ($name) = $statement->{value} =~ $RE{TYPE_ATTR};
  289. if ($name) {
  290. $statement->{name} = $name;
  291. $statement->{type} = 'attr';
  292. return $statement;
  293. }
  294. }
  295. # Parse a statement for program unit header. Returns a list containing the type,
  296. # the symbol and the signature tokens of the program unit.
  297. sub _process_prog_unit {
  298. my ($string) = @_;
  299. my ($type, $symbol, @args) = (q{}, q{});
  300. # Is it a blockdata, module or program?
  301. ($type, $symbol) = $string =~ $RE{UNIT_BASE};
  302. if ($type) {
  303. $type = lc($type);
  304. $type =~ s{\s*}{}gmsx;
  305. return ($type, $symbol);
  306. }
  307. # Remove the attribute and type declaration of a procedure
  308. $string =~ s/$RE{UNIT_ATTR}/$1/;
  309. my ($match) = $string =~ $RE{UNIT_SPEC};
  310. if ($match) {
  311. $string = $match;
  312. extract_bracketed($string);
  313. }
  314. # Is it a function or subroutine?
  315. ($type, $symbol) = lc($string) =~ $RE{UNIT_CALL};
  316. if (!$type) {
  317. return;
  318. }
  319. my $extracted = extract_bracketed($string, q{()}, qr{[^(]*}msx);
  320. # Get signature tokens from SUBROUTINE/FUNCTION
  321. if ($extracted) {
  322. $extracted =~ s{\s}{}gmsx;
  323. @args = split(q{,}, substr($extracted, 1, length($extracted) - 2));
  324. if ($type eq 'function') {
  325. my $result = extract_bracketed($string, q{()}, qr{[^(]*}msx);
  326. if ($result) {
  327. $result =~ s{\A\(\s*(.*?)\s*\)\z}{$1}msx; # remove braces
  328. push(@args, $result);
  329. }
  330. else {
  331. push(@args, $symbol);
  332. }
  333. }
  334. }
  335. return (lc($type), lc($symbol), map { lc($_) } @args);
  336. }
  337. # Reduces the list of statements to contain only the interface block.
  338. sub _reduce_to_interface {
  339. my ($ctx) = @_;
  340. my (%token_set, @interface_statements);
  341. STATEMENT:
  342. for my $statement (reverse(@{$ctx->{statements}})) {
  343. if ($statement->{type} eq 'end'
  344. && grep { $_ eq $statement->{name} } qw{subroutine function})
  345. {
  346. push(@interface_statements, $statement);
  347. %token_set
  348. = %{$ctx->{signature_token_set_of}{$statement->{symbol}}};
  349. next STATEMENT;
  350. }
  351. if ($statement->{type} eq 'signature'
  352. && grep { $_ eq $statement->{name} } qw{subroutine function})
  353. {
  354. push(@interface_statements, $statement);
  355. %token_set = ();
  356. next STATEMENT;
  357. }
  358. if ($statement->{type} eq 'use') {
  359. my ($head, $tail)
  360. = split(qr{\s*:\s*}msx, lc($statement->{value}), 2);
  361. if ($tail) {
  362. my @imports = map { [split(qr{\s*=>\s*}msx, $_, 2)] }
  363. split(qr{\s*,\s*}msx, $tail);
  364. my @useful_imports
  365. = grep { exists($token_set{$_->[0]}) } @imports;
  366. if (!@useful_imports) {
  367. next STATEMENT;
  368. }
  369. if (@imports != @useful_imports) {
  370. my @token_strings
  371. = map { $_->[0] . ($_->[1] ? ' => ' . $_->[1] : q{}) }
  372. @useful_imports;
  373. my ($last, @rest) = reverse(@token_strings);
  374. my @token_lines
  375. = (reverse(map { $_ . q{,&} } @rest), $last);
  376. push(
  377. @interface_statements,
  378. { lines => [
  379. sprintf("%s:&\n", $head),
  380. (map { sprintf(" & %s\n", $_) } @token_lines),
  381. ]
  382. },
  383. );
  384. next STATEMENT;
  385. }
  386. }
  387. push(@interface_statements, $statement);
  388. next STATEMENT;
  389. }
  390. if ($statement->{type} eq 'attr') {
  391. my ($spec, @tokens) = ($statement->{value} =~ /$RE{NAME_COMP}/g);
  392. if (grep { exists($token_set{$_}) } @tokens) {
  393. for my $token (@tokens) {
  394. $token_set{$token} = 1;
  395. }
  396. push(@interface_statements, $statement);
  397. next STATEMENT;
  398. }
  399. }
  400. if ($statement->{type} eq 'type') {
  401. my ($variable_string, $spec_string)
  402. = reverse(split('::', lc($statement->{value}), 2));
  403. if ($spec_string) {
  404. $spec_string =~ s{$RE{NAME_LEAD}}{}msx;
  405. }
  406. else {
  407. # The first expression in the statement is the type + attrib
  408. $variable_string =~ s{$RE{NAME_LEAD}}{}msx;
  409. $spec_string = extract_bracketed($variable_string, '()',
  410. qr{[\s\*]*}msx);
  411. }
  412. # Useful tokens are those that comes after a comma
  413. my $tail = q{,} . lc($variable_string);
  414. my @tokens;
  415. while ($tail) {
  416. if ($tail =~ qr{\A\s*['"]}msx) {
  417. extract_delimited($tail, q{'"}, qr{\A[^'"]*}msx, q{});
  418. }
  419. elsif ($tail =~ qr{\A\s*\(}msx) {
  420. extract_bracketed($tail, '()', qr{\A[^(]*}msx);
  421. }
  422. else {
  423. my $token;
  424. ($token, $tail) = $tail =~ $RE{NAME_LIST};
  425. if ($token && $token_set{$token}) {
  426. @tokens = ($variable_string =~ /$RE{NAME_COMP}/g);
  427. $tail = q{};
  428. }
  429. }
  430. }
  431. if (@tokens && $spec_string) {
  432. my @spec_tokens = (lc($spec_string) =~ /$RE{NAME_COMP}/g);
  433. push(
  434. @tokens,
  435. ( grep { !exists($TYPE_DECL_KEYWORD_SET{$_}) }
  436. @spec_tokens
  437. ),
  438. );
  439. }
  440. if (grep { exists($token_set{$_}) } @tokens) {
  441. for my $token (@tokens) {
  442. $token_set{$token} = 1;
  443. }
  444. push(@interface_statements, $statement);
  445. next STATEMENT;
  446. }
  447. }
  448. }
  449. if (!@interface_statements) {
  450. return [];
  451. }
  452. [ {lines => ["interface\n"]},
  453. reverse(@interface_statements),
  454. {lines => ["end interface\n"]},
  455. ];
  456. }
  457. # Processes and returns the line of the statement.
  458. sub _present_line {
  459. my ($statement) = @_;
  460. map {
  461. s{\s+}{ }gmsx; # collapse multiple spaces
  462. s{\s+\z}{\n}msx; # remove trailing spaces
  463. $_;
  464. } @{$statement->{lines}};
  465. }
  466. # ------------------------------------------------------------------------------
  467. 1;
  468. __END__
  469. =head1 NAME
  470. Fcm::Build::Fortran
  471. =head1 SYNOPSIS
  472. use Fcm::Build::Fortran;
  473. my $fortran_util = Fcm::Build::Fortran->new();
  474. open(my($handle), '<', $path_to_a_fortran_source_file);
  475. print($fortran_util->extract_interface($handle)); # prints interface
  476. close($handle);
  477. =head1 DESCRIPTION
  478. A class to analyse Fortran source. Currently, it has a single method to extract
  479. the calling interfaces of top level subroutines and functions in a Fortran
  480. source.
  481. =head1 METHODS
  482. =over 4
  483. =item $class->new()
  484. Creates and returns an instance of this class.
  485. =item $instance->extract_interface($handle)
  486. Extracts the calling interfaces of top level subroutines and functions in a
  487. Fortran source that can be read from $handle. Returns an interface block as a
  488. list of lines.
  489. =back
  490. =head1 ACKNOWLEDGEMENT
  491. This module is inspired by the logic developed by the European Centre
  492. for Medium-Range Weather Forecasts (ECMWF).
  493. =head1 COPYRIGHT
  494. (C) Crown copyright Met Office. All rights reserved.
  495. =cut