(**************************************************************************)
(*                                                                        *)
(*  Program to work out module dependencies in a Modula-2 program.        *)
(*  Copyright (C) 2016   Peter Moylan                                     *)
(*                                                                        *)
(*  This program is free software: you can redistribute it and/or modify  *)
(*  it under the terms of the GNU General Public License as published by  *)
(*  the Free Software Foundation, either version 3 of the License, or     *)
(*  (at your option) any later version.                                   *)
(*                                                                        *)
(*  This program is distributed in the hope that it will be useful,       *)
(*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *)
(*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *)
(*  GNU General Public License for more details.                          *)
(*                                                                        *)
(*  You should have received a copy of the GNU General Public License     *)
(*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *)
(*                                                                        *)
(*  To contact author:   http://www.pmoylan.org   peter@pmoylan.org       *)
(*                                                                        *)
(**************************************************************************)

IMPLEMENTATION MODULE Scanner;

        (********************************************************)
        (*                                                      *)
        (*              Simplified lexical analyser             *)
        (*                                                      *)
        (*  Programmer:         P. Moylan                       *)
        (*  Started:            14 January 2000                 *)
        (*  Last edited:        10 August 2016                  *)
        (*  Status:             OK                              *)
        (*                                                      *)
        (*    Remark: For checking imports we need to parse     *)
        (*    only a small subset of the language, and one      *)
        (*    consequence of this is that we can get away       *)
        (*    with a very crude lexical analyser.  We need      *)
        (*    only detect whitespace, comments, alphanumeric    *)
        (*    strings starting with a letter; everything else   *)
        (*    is interpreted as a one-character token.          *)
        (*                                                      *)
        (********************************************************)

IMPORT TextIO, Strings, IOChan, IOConsts;

(************************************************************************)

CONST
    CharsPerLine = 1024;
    Nul = CHR(0);  Tab = CHR(9);  CtrlZ = CHR(26);

TYPE
    LineIndex = [0..CharsPerLine-1];
    CharSet = SET OF CHAR;

VAR
    (* No multithreading, so we can afford to use a global "current line". *)

    LineBuffer: ARRAY LineIndex OF CHAR;
    NextPos: CARDINAL;
    fileid: IOChan.ChanId;

CONST
    Letters = CharSet{'A'..'Z', 'a'..'z'};
    IdChars = Letters + CharSet {'0'..'9', '_', '$'};

(************************************************************************)

PROCEDURE StripSpaces (VAR (*INOUT*) string: ARRAY OF CHAR);

    (* Removes leading and trailing spaces from string. *)

    VAR k: CARDINAL;

    BEGIN
        k := Strings.Length (string);
        WHILE (k > 0) AND ((string[k-1] = ' ') OR (string[k-1] = Tab)) DO
            DEC (k);
        END (*WHILE*);
        string[k] := Nul;
        k := 0;
        WHILE (string[k] = ' ') OR (string[k] = Tab) DO
            INC (k);
        END (*WHILE*);
        IF k > 0 THEN
            Strings.Delete (string, 0, k);
        END (*IF*);
    END StripSpaces;

(************************************************************************)

PROCEDURE StartScan (cid: IOChan.ChanId);

    (* Resets the scanner to work with a new file. *)

    BEGIN
        LineBuffer := "";  NextPos := 0;
        fileid := cid;
    END StartScan;

(************************************************************************)

PROCEDURE GetNextLine;

    (* Reads a new line into LineBuffer, resets NextPos. *)

    VAR status: IOConsts.ReadResults;

    BEGIN
        TextIO.ReadRestLine (fileid, LineBuffer);
        NextPos := 0;
        status := IOChan.ReadResult(fileid);
        IF (status <> IOConsts.allRight) AND (status <> IOConsts.endOfLine) THEN
            LineBuffer[0] := CtrlZ;
        END (*IF*);
        TextIO.SkipLine (fileid);
    END GetNextLine;

(************************************************************************)

PROCEDURE SkipComments;

    (* Skips over comments, including nested comments. *)

    VAR ch: CHAR;

    BEGIN
        INC (NextPos, 2);
        LOOP
            ch := LineBuffer[NextPos];  INC(NextPos);
            IF ch = Nul THEN
                GetNextLine;
                ch := LineBuffer[0];
                IF ch = CtrlZ THEN
                    EXIT (*LOOP*);
                END (*IF*);
                NextPos := 1;
            END (*IF*);
            IF (ch = '(') AND (LineBuffer[NextPos] = '*') THEN
                SkipComments;
            ELSIF (ch = '*') AND (LineBuffer[NextPos] = ')') THEN
                INC (NextPos);
                EXIT (*LOOP*);
            END (*IF*);
        END (*LOOP*);
    END SkipComments;

(************************************************************************)

PROCEDURE SkipBlanks;  FORWARD;

(************************************************************************)

PROCEDURE SkipPast (str: ARRAY OF CHAR);

    (* Skips to beyond the specified string. *)

    VAR pos: CARDINAL;  found: BOOLEAN;

    BEGIN
        found := FALSE;
        REPEAT
            IF LineBuffer[NextPos] = CtrlZ THEN
                found := TRUE;
            END (*IF*);
            Strings.FindNext (str, LineBuffer, NextPos, found, pos);
            IF found THEN
                NextPos := pos + LENGTH(str);
            ELSE
                GetNextLine;
            END (*IF*);
        UNTIL found;
    END SkipPast;

(************************************************************************)

PROCEDURE SkipConditional;

    (* Skips an IF condition in a <* ... *> comment. These can be       *)
    (* nested. On entry we have already scanned past the <* and the     *)
    (* keyword IF.  On exit we have moved past the terminating END      *)
    (* and its closing *>.                                              *)

    VAR token: ARRAY [0..63] OF CHAR;
        alphanumeric: BOOLEAN;

    BEGIN
        LOOP
            SkipPast ("<*");  Scan (token, alphanumeric);
            IF token[0] = Nul THEN
                EXIT (*LOOP*);
            ELSIF alphanumeric THEN
                IF Strings.Equal (token, "IF") THEN
                    SkipConditional;
                ELSIF Strings.Equal (token, "END") THEN
                    SkipPast ("*>");
                    EXIT (*LOOP*);
                END (*IF*);
            END (*IF*);
        END (*LOOP*);
    END SkipConditional;

(************************************************************************)

PROCEDURE SkipPseudoComments;

    (* Skips <* ... *> comments. These cannot be nested, but in the     *)
    (* special case of an IF we have to skip all the way to the         *)
    (* corresponding END.                                               *)

    VAR token: ARRAY [0..63] OF CHAR;
        alphanumeric: BOOLEAN;

    BEGIN
        INC (NextPos, 2);  Scan (token, alphanumeric);
        IF alphanumeric AND Strings.Equal (token, "IF") THEN
            SkipConditional;
        ELSIF (token[0] = '*') AND (LineBuffer[NextPos] = '>') THEN
            (* special case: empty pseudo-comment. *)
            INC (NextPos);
        ELSE
            SkipPast ("*>");
        END (*IF*);
    END SkipPseudoComments;

(************************************************************************)

PROCEDURE SkipBlanks;

    (* Skips over whitespace and comments. *)

    VAR ch: CHAR;

    BEGIN
        LOOP
            ch := LineBuffer[NextPos];
            WHILE ch = Nul DO
                GetNextLine;
                ch := LineBuffer[0];
            END (*WHILE*);
            IF ch = CtrlZ THEN
                EXIT (*LOOP*);
            END (*IF*);
            IF (ch = ' ') OR (ch = Tab) THEN
                INC (NextPos);
            ELSIF (ch = '(') AND (LineBuffer[NextPos+1] = '*') THEN
                SkipComments;
            ELSIF (ch = '<') AND (LineBuffer[NextPos+1] = '*') THEN
                SkipPseudoComments;
            ELSE
                EXIT (*LOOP*);
            END (*IF*);
        END (*LOOP*);
    END SkipBlanks;

(************************************************************************)

PROCEDURE Scan (VAR (*OUT*) token: ARRAY OF CHAR;
                VAR (*OUT*) alphanumeric: BOOLEAN);

    (* Returns the next input token, returns alphanumeric=TRUE iff      *)
    (* this is an identifier or a keyword.                              *)

    VAR k: CARDINAL;

    BEGIN
        SkipBlanks;
        IF LineBuffer[NextPos] IN Letters THEN
            alphanumeric := TRUE;
            k := 0;
            REPEAT
                token[k] := LineBuffer[NextPos];
                INC (k);  INC (NextPos);
            UNTIL (k > HIGH(token)) OR NOT (LineBuffer[NextPos] IN IdChars);
            IF k <= HIGH(token) THEN
                token[k] := Nul;
            END (*IF*);
        ELSE
            alphanumeric := FALSE;
            token[0] := LineBuffer[NextPos];
            IF token[0] = CtrlZ THEN
                token[0] := Nul;
            ELSE
                INC (NextPos);
            END (*IF*);
            token[1] := Nul;
        END (*IF*);
    END Scan;

(************************************************************************)

END Scanner.

