From 078ef7f529356c86e43db5525b8dc973c12375a1 Mon Sep 17 00:00:00 2001 From: GabrielChiquetto Date: Thu, 25 Apr 2019 00:54:42 -0300 Subject: [PATCH 01/22] Add token class implementation. Add RELOP attribute to TokenType enum. Add Keywords enum. --- src/java/Keywords.java | 34 ++++++++++++++++++++++++++++++++++ src/java/Token.java | 30 ++++++++++++++++++++++++++++++ src/java/TokenType.java | 2 ++ 3 files changed, 66 insertions(+) create mode 100644 src/java/Keywords.java diff --git a/src/java/Keywords.java b/src/java/Keywords.java new file mode 100644 index 0000000..b3eadc8 --- /dev/null +++ b/src/java/Keywords.java @@ -0,0 +1,34 @@ +/** + * Enum which defines the language reserved keywords. + * @author Leonardo Rocha, Gabriel Chiquetto + */ +public enum Keywords{ + /** Used for class declarations. */ + CLASS, + /** Used for class inheritance. */ + EXTENDS, + /** Primitive type declaration "integer". */ + INT, + /** String reference declaration. */ + STRING, + /** Terminates execution of current scope. */ + BREAK, + /** Show a message in the standard output. */ + PRINT, + /** Read from standard input. */ + READ, + /** It causes program control to transfer back to the caller of the method. */ + RETURN, + /** References the immediate parent of a class. */ + SUPER, + /** Conditional statement that executes a block when the expression is true. */ + IF, + /** Executes when the Boolean expression of the matching "if" is false. */ + ELSE, + /** Iterative Loop control structure. */ + FOR, + /** Memory allocation for a new object. */ + NEW, + /** Method called when the object is allocated. */ + CONSTRUCTOR +} diff --git a/src/java/Token.java b/src/java/Token.java index c80884b..c557d5a 100644 --- a/src/java/Token.java +++ b/src/java/Token.java @@ -1,3 +1,33 @@ +/** + * This class represents every recognized single unit(token) of the source code. + * A token is a Pair and the attribute is optional. + */ public class Token { + /** Abstract symbol that represents a type of lexical unit. */ + private TokenType tokenType; + /** Optional value of a token. */ + private TokenType attribute; + /** Sequence of characters that represents the token in the source code. */ + private String lexeme; + /** + * Initializes a token with the main fields. + * @param tokenType type of the generated token. + * @param attribute value of the token. + * @param lexeme sequence of characacters that represents the token in the source code. + */ + public Token(TokenType tokenType, TokenType attribute, String lexeme){ + this.tokenType = tokenType; + this.attribute = attribute; + this.lexeme = lexeme; + } + + /** + * Initializes a token without an attribute. + * @param tokenType type of the generated token. + * @param lexeme sequence of characacters that represents the token in the source code. + */ + public Token(TokenType tokenType, String lexeme){ + Token(tokenType, TokenType.UNDEF, lexeme); + } } diff --git a/src/java/TokenType.java b/src/java/TokenType.java index 52e0a8a..7c40c3f 100644 --- a/src/java/TokenType.java +++ b/src/java/TokenType.java @@ -9,6 +9,8 @@ public enum TokenType { IDENTIFIER, /** A sequence of decimal digits. */ INTEGER_LITERAL, + /** Relational operator attribute. */ + RELOP, /** Relational operator '<'. */ LESSTHAN, /** Relational operator '>'. */ From 0e921b5085de28e48f3776d9c8a0ae8f33d20fde Mon Sep 17 00:00:00 2001 From: LeoRocha Date: Fri, 26 Apr 2019 16:15:07 -0400 Subject: [PATCH 02/22] Add @author in Token.java Add Preprocessor class to handle file preprocessing. Add Main class. Add test file. Add intelliJ run configuration. Fix compile errors. --- .idea/runConfigurations/XPPCompiler.xml | 11 +++++++++ bin/test.xpp | 7 ++++++ src/java/Main.java | 32 +++++++++++++++++++++++++ src/java/Preprocessor.java | 22 +++++++++++++++++ src/java/Token.java | 7 +++--- src/java/TokenType.java | 2 +- src/java/java.iml | 1 + 7 files changed, 78 insertions(+), 4 deletions(-) create mode 100644 .idea/runConfigurations/XPPCompiler.xml create mode 100644 bin/test.xpp create mode 100644 src/java/Main.java create mode 100644 src/java/Preprocessor.java diff --git a/.idea/runConfigurations/XPPCompiler.xml b/.idea/runConfigurations/XPPCompiler.xml new file mode 100644 index 0000000..ac3582f --- /dev/null +++ b/.idea/runConfigurations/XPPCompiler.xml @@ -0,0 +1,11 @@ + + + + \ No newline at end of file diff --git a/bin/test.xpp b/bin/test.xpp new file mode 100644 index 0000000..ebe8156 --- /dev/null +++ b/bin/test.xpp @@ -0,0 +1,7 @@ +//this is a simple comment to test + +int x = 10; +int y = 10; +/* + another comment to ignore +*/ \ No newline at end of file diff --git a/src/java/Main.java b/src/java/Main.java new file mode 100644 index 0000000..3ca6785 --- /dev/null +++ b/src/java/Main.java @@ -0,0 +1,32 @@ +import java.io.*; + +/** + * Main class. + * @author Leonardo-Rocha + */ +public class Main{ + + public static void main(String[] args){ + + File directory = new File("../../bin/test.xpp"); + System.out.println(directory.getAbsolutePath()); + Preprocessor preprocessor = new Preprocessor(); + + try{ + FileReader fileReader = new FileReader(directory); + + BufferedReader bufferedReader = new BufferedReader(fileReader); + + preprocessor.preProcess(bufferedReader); + + bufferedReader.close(); + } + catch(FileNotFoundException ex) { + System.out.println("Unable to open file '" + directory + "'"); + } + catch(IOException ex) { + System.out.println("Error reading file '" + directory + "'"); + } + } + +} \ No newline at end of file diff --git a/src/java/Preprocessor.java b/src/java/Preprocessor.java new file mode 100644 index 0000000..04a1c5e --- /dev/null +++ b/src/java/Preprocessor.java @@ -0,0 +1,22 @@ +import java.io.BufferedReader; +import java.io.IOException; + +/** + * Class to Preprocess files. + * @author Leonardo-Rocha + */ +class Preprocessor{ + + /** + * PreProcess file removing comments. + * @param bufferedReader file reference. + */ + static void preProcess(BufferedReader bufferedReader) throws IOException { + String line; + + while((line = bufferedReader.readLine()) != null) { + System.out.println(line); + } + //TODO complete method. + } +} \ No newline at end of file diff --git a/src/java/Token.java b/src/java/Token.java index c557d5a..61a40bc 100644 --- a/src/java/Token.java +++ b/src/java/Token.java @@ -1,6 +1,7 @@ /** * This class represents every recognized single unit(token) of the source code. * A token is a Pair and the attribute is optional. + * @author Gabriel Chiquetto, Leonardo-Rocha */ public class Token { /** Abstract symbol that represents a type of lexical unit. */ @@ -14,7 +15,7 @@ public class Token { * Initializes a token with the main fields. * @param tokenType type of the generated token. * @param attribute value of the token. - * @param lexeme sequence of characacters that represents the token in the source code. + * @param lexeme sequence of characters that represents the token in the source code. */ public Token(TokenType tokenType, TokenType attribute, String lexeme){ this.tokenType = tokenType; @@ -25,9 +26,9 @@ public Token(TokenType tokenType, TokenType attribute, String lexeme){ /** * Initializes a token without an attribute. * @param tokenType type of the generated token. - * @param lexeme sequence of characacters that represents the token in the source code. + * @param lexeme sequence of characters that represents the token in the source code. */ public Token(TokenType tokenType, String lexeme){ - Token(tokenType, TokenType.UNDEF, lexeme); + this(tokenType, TokenType.UNDEF, lexeme); } } diff --git a/src/java/TokenType.java b/src/java/TokenType.java index 7c40c3f..aa8cf59 100644 --- a/src/java/TokenType.java +++ b/src/java/TokenType.java @@ -59,7 +59,7 @@ public enum TokenType { LINE_COMMENT, /** C-style multi-line comment start using "/*".*/ LBLOCK_COMMENT, - /** C-style multi-line comment end using "*/".*/ + /** C-style multi-line comment end using "* /".*/ RBLOCK_COMMENT, /** End of file indicator. */ EOF diff --git a/src/java/java.iml b/src/java/java.iml index c90834f..b710c21 100644 --- a/src/java/java.iml +++ b/src/java/java.iml @@ -4,6 +4,7 @@ + From 5dd532a9491322e39aa5e82846ad3360d2d24082 Mon Sep 17 00:00:00 2001 From: LeoRocha Date: Fri, 26 Apr 2019 16:52:29 -0400 Subject: [PATCH 03/22] Reformat all code using IntelliJ. Change PreProcessor to identify singleline comments. --- src/java/Main.java | 34 +++++----- src/java/Preprocessor.java | 31 +++++++-- src/java/Token.java | 25 +++++--- src/java/TokenType.java | 125 +++++++++++++++++++++++++++---------- 4 files changed, 151 insertions(+), 64 deletions(-) diff --git a/src/java/Main.java b/src/java/Main.java index 3ca6785..882e5c5 100644 --- a/src/java/Main.java +++ b/src/java/Main.java @@ -2,31 +2,29 @@ /** * Main class. + * * @author Leonardo-Rocha */ -public class Main{ +public class Main { - public static void main(String[] args){ + public static void main(String[] args) { - File directory = new File("../../bin/test.xpp"); - System.out.println(directory.getAbsolutePath()); - Preprocessor preprocessor = new Preprocessor(); + File directory = new File("../../bin/test.xpp"); + System.out.println(directory.getAbsolutePath()); - try{ - FileReader fileReader = new FileReader(directory); + try { + FileReader fileReader = new FileReader(directory); - BufferedReader bufferedReader = new BufferedReader(fileReader); + LineNumberReader lineNumberReader = new LineNumberReader(fileReader); - preprocessor.preProcess(bufferedReader); + Preprocessor.preProcess(lineNumberReader); - bufferedReader.close(); - } - catch(FileNotFoundException ex) { - System.out.println("Unable to open file '" + directory + "'"); - } - catch(IOException ex) { - System.out.println("Error reading file '" + directory + "'"); - } - } + lineNumberReader.close(); + } catch (FileNotFoundException ex) { + System.out.println("Unable to open file '" + directory + "'"); + } catch (IOException ex) { + System.out.println("Error reading file '" + directory + "'"); + } + } } \ No newline at end of file diff --git a/src/java/Preprocessor.java b/src/java/Preprocessor.java index 04a1c5e..9c3d1ab 100644 --- a/src/java/Preprocessor.java +++ b/src/java/Preprocessor.java @@ -1,22 +1,41 @@ -import java.io.BufferedReader; +import java.io.FileInputStream; import java.io.IOException; +import java.io.LineNumberReader; +import java.util.stream.Stream; /** * Class to Preprocess files. + * * @author Leonardo-Rocha */ -class Preprocessor{ +class Preprocessor { /** * PreProcess file removing comments. - * @param bufferedReader file reference. + * + * @param lineNumberReader file reference. */ - static void preProcess(BufferedReader bufferedReader) throws IOException { + static void preProcess(LineNumberReader lineNumberReader) throws IOException { + //TODO complete method. String line; + Stream stringStream = lineNumberReader.lines(); + while ((line = lineNumberReader.readLine()) != null) { + char[] charArray = line.toCharArray(); + + for (int position = 0; position < charArray.length; position++) { + if (isASingleLineComment(charArray, position)) { + System.out.println("Found a single-line comment at line " + lineNumberReader.getLineNumber()); + } - while((line = bufferedReader.readLine()) != null) { + } System.out.println(line); } - //TODO complete method. + } + + private static boolean isASingleLineComment(char[] charArray, int position) { + if (position + 1 == charArray.length) + return false; + else + return (charArray[position] == '/' && charArray[position + 1] == '/'); } } \ No newline at end of file diff --git a/src/java/Token.java b/src/java/Token.java index 61a40bc..331dff4 100644 --- a/src/java/Token.java +++ b/src/java/Token.java @@ -1,23 +1,31 @@ /** * This class represents every recognized single unit(token) of the source code. * A token is a Pair and the attribute is optional. - * @author Gabriel Chiquetto, Leonardo-Rocha + * + * @author Gabriel Chiquetto, Leonardo-Rocha */ public class Token { - /** Abstract symbol that represents a type of lexical unit. */ + /** + * Abstract symbol that represents a type of lexical unit. + */ private TokenType tokenType; - /** Optional value of a token. */ + /** + * Optional value of a token. + */ private TokenType attribute; - /** Sequence of characters that represents the token in the source code. */ + /** + * Sequence of characters that represents the token in the source code. + */ private String lexeme; /** * Initializes a token with the main fields. + * * @param tokenType type of the generated token. * @param attribute value of the token. - * @param lexeme sequence of characters that represents the token in the source code. + * @param lexeme sequence of characters that represents the token in the source code. */ - public Token(TokenType tokenType, TokenType attribute, String lexeme){ + public Token(TokenType tokenType, TokenType attribute, String lexeme) { this.tokenType = tokenType; this.attribute = attribute; this.lexeme = lexeme; @@ -25,10 +33,11 @@ public Token(TokenType tokenType, TokenType attribute, String lexeme){ /** * Initializes a token without an attribute. + * * @param tokenType type of the generated token. - * @param lexeme sequence of characters that represents the token in the source code. + * @param lexeme sequence of characters that represents the token in the source code. */ - public Token(TokenType tokenType, String lexeme){ + public Token(TokenType tokenType, String lexeme) { this(tokenType, TokenType.UNDEF, lexeme); } } diff --git a/src/java/TokenType.java b/src/java/TokenType.java index aa8cf59..314060c 100644 --- a/src/java/TokenType.java +++ b/src/java/TokenType.java @@ -1,66 +1,127 @@ /** * Enum used to group all possible token types. - * @author Gabriel Chiquetto, Leonardo-Rocha -*/ + * + * @author Gabriel Chiquetto, Leonardo-Rocha + */ public enum TokenType { - /** Undefined - used when doesn't match any type of token. */ + /** + * Undefined - used when doesn't match any type of token. + */ UNDEF, - /** A sequence of letters, numbers and underscores. */ + /** + * A sequence of letters, numbers and underscores. + */ IDENTIFIER, - /** A sequence of decimal digits. */ + /** + * A sequence of decimal digits. + */ INTEGER_LITERAL, - /** Relational operator attribute. */ + /** + * Relational operator attribute. + */ RELOP, - /** Relational operator '<'. */ + /** + * Relational operator '<'. + */ LESSTHAN, - /** Relational operator '>'. */ + /** + * Relational operator '>'. + */ GREATER_THAN, - /** Relational operator '<='. */ + /** + * Relational operator '<='. + */ LESS_OR_EQUAL, - /** Relational operator '>='. */ + /** + * Relational operator '>='. + */ GREATER_OR_EQUAL, - /** Relational operator '=='. */ + /** + * Relational operator '=='. + */ EQUAL, - /** Relational operator '!='. */ + /** + * Relational operator '!='. + */ NOT_EQUAL, - /** Addition operator '+'. */ + /** + * Addition operator '+'. + */ PLUS, - /** Subtraction operator '-'. */ + /** + * Subtraction operator '-'. + */ MINUS, - /** Multiplication operator '*'. */ + /** + * Multiplication operator '*'. + */ TIMES, - /** Division operator '/'. */ + /** + * Division operator '/'. + */ DIV, - /** Modulo operator '%'. */ + /** + * Modulo operator '%'. + */ MOD, - /** Attribution operator '='. */ + /** + * Attribution operator '='. + */ ATTRIB, - /** Left parenthesis separator '('. */ + /** + * Left parenthesis separator '('. + */ LPAREN, - /** Right parenthesis separator ')'. */ + /** + * Right parenthesis separator ')'. + */ RPAREN, - /** Left bracket separator '['. */ + /** + * Left bracket separator '['. + */ LBRACKET, - /** Right bracket separator ']'. */ + /** + * Right bracket separator ']'. + */ RBRACKET, - /** Left brace separator '{'. */ + /** + * Left brace separator '{'. + */ LBRACE, - /** Right brace separator '}'. */ + /** + * Right brace separator '}'. + */ RBRACE, - /** End of instruction indicator ';'. */ + /** + * End of instruction indicator ';'. + */ SEMICOLON, - /** Floating point number or object field accessor '.'. */ + /** + * Floating point number or object field accessor '.'. + */ DOT, - /** Array element separator ',' - e.g. String[2] test = {"test1", "test2"}. */ + /** + * Array element separator ',' - e.g. String[2] test = {"test1", "test2"}. + */ COMMA, - /** String literal delimitation - e.g. String test = "teste". */ + /** + * String literal delimitation - e.g. String test = "teste". + */ DOUBLE_QUOTATION, - /** Cpp-style single-line comment using "//". */ + /** + * Cpp-style single-line comment using "//". + */ LINE_COMMENT, - /** C-style multi-line comment start using "/*".*/ + /** + * C-style multi-line comment start using "/*". + */ LBLOCK_COMMENT, - /** C-style multi-line comment end using "* /".*/ + /** + * C-style multi-line comment end using "* /". + */ RBLOCK_COMMENT, - /** End of file indicator. */ + /** + * End of file indicator. + */ EOF } From 711ed257c2462133e0ffbf00bc0ebc1e9acd007e Mon Sep 17 00:00:00 2001 From: LeoRocha Date: Fri, 26 Apr 2019 17:49:28 -0400 Subject: [PATCH 04/22] Add single-line comment removal in preprocessor. Change Main to support preprocessor class->instance method changes. --- src/java/Main.java | 4 +-- src/java/Preprocessor.java | 64 ++++++++++++++++++++++++++++---------- 2 files changed, 50 insertions(+), 18 deletions(-) diff --git a/src/java/Main.java b/src/java/Main.java index 882e5c5..f37beb3 100644 --- a/src/java/Main.java +++ b/src/java/Main.java @@ -11,13 +11,13 @@ public static void main(String[] args) { File directory = new File("../../bin/test.xpp"); System.out.println(directory.getAbsolutePath()); + Preprocessor preprocessor = new Preprocessor(); try { FileReader fileReader = new FileReader(directory); - LineNumberReader lineNumberReader = new LineNumberReader(fileReader); - Preprocessor.preProcess(lineNumberReader); + preprocessor.preProcess(lineNumberReader); lineNumberReader.close(); } catch (FileNotFoundException ex) { diff --git a/src/java/Preprocessor.java b/src/java/Preprocessor.java index 9c3d1ab..86dc806 100644 --- a/src/java/Preprocessor.java +++ b/src/java/Preprocessor.java @@ -1,7 +1,5 @@ -import java.io.FileInputStream; import java.io.IOException; import java.io.LineNumberReader; -import java.util.stream.Stream; /** * Class to Preprocess files. @@ -9,33 +7,67 @@ * @author Leonardo-Rocha */ class Preprocessor { + private static final int MAX_BUFFER_SIZE = 8192; + + /** + * Preprocessed output. + */ + private String[] output; /** * PreProcess file removing comments. * * @param lineNumberReader file reference. */ - static void preProcess(LineNumberReader lineNumberReader) throws IOException { - //TODO complete method. + void preProcess(LineNumberReader lineNumberReader) throws IOException { String line; - Stream stringStream = lineNumberReader.lines(); + output = new String[MAX_BUFFER_SIZE]; + int outputLineIndex = 0; + boolean skipLine = false; + while ((line = lineNumberReader.readLine()) != null) { - char[] charArray = line.toCharArray(); + if (isASinglelineComment(line)) { + System.out.println("Found a single-line comment at line " + lineNumberReader.getLineNumber()); + skipLine = true; + } + if (isAMultilineComment(lineNumberReader)) { + //TODO complete method. + } - for (int position = 0; position < charArray.length; position++) { - if (isASingleLineComment(charArray, position)) { - System.out.println("Found a single-line comment at line " + lineNumberReader.getLineNumber()); - } + if (!skipLine) + output[outputLineIndex++] = line; - } + skipLine = false; + } + output[outputLineIndex] = "EOF"; + printOutput(); + } + + /** + * Print preprocessed code. + */ + public void printOutput() { + for (String line : output) { System.out.println(line); + if (line.equals("EOF")) + break; } } - private static boolean isASingleLineComment(char[] charArray, int position) { - if (position + 1 == charArray.length) - return false; - else - return (charArray[position] == '/' && charArray[position + 1] == '/'); + /** + * @param line current line being preprocessed. + * @return true if the line is a single-line comment. + */ + private static boolean isASinglelineComment(String line) { + return line.contains("//"); + } + + /** + * @param line + * @return true if we found a multi-line comment. + */ + private static boolean isAMultilineComment(LineNumberReader lineNumberReader) { + //TODO complete method. + return true; } } \ No newline at end of file From f2f32f76732374ac53e1ae7a7aafe302627fb60c Mon Sep 17 00:00:00 2001 From: LeoRocha Date: Fri, 26 Apr 2019 18:04:42 -0400 Subject: [PATCH 05/22] Add early implementation of multi-line comment removal. --- src/java/Preprocessor.java | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/java/Preprocessor.java b/src/java/Preprocessor.java index 86dc806..c5abb8e 100644 --- a/src/java/Preprocessor.java +++ b/src/java/Preprocessor.java @@ -26,11 +26,11 @@ void preProcess(LineNumberReader lineNumberReader) throws IOException { boolean skipLine = false; while ((line = lineNumberReader.readLine()) != null) { - if (isASinglelineComment(line)) { + if (isASingleLineComment(line)) { System.out.println("Found a single-line comment at line " + lineNumberReader.getLineNumber()); skipLine = true; } - if (isAMultilineComment(lineNumberReader)) { + if (isAMultiLineComment(lineNumberReader, line)) { //TODO complete method. } @@ -58,16 +58,31 @@ public void printOutput() { * @param line current line being preprocessed. * @return true if the line is a single-line comment. */ - private static boolean isASinglelineComment(String line) { + private static boolean isASingleLineComment(String line) { return line.contains("//"); } /** * @param line + * @param currentLine * @return true if we found a multi-line comment. */ - private static boolean isAMultilineComment(LineNumberReader lineNumberReader) { - //TODO complete method. + private static boolean isAMultiLineComment(LineNumberReader lineNumberReader, String currentLine) { + if(currentLine.contains("/*")){ + //try to match with another */ + String separator = "/" + "*"; + if(currentLine.contains("*/")) { + String[] splitStrings = currentLine.split(separator); + System.out.println(splitStrings[0]); + System.out.println(splitStrings[1]); + } + else{ + //search for the matching */ in another line + while(!(currentLine = lineNumberReader.readLine()).contains("*/")){ + //found the line that contains the matching */ + } + } + } return true; } } \ No newline at end of file From f0659853c01a12e88822ae1194857f075c9438e6 Mon Sep 17 00:00:00 2001 From: LeoRocha Date: Sat, 27 Apr 2019 18:36:30 -0400 Subject: [PATCH 06/22] Change test file to support strange cases. Change initial print in Main. Add preprocessor implementation to remove multi-line comments - working but need refactoring. --- bin/test.xpp | 9 ++-- src/java/Main.java | 2 +- src/java/Preprocessor.java | 89 ++++++++++++++++++++++---------------- 3 files changed, 57 insertions(+), 43 deletions(-) diff --git a/bin/test.xpp b/bin/test.xpp index ebe8156..33a4183 100644 --- a/bin/test.xpp +++ b/bin/test.xpp @@ -1,7 +1,8 @@ //this is a simple comment to test +x = /*fucking comment */ 10; int x = 10; -int y = 10; -/* - another comment to ignore -*/ \ No newline at end of file +/* another comment */ +/* strange comment +*/ +int y = 10; \ No newline at end of file diff --git a/src/java/Main.java b/src/java/Main.java index f37beb3..0dc51eb 100644 --- a/src/java/Main.java +++ b/src/java/Main.java @@ -10,7 +10,7 @@ public class Main { public static void main(String[] args) { File directory = new File("../../bin/test.xpp"); - System.out.println(directory.getAbsolutePath()); + System.out.println("Test file: " + directory.getAbsolutePath()); Preprocessor preprocessor = new Preprocessor(); try { diff --git a/src/java/Preprocessor.java b/src/java/Preprocessor.java index c5abb8e..762892e 100644 --- a/src/java/Preprocessor.java +++ b/src/java/Preprocessor.java @@ -20,24 +20,19 @@ class Preprocessor { * @param lineNumberReader file reference. */ void preProcess(LineNumberReader lineNumberReader) throws IOException { - String line; + String currentLine; output = new String[MAX_BUFFER_SIZE]; int outputLineIndex = 0; - boolean skipLine = false; - while ((line = lineNumberReader.readLine()) != null) { - if (isASingleLineComment(line)) { - System.out.println("Found a single-line comment at line " + lineNumberReader.getLineNumber()); - skipLine = true; - } - if (isAMultiLineComment(lineNumberReader, line)) { - //TODO complete method. - } - - if (!skipLine) - output[outputLineIndex++] = line; - - skipLine = false; + while ((currentLine = lineNumberReader.readLine()) != null) { + if (isAMultiLineComment(currentLine)) { + String[] splitMultiLineComment = splitMultiLineComment(lineNumberReader, currentLine); + for (String line : splitMultiLineComment) { + if (line != null && !line.isEmpty()) + output[outputLineIndex++] = line; + } + } else if (isNotASingleLineComment(currentLine)) + output[outputLineIndex++] = currentLine; } output[outputLineIndex] = "EOF"; printOutput(); @@ -55,34 +50,52 @@ public void printOutput() { } /** - * @param line current line being preprocessed. + * @param currentLine last read line.. * @return true if the line is a single-line comment. */ - private static boolean isASingleLineComment(String line) { - return line.contains("//"); + private static boolean isNotASingleLineComment(String currentLine) { + return !currentLine.contains("//"); } /** - * @param line - * @param currentLine - * @return true if we found a multi-line comment. + * @param currentLine last read line. + * @return true if the line marks a Multi-line comment. */ - private static boolean isAMultiLineComment(LineNumberReader lineNumberReader, String currentLine) { - if(currentLine.contains("/*")){ - //try to match with another */ - String separator = "/" + "*"; - if(currentLine.contains("*/")) { - String[] splitStrings = currentLine.split(separator); - System.out.println(splitStrings[0]); - System.out.println(splitStrings[1]); - } - else{ - //search for the matching */ in another line - while(!(currentLine = lineNumberReader.readLine()).contains("*/")){ - //found the line that contains the matching */ - } - } + private static boolean isAMultiLineComment(String currentLine) { + return currentLine.contains("/*"); + } + + private static String[] splitMultiLineComment(LineNumberReader lineNumberReader, String currentLine) throws IOException { + int linesDistance = 0; + String[] output = new String[2]; + + String[] splitStrings = currentLine.split("/\\*"); + if(splitStrings.length > 0) + output[0] = splitStrings[0]; + else + output[0] = ""; + //search for the matching */ + while (!currentLine.contains("*/")) { + currentLine = lineNumberReader.readLine(); + linesDistance++; + } + //found the line that contains the matching */ + if (linesDistance == 0) { + splitStrings = splitStrings[1].split("\\*/"); + if(splitStrings.length > 1) + output[0] += "" + splitStrings[1]; + else + output[0] += ""; + } else { + splitStrings = currentLine.split("\\*/"); + if (splitStrings.length > 1) + output[1] = splitStrings[1]; + else + output[1] = ""; } - return true; + + return output; } -} \ No newline at end of file + + +} From 68087aca3d19a5e3a5a18d974081a4a8a1ef4a7b Mon Sep 17 00:00:00 2001 From: LeoRocha Date: Sat, 27 Apr 2019 19:00:22 -0400 Subject: [PATCH 07/22] Refactor preprocessor methods. --- src/java/Main.java | 2 +- src/java/Preprocessor.java | 71 +++++++++++++++++++++++++++++--------- 2 files changed, 56 insertions(+), 17 deletions(-) diff --git a/src/java/Main.java b/src/java/Main.java index 0dc51eb..107ec85 100644 --- a/src/java/Main.java +++ b/src/java/Main.java @@ -3,7 +3,7 @@ /** * Main class. * - * @author Leonardo-Rocha + * @author Leonardo-Rocha, Gabriel Chiquetto. */ public class Main { diff --git a/src/java/Preprocessor.java b/src/java/Preprocessor.java index 762892e..c0bf669 100644 --- a/src/java/Preprocessor.java +++ b/src/java/Preprocessor.java @@ -4,7 +4,7 @@ /** * Class to Preprocess files. * - * @author Leonardo-Rocha + * @author Leonardo-Rocha, Gabriel Chiquetto. */ class Preprocessor { private static final int MAX_BUFFER_SIZE = 8192; @@ -15,34 +15,60 @@ class Preprocessor { private String[] output; /** - * PreProcess file removing comments. + * PreProcess file. * * @param lineNumberReader file reference. + * @throws IOException if something goes wrong during removeCommentsAndAddEOF. */ void preProcess(LineNumberReader lineNumberReader) throws IOException { - String currentLine; output = new String[MAX_BUFFER_SIZE]; + + removeCommentsAndAddEOF(lineNumberReader); + + printOutput(); + } + + /** + * Remove single-line/multi-line comments and add EOF to the end of the file. + * + * @param lineNumberReader file reference. + * @throws IOException if something goes wrong during buffer line reading. + */ + private void removeCommentsAndAddEOF(LineNumberReader lineNumberReader) throws IOException { + String currentLine; int outputLineIndex = 0; while ((currentLine = lineNumberReader.readLine()) != null) { if (isAMultiLineComment(currentLine)) { - String[] splitMultiLineComment = splitMultiLineComment(lineNumberReader, currentLine); - for (String line : splitMultiLineComment) { - if (line != null && !line.isEmpty()) - output[outputLineIndex++] = line; - } + outputLineIndex = handleMultiLineComments(lineNumberReader, currentLine, outputLineIndex); } else if (isNotASingleLineComment(currentLine)) output[outputLineIndex++] = currentLine; } output[outputLineIndex] = "EOF"; - printOutput(); + } + + /** + * @param lineNumberReader file reference. + * @param currentLine last read line + * @param outputLineIndex last valid output line index. + * @return outputLineIndex. + * @throws IOException if something goes wrong during splitMultiLineComment. + */ + private int handleMultiLineComments(LineNumberReader lineNumberReader, String currentLine, int outputLineIndex) + throws IOException { + String[] splitMultiLineComment = splitMultiLineComment(lineNumberReader, currentLine); + for (String line : splitMultiLineComment) { + if (line != null && !line.isEmpty()) + output[outputLineIndex++] = line; + } + return outputLineIndex; } /** * Print preprocessed code. */ public void printOutput() { - for (String line : output) { + for (String line : getOutput()) { System.out.println(line); if (line.equals("EOF")) break; @@ -50,8 +76,8 @@ public void printOutput() { } /** - * @param currentLine last read line.. - * @return true if the line is a single-line comment. + * @param currentLine last read line. + * @return true if the line is NOT a single-line comment. */ private static boolean isNotASingleLineComment(String currentLine) { return !currentLine.contains("//"); @@ -65,12 +91,19 @@ private static boolean isAMultiLineComment(String currentLine) { return currentLine.contains("/*"); } - private static String[] splitMultiLineComment(LineNumberReader lineNumberReader, String currentLine) throws IOException { + /** + * @param lineNumberReader file reference. + * @param currentLine last read line. + * @return processed String[] without Multi-line comment. + * @throws IOException if something goes wrong during buffer line reading or string splitting.. + */ + private static String[] splitMultiLineComment(LineNumberReader lineNumberReader, String currentLine) + throws IOException { int linesDistance = 0; String[] output = new String[2]; String[] splitStrings = currentLine.split("/\\*"); - if(splitStrings.length > 0) + if (splitStrings.length > 0) output[0] = splitStrings[0]; else output[0] = ""; @@ -82,7 +115,7 @@ private static String[] splitMultiLineComment(LineNumberReader lineNumberReader, //found the line that contains the matching */ if (linesDistance == 0) { splitStrings = splitStrings[1].split("\\*/"); - if(splitStrings.length > 1) + if (splitStrings.length > 1) output[0] += "" + splitStrings[1]; else output[0] += ""; @@ -97,5 +130,11 @@ private static String[] splitMultiLineComment(LineNumberReader lineNumberReader, return output; } - + /** + * Get the preprocessed output. + * @return output. + */ + public String[] getOutput() { + return output; + } } From 6c3f38afc49fc1f4db031954d31ca3759fc9384c Mon Sep 17 00:00:00 2001 From: LeoRocha Date: Sun, 28 Apr 2019 20:46:16 -0400 Subject: [PATCH 08/22] Refactor preprocessor and main. Change some names in TokenType. --- src/java/Main.java | 8 +-- src/java/Preprocessor.java | 118 ++++++++++++++++++++++--------------- src/java/TokenType.java | 4 +- 3 files changed, 76 insertions(+), 54 deletions(-) diff --git a/src/java/Main.java b/src/java/Main.java index 107ec85..f4d3489 100644 --- a/src/java/Main.java +++ b/src/java/Main.java @@ -11,15 +11,11 @@ public static void main(String[] args) { File directory = new File("../../bin/test.xpp"); System.out.println("Test file: " + directory.getAbsolutePath()); - Preprocessor preprocessor = new Preprocessor(); try { - FileReader fileReader = new FileReader(directory); - LineNumberReader lineNumberReader = new LineNumberReader(fileReader); + Preprocessor preprocessor = new Preprocessor(); + preprocessor.preProcess(directory); - preprocessor.preProcess(lineNumberReader); - - lineNumberReader.close(); } catch (FileNotFoundException ex) { System.out.println("Unable to open file '" + directory + "'"); } catch (IOException ex) { diff --git a/src/java/Preprocessor.java b/src/java/Preprocessor.java index c0bf669..6711339 100644 --- a/src/java/Preprocessor.java +++ b/src/java/Preprocessor.java @@ -1,3 +1,5 @@ +import java.io.File; +import java.io.FileReader; import java.io.IOException; import java.io.LineNumberReader; @@ -14,16 +16,29 @@ class Preprocessor { */ private String[] output; + /** + * Buffered reader containing the source code. + */ + private LineNumberReader lineNumberReader; + + /** + * Current line being read. + */ + private String currentLine; + /** * PreProcess file. * * @param lineNumberReader file reference. * @throws IOException if something goes wrong during removeCommentsAndAddEOF. */ - void preProcess(LineNumberReader lineNumberReader) throws IOException { + void preProcess(File directory) throws IOException { output = new String[MAX_BUFFER_SIZE]; + FileReader fileReader = new FileReader(directory); + lineNumberReader = new LineNumberReader(fileReader); - removeCommentsAndAddEOF(lineNumberReader); + removeCommentsAndAddEOF(); + lineNumberReader.close(); printOutput(); } @@ -31,33 +46,29 @@ void preProcess(LineNumberReader lineNumberReader) throws IOException { /** * Remove single-line/multi-line comments and add EOF to the end of the file. * - * @param lineNumberReader file reference. * @throws IOException if something goes wrong during buffer line reading. */ - private void removeCommentsAndAddEOF(LineNumberReader lineNumberReader) throws IOException { - String currentLine; + private void removeCommentsAndAddEOF() throws IOException { + int outputLineIndex = 0; while ((currentLine = lineNumberReader.readLine()) != null) { - if (isAMultiLineComment(currentLine)) { - outputLineIndex = handleMultiLineComments(lineNumberReader, currentLine, outputLineIndex); - } else if (isNotASingleLineComment(currentLine)) + if (isABeginMultiLineComment()) { + outputLineIndex = handleMultiLineComments(outputLineIndex); + } else if (isNotASingleLineComment()) output[outputLineIndex++] = currentLine; } output[outputLineIndex] = "EOF"; } /** - * @param lineNumberReader file reference. - * @param currentLine last read line - * @param outputLineIndex last valid output line index. + * @param outputLineIndex last valid output line index. * @return outputLineIndex. * @throws IOException if something goes wrong during splitMultiLineComment. */ - private int handleMultiLineComments(LineNumberReader lineNumberReader, String currentLine, int outputLineIndex) - throws IOException { - String[] splitMultiLineComment = splitMultiLineComment(lineNumberReader, currentLine); - for (String line : splitMultiLineComment) { + private int handleMultiLineComments(int outputLineIndex) throws IOException { + String[] validCode = splitMultiLineComment(); + for (String line : validCode) { if (line != null && !line.isEmpty()) output[outputLineIndex++] = line; } @@ -76,62 +87,77 @@ public void printOutput() { } /** - * @param currentLine last read line. - * @return true if the line is NOT a single-line comment. + * @return true if the current line is NOT a single-line comment. */ - private static boolean isNotASingleLineComment(String currentLine) { - return !currentLine.contains("//"); + private boolean isNotASingleLineComment() { + return currentLine != null && !currentLine.contains("//"); } /** - * @param currentLine last read line. - * @return true if the line marks a Multi-line comment. + * @return true if the current line begins a Multi-line comment. */ - private static boolean isAMultiLineComment(String currentLine) { - return currentLine.contains("/*"); + private boolean isABeginMultiLineComment() { + return currentLine != null && currentLine.contains("/*"); + } + + /** + * @return true if the current line NOT ends a Multi-line comment + */ + private boolean isNotAEndMultiLineComment() { + return currentLine != null && !currentLine.contains("*/"); } /** - * @param lineNumberReader file reference. - * @param currentLine last read line. * @return processed String[] without Multi-line comment. * @throws IOException if something goes wrong during buffer line reading or string splitting.. */ - private static String[] splitMultiLineComment(LineNumberReader lineNumberReader, String currentLine) - throws IOException { + private String[] splitMultiLineComment() throws IOException { int linesDistance = 0; String[] output = new String[2]; String[] splitStrings = currentLine.split("/\\*"); - if (splitStrings.length > 0) - output[0] = splitStrings[0]; - else - output[0] = ""; - //search for the matching */ - while (!currentLine.contains("*/")) { - currentLine = lineNumberReader.readLine(); - linesDistance++; - } - //found the line that contains the matching */ + + output[0] = getValidStatementAtPosition(splitStrings, 0); + linesDistance = findEndMultiLineComment(linesDistance); + if (linesDistance == 0) { - splitStrings = splitStrings[1].split("\\*/"); - if (splitStrings.length > 1) - output[0] += "" + splitStrings[1]; - else - output[0] += ""; + splitStrings = currentLine.split("\\*/"); + output[0] += getValidStatementAtPosition(splitStrings, 1); } else { splitStrings = currentLine.split("\\*/"); - if (splitStrings.length > 1) - output[1] = splitStrings[1]; - else - output[1] = ""; + output[1] = getValidStatementAtPosition(splitStrings, 1); } return output; } + /** + * @param linesDistance Distance between the initial read line and the line where the end Multi-line is found. + * @return linesDistance after the search. + * @throws IOException + */ + private int findEndMultiLineComment(int linesDistance) throws IOException { + while (isNotAEndMultiLineComment()) { + currentLine = lineNumberReader.readLine(); + linesDistance++; + } + return linesDistance; + } + + private static String getValidStatementAtPosition(String[] splitStrings, int position) { + String validStatement; + + if (splitStrings.length > position) + validStatement = splitStrings[position]; + else + validStatement = ""; + + return validStatement; + } + /** * Get the preprocessed output. + * * @return output. */ public String[] getOutput() { diff --git a/src/java/TokenType.java b/src/java/TokenType.java index 314060c..43d69f6 100644 --- a/src/java/TokenType.java +++ b/src/java/TokenType.java @@ -19,11 +19,11 @@ public enum TokenType { /** * Relational operator attribute. */ - RELOP, + REL_OP, /** * Relational operator '<'. */ - LESSTHAN, + LESS_THAN, /** * Relational operator '>'. */ From 4dfc489428335f585ad0f8c1f4d783f3b0b17232 Mon Sep 17 00:00:00 2001 From: LeoRocha Date: Sun, 28 Apr 2019 23:11:15 -0400 Subject: [PATCH 09/22] Change main. Refactor preprocessor. Change preprocessor to write output in a file. Document undocumented fields in preprocessor. --- bin/{test.xpp => test} | 0 bin/test_preprocessed.txt | 4 +++ src/java/Main.java | 14 +++++----- src/java/Preprocessor.java | 53 ++++++++++++++++++++++++++++---------- 4 files changed, 51 insertions(+), 20 deletions(-) rename bin/{test.xpp => test} (100%) create mode 100644 bin/test_preprocessed.txt diff --git a/bin/test.xpp b/bin/test similarity index 100% rename from bin/test.xpp rename to bin/test diff --git a/bin/test_preprocessed.txt b/bin/test_preprocessed.txt new file mode 100644 index 0000000..f98325e --- /dev/null +++ b/bin/test_preprocessed.txt @@ -0,0 +1,4 @@ +x = 10; +int x = 10; +int y = 10; +EOF diff --git a/src/java/Main.java b/src/java/Main.java index f4d3489..8161b18 100644 --- a/src/java/Main.java +++ b/src/java/Main.java @@ -9,17 +9,17 @@ public class Main { public static void main(String[] args) { - File directory = new File("../../bin/test.xpp"); - System.out.println("Test file: " + directory.getAbsolutePath()); - + File filePath = new File("../../bin/test"); + System.out.println("Test file: " + filePath.getAbsolutePath()); + System.out.println("Pre-processing test file..."); try { Preprocessor preprocessor = new Preprocessor(); - preprocessor.preProcess(directory); - + preprocessor.preProcess(filePath); + System.out.println("Preprocess successful."); } catch (FileNotFoundException ex) { - System.out.println("Unable to open file '" + directory + "'"); + System.out.println("Unable to open file '" + filePath + "'"); } catch (IOException ex) { - System.out.println("Error reading file '" + directory + "'"); + System.out.println("Error reading file '" + filePath + "'"); } } diff --git a/src/java/Preprocessor.java b/src/java/Preprocessor.java index 6711339..4edac44 100644 --- a/src/java/Preprocessor.java +++ b/src/java/Preprocessor.java @@ -1,14 +1,14 @@ -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.io.LineNumberReader; +import java.io.*; /** * Class to Preprocess files. * - * @author Leonardo-Rocha, Gabriel Chiquetto. + * @author Leonardo-Rocha. */ class Preprocessor { + /** + * Maximum number of lines in the source code. + */ private static final int MAX_BUFFER_SIZE = 8192; /** @@ -29,18 +29,19 @@ class Preprocessor { /** * PreProcess file. * - * @param lineNumberReader file reference. + * @param filePath path of the file to be preprocessed. * @throws IOException if something goes wrong during removeCommentsAndAddEOF. */ - void preProcess(File directory) throws IOException { + void preProcess(File filePath) throws IOException { output = new String[MAX_BUFFER_SIZE]; - FileReader fileReader = new FileReader(directory); + FileReader fileReader = new FileReader(filePath); lineNumberReader = new LineNumberReader(fileReader); removeCommentsAndAddEOF(); + lineNumberReader.close(); - printOutput(); + printOutputAndWriteToFile(filePath); } /** @@ -55,7 +56,7 @@ private void removeCommentsAndAddEOF() throws IOException { while ((currentLine = lineNumberReader.readLine()) != null) { if (isABeginMultiLineComment()) { outputLineIndex = handleMultiLineComments(outputLineIndex); - } else if (isNotASingleLineComment()) + } else if (isNotASingleLineComment() && !currentLine.isEmpty()) output[outputLineIndex++] = currentLine; } output[outputLineIndex] = "EOF"; @@ -69,21 +70,41 @@ private void removeCommentsAndAddEOF() throws IOException { private int handleMultiLineComments(int outputLineIndex) throws IOException { String[] validCode = splitMultiLineComment(); for (String line : validCode) { - if (line != null && !line.isEmpty()) + if (isAValidLine(line)) output[outputLineIndex++] = line; } return outputLineIndex; } /** - * Print preprocessed code. + * Print preprocessed code and write in a new file with the same name + _preprocessed.txt. + * + * @param filePath original file path. */ - public void printOutput() { + private void printOutputAndWriteToFile(File filePath) throws IOException { + + BufferedWriter outputWriter; + outputWriter = new BufferedWriter(new FileWriter(filePath + "_preprocessed.txt")); + for (String line : getOutput()) { System.out.println(line); + if (isAValidLine(line)) { + outputWriter.write(line); + outputWriter.newLine(); + } if (line.equals("EOF")) break; } + outputWriter.close(); + } + + /** + * + * @param line line to be evaluated. + * @return true if the line is valid. + */ + private boolean isAValidLine(String line) { + return line != null && !line.isEmpty(); } /** @@ -144,6 +165,12 @@ private int findEndMultiLineComment(int linesDistance) throws IOException { return linesDistance; } + /** + * Check the array length and access the given position. + * @param splitStrings array of strings to be filtered. + * @param position array position. + * @return + */ private static String getValidStatementAtPosition(String[] splitStrings, int position) { String validStatement; From fd8f906a2f86c0aba50a55dfa4f55f3d4be1f734 Mon Sep 17 00:00:00 2001 From: LeoRocha Date: Mon, 29 Apr 2019 18:07:30 -0400 Subject: [PATCH 10/22] Add new constructor in token. Add TokenGenerator Initial implementation. --- src/java/Token.java | 9 +++ src/java/TokenGenerator.java | 146 +++++++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+) diff --git a/src/java/Token.java b/src/java/Token.java index 331dff4..a8b4abd 100644 --- a/src/java/Token.java +++ b/src/java/Token.java @@ -40,4 +40,13 @@ public Token(TokenType tokenType, TokenType attribute, String lexeme) { public Token(TokenType tokenType, String lexeme) { this(tokenType, TokenType.UNDEF, lexeme); } + + /** + * Initializes a token only with a token type. + * + * @param tokenType type of the generated token. + */ + public Token(TokenType tokenType){ + this(tokenType, TokenType.UNDEF, ""); + } } diff --git a/src/java/TokenGenerator.java b/src/java/TokenGenerator.java index d9fbe4a..26b28be 100644 --- a/src/java/TokenGenerator.java +++ b/src/java/TokenGenerator.java @@ -1,3 +1,149 @@ +import java.io.File; +import java.io.FileNotFoundException; +import java.util.Scanner; + +import static java.lang.Character.*; + +/** + * + */ public class TokenGenerator { + /** + * Scanner to parse the input. + */ + private Scanner scanner; + + /** + * Line being evaluated. + */ + private String currentLine; + + /** + * Char at the current position. + */ + private char currentChar; + + /** + * Current position. + */ + private int currentLinePosition; + + /** + * Position of the last returned token. + */ + private int lastTokenPosition; + + /** + * Lexeme of the last token returned. + */ + private String lastLexeme; + + /** + * Constructor. + * + * @param sourceCode File reference. + */ + TokenGenerator(File sourceCode) { + try { + scanner = new Scanner(sourceCode); + } catch (FileNotFoundException ex) { + System.out.println("Unable to open file '" + sourceCode + "'"); + } finally { + currentLine = scanner.nextLine(); + currentLinePosition = 0; + lastTokenPosition = 0; + currentChar = currentLine.charAt(currentLinePosition); + } + } + + /** + * @return next Token in the input. + */ + public Token getNextToken() { + + if (isLetter(currentChar) || currentChar == '_') { + advanceInput(); + while (isLetterOrDigit(currentChar) || currentChar == '_') + advanceInput(); + + updateLexeme(); + + return new Token(TokenType.IDENTIFIER, lastLexeme); + } else if (isDigit(currentChar)) { + advanceInput(); + while (isDigit(currentChar)) + advanceInput(); + + updateLexeme(); + + return new Token(TokenType.INTEGER_LITERAL, lastLexeme); + } else if (currentChar == '<') { + advanceInput(); + if (currentChar == '=') + return new Token(TokenType.REL_OP, TokenType.LESS_OR_EQUAL, ""); + + return new Token(TokenType.REL_OP, TokenType.LESS_THAN, ""); + } else if (currentChar == '>') { + advanceInput(); + if (currentChar == '=') + return new Token(TokenType.REL_OP, TokenType.GREATER_OR_EQUAL, ""); + + return new Token(TokenType.REL_OP, TokenType.GREATER_THAN, ""); + } else if (currentChar == '=') { + advanceInput(); + if (currentChar == '=') + return new Token(TokenType.REL_OP, TokenType.EQUAL, ""); + return new Token(TokenType.ATTRIB); + } else if (currentChar == '!') { + advanceInput(); + if (currentChar == '=') { + advanceInput(); + return new Token(TokenType.NOT_EQUAL); + } + } else if (currentChar == '+') { + advanceInput(); + return new Token(TokenType.PLUS); + } else if (currentChar == '-') { + advanceInput(); + ; + return new Token(TokenType.MINUS); + } else if (currentChar == '*') { + advanceInput(); + return new Token(TokenType.TIMES); + } else if (currentChar == '/') { + advanceInput(); + return new Token(TokenType.DIV); + } else if (currentChar == '%') { + advanceInput(); + return new Token(TokenType.MOD); + } + + return new Token(TokenType.UNDEF); + } + + /** + * Update lexeme according to the last valid token position. + */ + private void updateLexeme() { + lastTokenPosition = 0; + if (lastLexeme != null && currentLine.contains(lastLexeme)) + lastTokenPosition = currentLine.indexOf(lastLexeme); + + lastLexeme = currentLine.substring(lastTokenPosition, currentLinePosition); + } + + /** + * Advance on input incrementing the line position and updating the current char. + */ + private void advanceInput() { + currentLinePosition++; + currentChar = currentLine.charAt(currentLinePosition); + } + /** + * @param scanner reference. + */ + public void setScanner(Scanner scanner) { + this.scanner = scanner; + } } From 614d0a7175d779e0a309c39976810d383ba623be Mon Sep 17 00:00:00 2001 From: GabrielChiquetto Date: Wed, 1 May 2019 01:21:58 -0300 Subject: [PATCH 11/22] -change EOF to '$' -Refactor main and add lexical showCase for testing -add .equals and .showCase method to Token Class -Complete the TokenType identification in .getNextToken - Verified functionality for identifiers and Integers -add white space ignore mechanism -add basic lexical error handling --- bin/test_preprocessed.txt | 2 +- src/java/LexicalError.java | 17 +++++++ src/java/Main.java | 38 ++++++++++++---- src/java/Preprocessor.java | 8 +++- src/java/Token.java | 10 ++++ src/java/TokenGenerator.java | 88 ++++++++++++++++++++++++++++++------ 6 files changed, 138 insertions(+), 25 deletions(-) create mode 100644 src/java/LexicalError.java diff --git a/bin/test_preprocessed.txt b/bin/test_preprocessed.txt index f98325e..0166d7e 100644 --- a/bin/test_preprocessed.txt +++ b/bin/test_preprocessed.txt @@ -1,4 +1,4 @@ x = 10; int x = 10; int y = 10; -EOF +$ diff --git a/src/java/LexicalError.java b/src/java/LexicalError.java new file mode 100644 index 0000000..b104be6 --- /dev/null +++ b/src/java/LexicalError.java @@ -0,0 +1,17 @@ +public class LexicalError{ + private static boolean errorState = false; + + private static String errorLog = ""; + + public static void UnexpectedChar(char unexpectedChar){ + errorLog = errorLog + ("Unexpected char found:" + + unexpectedChar + "\n"); + errorState = true; + } + + public static void ComputeErrorLog(){ + if(errorState){ + System.out.println(errorLog); + } + } +} \ No newline at end of file diff --git a/src/java/Main.java b/src/java/Main.java index 8161b18..d9a6c0d 100644 --- a/src/java/Main.java +++ b/src/java/Main.java @@ -9,18 +9,40 @@ public class Main { public static void main(String[] args) { - File filePath = new File("../../bin/test"); - System.out.println("Test file: " + filePath.getAbsolutePath()); + File filePath = new File("bin\\test"); + + filePath = FindFile(useProcessedFile(filePath)); + + RunTest(filePath); + } + + private static File FindFile(String path){ + File filePath = new File(path); + return filePath; + } + + private static void RunTest(File source){ + TokenGenerator Tokenizer = new TokenGenerator(source); + Token currentToken = Tokenizer.getNextToken(); + while (!currentToken.equals(TokenType.EOF)){ + currentToken.showCase(); + currentToken = Tokenizer.getNextToken(); + } + + } + + private static String useProcessedFile(File rawSource){ + System.out.println("Test file: " + rawSource.getAbsolutePath()); System.out.println("Pre-processing test file..."); try { Preprocessor preprocessor = new Preprocessor(); - preprocessor.preProcess(filePath); + preprocessor.preProcess(rawSource); System.out.println("Preprocess successful."); - } catch (FileNotFoundException ex) { - System.out.println("Unable to open file '" + filePath + "'"); + }catch (FileNotFoundException ex) { + System.out.println("Unable to open file '" + rawSource + "'"); } catch (IOException ex) { - System.out.println("Error reading file '" + filePath + "'"); - } + System.out.println("Error reading file '" + rawSource + "'"); + } + return rawSource + "_preprocessed.txt"; } - } \ No newline at end of file diff --git a/src/java/Preprocessor.java b/src/java/Preprocessor.java index 4edac44..59fd6dc 100644 --- a/src/java/Preprocessor.java +++ b/src/java/Preprocessor.java @@ -6,6 +6,10 @@ * @author Leonardo-Rocha. */ class Preprocessor { + /** + * Definition of End Of File Marker. + */ + private static final char EOF = '$'; /** * Maximum number of lines in the source code. */ @@ -59,7 +63,7 @@ private void removeCommentsAndAddEOF() throws IOException { } else if (isNotASingleLineComment() && !currentLine.isEmpty()) output[outputLineIndex++] = currentLine; } - output[outputLineIndex] = "EOF"; + output[outputLineIndex] = "" + EOF; } /** @@ -92,7 +96,7 @@ private void printOutputAndWriteToFile(File filePath) throws IOException { outputWriter.write(line); outputWriter.newLine(); } - if (line.equals("EOF")) + if (line.equals("" + EOF)) break; } outputWriter.close(); diff --git a/src/java/Token.java b/src/java/Token.java index a8b4abd..af74046 100644 --- a/src/java/Token.java +++ b/src/java/Token.java @@ -49,4 +49,14 @@ public Token(TokenType tokenType, String lexeme) { public Token(TokenType tokenType){ this(tokenType, TokenType.UNDEF, ""); } + + public boolean equals(TokenType obj) { + return this.tokenType == obj; + } + + public void showCase(){ + System.out.println("lexeme :" + this.lexeme); + System.out.println("TokenType : " + tokenType); + System.out.println("attribute : "+ attribute); + } } diff --git a/src/java/TokenGenerator.java b/src/java/TokenGenerator.java index 26b28be..7be97ae 100644 --- a/src/java/TokenGenerator.java +++ b/src/java/TokenGenerator.java @@ -38,6 +38,8 @@ public class TokenGenerator { */ private String lastLexeme; + private static final String OpString = "+-/%*" ; + /** * Constructor. * @@ -49,10 +51,16 @@ public class TokenGenerator { } catch (FileNotFoundException ex) { System.out.println("Unable to open file '" + sourceCode + "'"); } finally { - currentLine = scanner.nextLine(); - currentLinePosition = 0; - lastTokenPosition = 0; - currentChar = currentLine.charAt(currentLinePosition); + try { + currentLine = scanner.nextLine(); + currentLinePosition = 0; + lastTokenPosition = 0; + currentChar = currentLine.charAt(currentLinePosition); + + if(isWhitespace(currentChar)){advanceInput();} + } catch (NullPointerException e){ + currentChar = '$'; + } } } @@ -71,41 +79,49 @@ public Token getNextToken() { return new Token(TokenType.IDENTIFIER, lastLexeme); } else if (isDigit(currentChar)) { advanceInput(); - while (isDigit(currentChar)) + while (isDigit(currentChar)) { advanceInput(); - + } + if(!isWhitespace(currentChar) && !isOperator(currentChar)){ + LexicalError.UnexpectedChar(currentChar); + } updateLexeme(); return new Token(TokenType.INTEGER_LITERAL, lastLexeme); } else if (currentChar == '<') { advanceInput(); - if (currentChar == '=') + if (currentChar == '=') { + advanceInput(); return new Token(TokenType.REL_OP, TokenType.LESS_OR_EQUAL, ""); - + } return new Token(TokenType.REL_OP, TokenType.LESS_THAN, ""); } else if (currentChar == '>') { advanceInput(); - if (currentChar == '=') + if (currentChar == '=') { + advanceInput(); return new Token(TokenType.REL_OP, TokenType.GREATER_OR_EQUAL, ""); - + } return new Token(TokenType.REL_OP, TokenType.GREATER_THAN, ""); } else if (currentChar == '=') { advanceInput(); - if (currentChar == '=') + if (currentChar == '=') { + advanceInput(); return new Token(TokenType.REL_OP, TokenType.EQUAL, ""); + } return new Token(TokenType.ATTRIB); } else if (currentChar == '!') { advanceInput(); if (currentChar == '=') { advanceInput(); return new Token(TokenType.NOT_EQUAL); + }else{ + LexicalError.UnexpectedChar('!'); } } else if (currentChar == '+') { advanceInput(); return new Token(TokenType.PLUS); } else if (currentChar == '-') { advanceInput(); - ; return new Token(TokenType.MINUS); } else if (currentChar == '*') { advanceInput(); @@ -116,6 +132,38 @@ public Token getNextToken() { } else if (currentChar == '%') { advanceInput(); return new Token(TokenType.MOD); + }else if (currentChar == '('){ + advanceInput(); + return new Token(TokenType.LPAREN); + }else if (currentChar == ')'){ + advanceInput(); + return new Token(TokenType.RPAREN); + }else if (currentChar == '{'){ + advanceInput(); + return new Token(TokenType.LBRACE); + }else if (currentChar == '}'){ + advanceInput(); + return new Token(TokenType.RBRACE); + }else if (currentChar == '['){ + advanceInput(); + return new Token(TokenType.LBRACKET); + }else if (currentChar == ']'){ + advanceInput(); + return new Token(TokenType.RBRACKET); + }else if (currentChar == ';'){ + advanceInput(); + return new Token(TokenType.SEMICOLON); + }else if (currentChar == '.'){ + advanceInput(); + return new Token(TokenType.DOT); + }else if (currentChar == ','){ + advanceInput(); + return new Token(TokenType.COMMA); + }else if (currentChar == '"'){ + advanceInput(); + return new Token(TokenType.DOUBLE_QUOTATION); + }else if (currentChar == '$'){ + return new Token(TokenType.EOF); } return new Token(TokenType.UNDEF); @@ -136,14 +184,26 @@ private void updateLexeme() { * Advance on input incrementing the line position and updating the current char. */ private void advanceInput() { - currentLinePosition++; + if (currentLinePosition + 1 != currentLine.length()) { + currentLinePosition++; + }else{ + currentLine = scanner.nextLine(); + currentLinePosition = 0; + } currentChar = currentLine.charAt(currentLinePosition); + while (isWhitespace(currentChar)) + advanceInput(); } - /** * @param scanner reference. */ public void setScanner(Scanner scanner) { this.scanner = scanner; } + + private boolean isOperator(char expectedOp){ + return OpString.contains("" + expectedOp); + } + + } From 17ba13bb27b3ff3280ef20e014e2dfedc066113e Mon Sep 17 00:00:00 2001 From: LeoRocha Date: Wed, 1 May 2019 13:24:30 -0400 Subject: [PATCH 12/22] Format all classes properly. Document undocumented methods and classes. Fix errors according to IntelliJ code inspection. Remove unused entries in enum TokenType. --- src/java/Keywords.java | 60 +++++++++++++++++++++++++++--------- src/java/LexicalError.java | 22 ++++++++++--- src/java/Main.java | 38 +++++++++++++++-------- src/java/Parser.java | 2 +- src/java/Preprocessor.java | 27 ++++++++-------- src/java/Token.java | 14 ++++----- src/java/TokenGenerator.java | 58 +++++++++++++++++++--------------- src/java/TokenType.java | 16 ++-------- 8 files changed, 145 insertions(+), 92 deletions(-) diff --git a/src/java/Keywords.java b/src/java/Keywords.java index b3eadc8..31aa9f4 100644 --- a/src/java/Keywords.java +++ b/src/java/Keywords.java @@ -1,34 +1,64 @@ /** * Enum which defines the language reserved keywords. + * * @author Leonardo Rocha, Gabriel Chiquetto */ -public enum Keywords{ - /** Used for class declarations. */ +@SuppressWarnings("unused") +public enum Keywords { + /** + * Used for class declarations. + */ CLASS, - /** Used for class inheritance. */ + /** + * Used for class inheritance. + */ EXTENDS, - /** Primitive type declaration "integer". */ + /** + * Primitive type declaration "integer". + */ INT, - /** String reference declaration. */ + /** + * String reference declaration. + */ STRING, - /** Terminates execution of current scope. */ + /** + * Terminates execution of current scope. + */ BREAK, - /** Show a message in the standard output. */ + /** + * Show a message in the standard output. + */ PRINT, - /** Read from standard input. */ + /** + * Read from standard input. + */ READ, - /** It causes program control to transfer back to the caller of the method. */ + /** + * It causes program control to transfer back to the caller of the method. + */ RETURN, - /** References the immediate parent of a class. */ + /** + * References the immediate parent of a class. + */ SUPER, - /** Conditional statement that executes a block when the expression is true. */ + /** + * Conditional statement that executes a block when the expression is true. + */ IF, - /** Executes when the Boolean expression of the matching "if" is false. */ + /** + * Executes when the Boolean expression of the matching "if" is false. + */ ELSE, - /** Iterative Loop control structure. */ + /** + * Iterative Loop control structure. + */ FOR, - /** Memory allocation for a new object. */ + /** + * Memory allocation for a new object. + */ NEW, - /** Method called when the object is allocated. */ + /** + * Method called when the object is allocated. + */ CONSTRUCTOR } diff --git a/src/java/LexicalError.java b/src/java/LexicalError.java index b104be6..acb72a5 100644 --- a/src/java/LexicalError.java +++ b/src/java/LexicalError.java @@ -1,16 +1,30 @@ -public class LexicalError{ +class LexicalError { + /** + * Indicates the error state. + */ private static boolean errorState = false; + /** + * Error log message. + */ private static String errorLog = ""; - public static void UnexpectedChar(char unexpectedChar){ + /** + * Unexpected char error. + * + * @param unexpectedChar char to display why is an error. + */ + public static void unexpectedChar(char unexpectedChar) { errorLog = errorLog + ("Unexpected char found:" + unexpectedChar + "\n"); errorState = true; } - public static void ComputeErrorLog(){ - if(errorState){ + /** + * Log error message. + */ + public static void computeErrorLog() { + if (errorState) { System.out.println(errorLog); } } diff --git a/src/java/Main.java b/src/java/Main.java index d9a6c0d..b10194f 100644 --- a/src/java/Main.java +++ b/src/java/Main.java @@ -5,44 +5,56 @@ * * @author Leonardo-Rocha, Gabriel Chiquetto. */ -public class Main { +class Main { public static void main(String[] args) { - File filePath = new File("bin\\test"); + File filePath = openFile("bin\\test"); - filePath = FindFile(useProcessedFile(filePath)); + filePath = openFile(preprocessFile(filePath)); - RunTest(filePath); + runTest(filePath); } - private static File FindFile(String path){ - File filePath = new File(path); - return filePath; + /** + * @param path path of the file to be open. + * @return open file reference. + */ + private static File openFile(String path) { + return new File(path); } - private static void RunTest(File source){ + /** + * Runs a test with the TokenGenerator to visually check if it's working. + * + * @param source file to run the token generator. + */ + private static void runTest(File source) { TokenGenerator Tokenizer = new TokenGenerator(source); Token currentToken = Tokenizer.getNextToken(); - while (!currentToken.equals(TokenType.EOF)){ + while (!currentToken.equals(TokenType.EOF)) { currentToken.showCase(); currentToken = Tokenizer.getNextToken(); } - + } - private static String useProcessedFile(File rawSource){ + /** + * @param rawSource source to preprocess. + * @return preprocessed file path. + */ + private static String preprocessFile(File rawSource) { System.out.println("Test file: " + rawSource.getAbsolutePath()); System.out.println("Pre-processing test file..."); try { Preprocessor preprocessor = new Preprocessor(); preprocessor.preProcess(rawSource); System.out.println("Preprocess successful."); - }catch (FileNotFoundException ex) { + } catch (FileNotFoundException ex) { System.out.println("Unable to open file '" + rawSource + "'"); } catch (IOException ex) { System.out.println("Error reading file '" + rawSource + "'"); - } + } return rawSource + "_preprocessed.txt"; } } \ No newline at end of file diff --git a/src/java/Parser.java b/src/java/Parser.java index aa7e4e0..91c1439 100644 --- a/src/java/Parser.java +++ b/src/java/Parser.java @@ -1,3 +1,3 @@ -public class Parser { +class Parser { } \ No newline at end of file diff --git a/src/java/Preprocessor.java b/src/java/Preprocessor.java index 59fd6dc..e2afbf6 100644 --- a/src/java/Preprocessor.java +++ b/src/java/Preprocessor.java @@ -58,9 +58,9 @@ private void removeCommentsAndAddEOF() throws IOException { int outputLineIndex = 0; while ((currentLine = lineNumberReader.readLine()) != null) { - if (isABeginMultiLineComment()) { + if (isBeginMultiLineComment()) { outputLineIndex = handleMultiLineComments(outputLineIndex); - } else if (isNotASingleLineComment() && !currentLine.isEmpty()) + } else if (isNotSingleLineComment() && !currentLine.isEmpty()) output[outputLineIndex++] = currentLine; } output[outputLineIndex] = "" + EOF; @@ -74,7 +74,7 @@ private void removeCommentsAndAddEOF() throws IOException { private int handleMultiLineComments(int outputLineIndex) throws IOException { String[] validCode = splitMultiLineComment(); for (String line : validCode) { - if (isAValidLine(line)) + if (isValidLine(line)) output[outputLineIndex++] = line; } return outputLineIndex; @@ -92,7 +92,7 @@ private void printOutputAndWriteToFile(File filePath) throws IOException { for (String line : getOutput()) { System.out.println(line); - if (isAValidLine(line)) { + if (isValidLine(line)) { outputWriter.write(line); outputWriter.newLine(); } @@ -103,32 +103,31 @@ private void printOutputAndWriteToFile(File filePath) throws IOException { } /** - * * @param line line to be evaluated. * @return true if the line is valid. */ - private boolean isAValidLine(String line) { + private boolean isValidLine(String line) { return line != null && !line.isEmpty(); } /** * @return true if the current line is NOT a single-line comment. */ - private boolean isNotASingleLineComment() { + private boolean isNotSingleLineComment() { return currentLine != null && !currentLine.contains("//"); } /** * @return true if the current line begins a Multi-line comment. */ - private boolean isABeginMultiLineComment() { + private boolean isBeginMultiLineComment() { return currentLine != null && currentLine.contains("/*"); } /** * @return true if the current line NOT ends a Multi-line comment */ - private boolean isNotAEndMultiLineComment() { + private boolean isNotEndMultiLineComment() { return currentLine != null && !currentLine.contains("*/"); } @@ -159,10 +158,10 @@ private String[] splitMultiLineComment() throws IOException { /** * @param linesDistance Distance between the initial read line and the line where the end Multi-line is found. * @return linesDistance after the search. - * @throws IOException + * @throws IOException if an error occurs during buffer line reading. */ private int findEndMultiLineComment(int linesDistance) throws IOException { - while (isNotAEndMultiLineComment()) { + while (isNotEndMultiLineComment()) { currentLine = lineNumberReader.readLine(); linesDistance++; } @@ -171,9 +170,10 @@ private int findEndMultiLineComment(int linesDistance) throws IOException { /** * Check the array length and access the given position. + * * @param splitStrings array of strings to be filtered. - * @param position array position. - * @return + * @param position array position. + * @return validStatement string. */ private static String getValidStatementAtPosition(String[] splitStrings, int position) { String validStatement; @@ -191,6 +191,7 @@ private static String getValidStatementAtPosition(String[] splitStrings, int pos * * @return output. */ + @SuppressWarnings("WeakerAccess") public String[] getOutput() { return output; } diff --git a/src/java/Token.java b/src/java/Token.java index af74046..371b681 100644 --- a/src/java/Token.java +++ b/src/java/Token.java @@ -4,19 +4,19 @@ * * @author Gabriel Chiquetto, Leonardo-Rocha */ -public class Token { +class Token { /** * Abstract symbol that represents a type of lexical unit. */ - private TokenType tokenType; + private final TokenType tokenType; /** * Optional value of a token. */ - private TokenType attribute; + private final TokenType attribute; /** * Sequence of characters that represents the token in the source code. */ - private String lexeme; + private final String lexeme; /** * Initializes a token with the main fields. @@ -46,7 +46,7 @@ public Token(TokenType tokenType, String lexeme) { * * @param tokenType type of the generated token. */ - public Token(TokenType tokenType){ + public Token(TokenType tokenType) { this(tokenType, TokenType.UNDEF, ""); } @@ -54,9 +54,9 @@ public boolean equals(TokenType obj) { return this.tokenType == obj; } - public void showCase(){ + public void showCase() { System.out.println("lexeme :" + this.lexeme); System.out.println("TokenType : " + tokenType); - System.out.println("attribute : "+ attribute); + System.out.println("attribute : " + attribute); } } diff --git a/src/java/TokenGenerator.java b/src/java/TokenGenerator.java index 7be97ae..4c3d10b 100644 --- a/src/java/TokenGenerator.java +++ b/src/java/TokenGenerator.java @@ -5,9 +5,11 @@ import static java.lang.Character.*; /** - * + * Also known as Scanner or Lexer. This class does the lexical analysis and returns the tokens to the parser. + * + * @author Leonardo-Rocha, Gabriel Chiquetto. */ -public class TokenGenerator { +class TokenGenerator { /** * Scanner to parse the input. */ @@ -38,7 +40,7 @@ public class TokenGenerator { */ private String lastLexeme; - private static final String OpString = "+-/%*" ; + private static final String OpString = "+-/%*"; /** * Constructor. @@ -52,13 +54,16 @@ public class TokenGenerator { System.out.println("Unable to open file '" + sourceCode + "'"); } finally { try { + assert scanner != null; currentLine = scanner.nextLine(); currentLinePosition = 0; lastTokenPosition = 0; currentChar = currentLine.charAt(currentLinePosition); - if(isWhitespace(currentChar)){advanceInput();} - } catch (NullPointerException e){ + if (isWhitespace(currentChar)) { + advanceInput(); + } + } catch (NullPointerException e) { currentChar = '$'; } } @@ -82,8 +87,8 @@ public Token getNextToken() { while (isDigit(currentChar)) { advanceInput(); } - if(!isWhitespace(currentChar) && !isOperator(currentChar)){ - LexicalError.UnexpectedChar(currentChar); + if (!isWhitespace(currentChar) && !isOperator(currentChar)) { + LexicalError.unexpectedChar(currentChar); } updateLexeme(); @@ -114,8 +119,8 @@ public Token getNextToken() { if (currentChar == '=') { advanceInput(); return new Token(TokenType.NOT_EQUAL); - }else{ - LexicalError.UnexpectedChar('!'); + } else { + LexicalError.unexpectedChar('!'); } } else if (currentChar == '+') { advanceInput(); @@ -132,37 +137,37 @@ public Token getNextToken() { } else if (currentChar == '%') { advanceInput(); return new Token(TokenType.MOD); - }else if (currentChar == '('){ + } else if (currentChar == '(') { advanceInput(); return new Token(TokenType.LPAREN); - }else if (currentChar == ')'){ + } else if (currentChar == ')') { advanceInput(); return new Token(TokenType.RPAREN); - }else if (currentChar == '{'){ + } else if (currentChar == '{') { advanceInput(); return new Token(TokenType.LBRACE); - }else if (currentChar == '}'){ + } else if (currentChar == '}') { advanceInput(); return new Token(TokenType.RBRACE); - }else if (currentChar == '['){ + } else if (currentChar == '[') { advanceInput(); return new Token(TokenType.LBRACKET); - }else if (currentChar == ']'){ + } else if (currentChar == ']') { advanceInput(); return new Token(TokenType.RBRACKET); - }else if (currentChar == ';'){ + } else if (currentChar == ';') { advanceInput(); return new Token(TokenType.SEMICOLON); - }else if (currentChar == '.'){ + } else if (currentChar == '.') { advanceInput(); return new Token(TokenType.DOT); - }else if (currentChar == ','){ + } else if (currentChar == ',') { advanceInput(); return new Token(TokenType.COMMA); - }else if (currentChar == '"'){ + } else if (currentChar == '"') { advanceInput(); return new Token(TokenType.DOUBLE_QUOTATION); - }else if (currentChar == '$'){ + } else if (currentChar == '$') { return new Token(TokenType.EOF); } @@ -186,7 +191,7 @@ private void updateLexeme() { private void advanceInput() { if (currentLinePosition + 1 != currentLine.length()) { currentLinePosition++; - }else{ + } else { currentLine = scanner.nextLine(); currentLinePosition = 0; } @@ -194,6 +199,7 @@ private void advanceInput() { while (isWhitespace(currentChar)) advanceInput(); } + /** * @param scanner reference. */ @@ -201,9 +207,11 @@ public void setScanner(Scanner scanner) { this.scanner = scanner; } - private boolean isOperator(char expectedOp){ - return OpString.contains("" + expectedOp); + /** + * @param expectedOp char to evaluate. + * @return true if the expectedOp is an operator.. + */ + private boolean isOperator(char expectedOp) { + return OpString.contains("" + expectedOp); } - - } diff --git a/src/java/TokenType.java b/src/java/TokenType.java index 43d69f6..4163183 100644 --- a/src/java/TokenType.java +++ b/src/java/TokenType.java @@ -3,7 +3,7 @@ * * @author Gabriel Chiquetto, Leonardo-Rocha */ -public enum TokenType { +enum TokenType { /** * Undefined - used when doesn't match any type of token. */ @@ -105,21 +105,9 @@ public enum TokenType { */ COMMA, /** - * String literal delimitation - e.g. String test = "teste". + * String literal delimitation - e.g. String test = "test". */ DOUBLE_QUOTATION, - /** - * Cpp-style single-line comment using "//". - */ - LINE_COMMENT, - /** - * C-style multi-line comment start using "/*". - */ - LBLOCK_COMMENT, - /** - * C-style multi-line comment end using "* /". - */ - RBLOCK_COMMENT, /** * End of file indicator. */ From 708263101fcb999ec29acd7c09b52e50789bb3d9 Mon Sep 17 00:00:00 2001 From: Leonardo Cavalcante Almeida Rocha <201719050465@lab01-l148.dominio.facom.ufms.br> Date: Thu, 2 May 2019 10:34:16 -0400 Subject: [PATCH 13/22] Add eclipse files. Fix block comment without end in preprocessor. --- .gitignore | 1 + bin/test | 8 ++++---- bin/test_preprocessed.txt | 1 - src/java/.classpath | 6 ++++++ src/java/.project | 17 +++++++++++++++++ src/java/Main.java | 2 +- src/java/Preprocessor.java | 17 +++++++++++------ 7 files changed, 40 insertions(+), 12 deletions(-) create mode 100644 src/java/.classpath create mode 100644 src/java/.project diff --git a/.gitignore b/.gitignore index f4669f3..3112014 100644 --- a/.gitignore +++ b/.gitignore @@ -88,3 +88,4 @@ fabric.properties # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml hs_err_pid* +/.metadata/ diff --git a/bin/test b/bin/test index 33a4183..7ad6fc5 100644 --- a/bin/test +++ b/bin/test @@ -2,7 +2,7 @@ x = /*fucking comment */ 10; int x = 10; -/* another comment */ -/* strange comment -*/ -int y = 10; \ No newline at end of file +/* +int y = 10; +batata = 20; +triangulo = 70; diff --git a/bin/test_preprocessed.txt b/bin/test_preprocessed.txt index 0166d7e..7a01b3c 100644 --- a/bin/test_preprocessed.txt +++ b/bin/test_preprocessed.txt @@ -1,4 +1,3 @@ x = 10; int x = 10; -int y = 10; $ diff --git a/src/java/.classpath b/src/java/.classpath new file mode 100644 index 0000000..3f3893a --- /dev/null +++ b/src/java/.classpath @@ -0,0 +1,6 @@ + + + + + + diff --git a/src/java/.project b/src/java/.project new file mode 100644 index 0000000..f9a9e1e --- /dev/null +++ b/src/java/.project @@ -0,0 +1,17 @@ + + + java + + + + + + org.eclipse.jdt.core.javabuilder + + + + + + org.eclipse.jdt.core.javanature + + diff --git a/src/java/Main.java b/src/java/Main.java index b10194f..ae9059b 100644 --- a/src/java/Main.java +++ b/src/java/Main.java @@ -9,7 +9,7 @@ class Main { public static void main(String[] args) { - File filePath = openFile("bin\\test"); + File filePath = openFile("../../bin/test"); filePath = openFile(preprocessFile(filePath)); diff --git a/src/java/Preprocessor.java b/src/java/Preprocessor.java index e2afbf6..4ac10e7 100644 --- a/src/java/Preprocessor.java +++ b/src/java/Preprocessor.java @@ -10,6 +10,7 @@ class Preprocessor { * Definition of End Of File Marker. */ private static final char EOF = '$'; + /** * Maximum number of lines in the source code. */ @@ -128,9 +129,11 @@ private boolean isBeginMultiLineComment() { * @return true if the current line NOT ends a Multi-line comment */ private boolean isNotEndMultiLineComment() { - return currentLine != null && !currentLine.contains("*/"); + String EOF = "" + Preprocessor.EOF; + boolean NotEndMultiLineComment = currentLine != null && !currentLine.contains("*/"); + return NotEndMultiLineComment && !currentLine.contains(EOF); } - + /** * @return processed String[] without Multi-line comment. * @throws IOException if something goes wrong during buffer line reading or string splitting.. @@ -143,12 +146,14 @@ private String[] splitMultiLineComment() throws IOException { output[0] = getValidStatementAtPosition(splitStrings, 0); linesDistance = findEndMultiLineComment(linesDistance); - - if (linesDistance == 0) { + + if (currentLine == null) { + output[1] = ""; + } else if (linesDistance == 0) { splitStrings = currentLine.split("\\*/"); output[0] += getValidStatementAtPosition(splitStrings, 1); } else { - splitStrings = currentLine.split("\\*/"); + splitStrings = currentLine.split("\\*/"); output[1] = getValidStatementAtPosition(splitStrings, 1); } @@ -178,7 +183,7 @@ private int findEndMultiLineComment(int linesDistance) throws IOException { private static String getValidStatementAtPosition(String[] splitStrings, int position) { String validStatement; - if (splitStrings.length > position) + if (splitStrings != null && splitStrings.length > position) validStatement = splitStrings[position]; else validStatement = ""; From 54f4f45a6bed9560a74b521ebe3e9ab43be829bd Mon Sep 17 00:00:00 2001 From: Leonardo Cavalcante Almeida Rocha <201719050465@lab01-l143.dominio.facom.ufms.br> Date: Thu, 2 May 2019 12:59:55 -0400 Subject: [PATCH 14/22] Change Token constructors. Change token generator next token and updateLexeme. --- src/java/Token.java | 66 +++++++++++++----- src/java/TokenGenerator.java | 131 ++++++++++++++++++++--------------- 2 files changed, 124 insertions(+), 73 deletions(-) diff --git a/src/java/Token.java b/src/java/Token.java index 371b681..f60f3c3 100644 --- a/src/java/Token.java +++ b/src/java/Token.java @@ -16,47 +16,75 @@ class Token { /** * Sequence of characters that represents the token in the source code. */ - private final String lexeme; + private String lexeme; /** * Initializes a token with the main fields. * * @param tokenType type of the generated token. * @param attribute value of the token. - * @param lexeme sequence of characters that represents the token in the source code. */ - public Token(TokenType tokenType, TokenType attribute, String lexeme) { + public Token(TokenType tokenType, TokenType attribute) { this.tokenType = tokenType; this.attribute = attribute; - this.lexeme = lexeme; } /** * Initializes a token without an attribute. * * @param tokenType type of the generated token. - * @param lexeme sequence of characters that represents the token in the source code. */ - public Token(TokenType tokenType, String lexeme) { - this(tokenType, TokenType.UNDEF, lexeme); + public Token(TokenType tokenType) { + this(tokenType, TokenType.UNDEF); } /** - * Initializes a token only with a token type. - * - * @param tokenType type of the generated token. + * Override of equals method. + * @param obj Object to evaluate. + * @return true if they're equal. */ - public Token(TokenType tokenType) { - this(tokenType, TokenType.UNDEF, ""); - } - public boolean equals(TokenType obj) { - return this.tokenType == obj; + return this.getTokenType() == obj; } - + + /** + * Print token informations for debuggin purposes. + */ public void showCase() { - System.out.println("lexeme :" + this.lexeme); - System.out.println("TokenType : " + tokenType); - System.out.println("attribute : " + attribute); + System.out.print("lexeme:" + this.getLexeme()); + System.out.print(", TokenType: " + getTokenType()); + System.out.println(", Attribute: " + getAttribute() + '.'); } + + /** + * + * @return type of the token. + */ + public TokenType getTokenType() { + return tokenType; + } + + /** + * + * @return attribute of the token. + */ + public TokenType getAttribute() { + return attribute; + } + + /** + * + * @return lexeme of the token. + */ + public String getLexeme() { + return lexeme; + } + + /** + * Should only be used once. + * @param lexeme string to set. + */ + void setLexeme(String lexeme) { + this.lexeme = lexeme; + } } diff --git a/src/java/TokenGenerator.java b/src/java/TokenGenerator.java index 4c3d10b..708e354 100644 --- a/src/java/TokenGenerator.java +++ b/src/java/TokenGenerator.java @@ -33,7 +33,7 @@ class TokenGenerator { /** * Position of the last returned token. */ - private int lastTokenPosition; + private int lastTokenEndPosition; /** * Lexeme of the last token returned. @@ -50,22 +50,19 @@ class TokenGenerator { TokenGenerator(File sourceCode) { try { scanner = new Scanner(sourceCode); - } catch (FileNotFoundException ex) { - System.out.println("Unable to open file '" + sourceCode + "'"); - } finally { - try { - assert scanner != null; - currentLine = scanner.nextLine(); - currentLinePosition = 0; - lastTokenPosition = 0; - currentChar = currentLine.charAt(currentLinePosition); + assert scanner != null; + currentLine = scanner.nextLine(); + currentLinePosition = 0; + lastTokenEndPosition = 0; + currentChar = currentLine.charAt(currentLinePosition); - if (isWhitespace(currentChar)) { - advanceInput(); - } - } catch (NullPointerException e) { - currentChar = '$'; + if (isWhitespace(currentChar)) { + advanceInput(); } + } catch (FileNotFoundException ex) { + System.out.println("Unable to open file '" + sourceCode + "'"); + } catch (NullPointerException e) { + currentChar = '$'; } } @@ -73,15 +70,15 @@ class TokenGenerator { * @return next Token in the input. */ public Token getNextToken() { - + + Token token = null; + if (isLetter(currentChar) || currentChar == '_') { advanceInput(); while (isLetterOrDigit(currentChar) || currentChar == '_') advanceInput(); - - updateLexeme(); - - return new Token(TokenType.IDENTIFIER, lastLexeme); + + token = new Token(TokenType.IDENTIFIER); } else if (isDigit(currentChar)) { advanceInput(); while (isDigit(currentChar)) { @@ -90,99 +87,125 @@ public Token getNextToken() { if (!isWhitespace(currentChar) && !isOperator(currentChar)) { LexicalError.unexpectedChar(currentChar); } - updateLexeme(); - - return new Token(TokenType.INTEGER_LITERAL, lastLexeme); + + token = new Token(TokenType.INTEGER_LITERAL); } else if (currentChar == '<') { advanceInput(); if (currentChar == '=') { advanceInput(); - return new Token(TokenType.REL_OP, TokenType.LESS_OR_EQUAL, ""); + + token = new Token(TokenType.REL_OP, TokenType.LESS_OR_EQUAL); } - return new Token(TokenType.REL_OP, TokenType.LESS_THAN, ""); + + token = new Token(TokenType.REL_OP, TokenType.LESS_THAN); } else if (currentChar == '>') { advanceInput(); if (currentChar == '=') { advanceInput(); - return new Token(TokenType.REL_OP, TokenType.GREATER_OR_EQUAL, ""); + + token = new Token(TokenType.REL_OP, TokenType.GREATER_OR_EQUAL); } - return new Token(TokenType.REL_OP, TokenType.GREATER_THAN, ""); + + token = new Token(TokenType.REL_OP, TokenType.GREATER_THAN); } else if (currentChar == '=') { advanceInput(); if (currentChar == '=') { advanceInput(); - return new Token(TokenType.REL_OP, TokenType.EQUAL, ""); + + token = new Token(TokenType.REL_OP, TokenType.EQUAL); } - return new Token(TokenType.ATTRIB); + token = new Token(TokenType.ATTRIB); } else if (currentChar == '!') { advanceInput(); if (currentChar == '=') { advanceInput(); - return new Token(TokenType.NOT_EQUAL); + + token = new Token(TokenType.NOT_EQUAL); } else { LexicalError.unexpectedChar('!'); } } else if (currentChar == '+') { advanceInput(); - return new Token(TokenType.PLUS); + + token = new Token(TokenType.PLUS); } else if (currentChar == '-') { advanceInput(); - return new Token(TokenType.MINUS); + + token = new Token(TokenType.MINUS); } else if (currentChar == '*') { advanceInput(); - return new Token(TokenType.TIMES); + + token = new Token(TokenType.TIMES); } else if (currentChar == '/') { advanceInput(); - return new Token(TokenType.DIV); + + token = new Token(TokenType.DIV); } else if (currentChar == '%') { advanceInput(); - return new Token(TokenType.MOD); + + token = new Token(TokenType.MOD); } else if (currentChar == '(') { advanceInput(); - return new Token(TokenType.LPAREN); + + token = new Token(TokenType.LPAREN); } else if (currentChar == ')') { advanceInput(); - return new Token(TokenType.RPAREN); + + token = new Token(TokenType.RPAREN); } else if (currentChar == '{') { advanceInput(); - return new Token(TokenType.LBRACE); + + token = new Token(TokenType.LBRACE); } else if (currentChar == '}') { advanceInput(); - return new Token(TokenType.RBRACE); + + token = new Token(TokenType.RBRACE); } else if (currentChar == '[') { advanceInput(); - return new Token(TokenType.LBRACKET); + + token = new Token(TokenType.LBRACKET); } else if (currentChar == ']') { advanceInput(); - return new Token(TokenType.RBRACKET); + + token = new Token(TokenType.RBRACKET); } else if (currentChar == ';') { advanceInput(); - return new Token(TokenType.SEMICOLON); + token = new Token(TokenType.SEMICOLON); } else if (currentChar == '.') { advanceInput(); - return new Token(TokenType.DOT); + token = new Token(TokenType.DOT); } else if (currentChar == ',') { advanceInput(); - return new Token(TokenType.COMMA); + + token = new Token(TokenType.COMMA); } else if (currentChar == '"') { advanceInput(); - return new Token(TokenType.DOUBLE_QUOTATION); + + token = new Token(TokenType.DOUBLE_QUOTATION); } else if (currentChar == '$') { - return new Token(TokenType.EOF); - } - - return new Token(TokenType.UNDEF); + token = new Token(TokenType.EOF); + } else + token = new Token(TokenType.UNDEF); + + updateLexeme(); + token.setLexeme(lastLexeme); + return token; } /** * Update lexeme according to the last valid token position. */ private void updateLexeme() { - lastTokenPosition = 0; - if (lastLexeme != null && currentLine.contains(lastLexeme)) - lastTokenPosition = currentLine.indexOf(lastLexeme); - - lastLexeme = currentLine.substring(lastTokenPosition, currentLinePosition); + lastTokenEndPosition = 0; + + if (lastLexeme != null && currentLine.contains(lastLexeme)) { + lastTokenEndPosition = currentLine.indexOf(lastLexeme) + lastLexeme.length(); + } + + if (lastTokenEndPosition != 0 && lastTokenEndPosition == currentLine.length() - 1) + lastLexeme = "" + ';'; + else + lastLexeme = currentLine.substring(lastTokenEndPosition, currentLinePosition); } /** From 34da0909e3fc2225491d34b28e4f074961f2568f Mon Sep 17 00:00:00 2001 From: Gabriel da Silva Chiquetto <201719050309@lab02-l115.dominio.facom.ufms.br> Date: Mon, 6 May 2019 14:34:16 -0400 Subject: [PATCH 15/22] Add more precise error logging. Change main to print errors. Change TokenGenerator to read files using LineNumberReader. Fix lexeme calculation and minor bugs. --- bin/test | 3 +- bin/test_preprocessed.txt | 5 ++ src/java/LexicalError.java | 10 ++-- src/java/Main.java | 11 +++-- src/java/TokenGenerator.java | 93 ++++++++++++++++++++---------------- 5 files changed, 75 insertions(+), 47 deletions(-) diff --git a/bin/test b/bin/test index 7ad6fc5..7f43c86 100644 --- a/bin/test +++ b/bin/test @@ -2,7 +2,8 @@ x = /*fucking comment */ 10; int x = 10; -/* int y = 10; batata = 20; triangulo = 70; +tri%%@ 43566¨ +=== 1/**/2 \ No newline at end of file diff --git a/bin/test_preprocessed.txt b/bin/test_preprocessed.txt index 7a01b3c..5ab02dd 100644 --- a/bin/test_preprocessed.txt +++ b/bin/test_preprocessed.txt @@ -1,3 +1,8 @@ x = 10; int x = 10; +int y = 10; +batata = 20; +triangulo = 70; +tri%%@ 43566¨ +=== 12 $ diff --git a/src/java/LexicalError.java b/src/java/LexicalError.java index acb72a5..766af37 100644 --- a/src/java/LexicalError.java +++ b/src/java/LexicalError.java @@ -14,9 +14,13 @@ class LexicalError { * * @param unexpectedChar char to display why is an error. */ - public static void unexpectedChar(char unexpectedChar) { - errorLog = errorLog + ("Unexpected char found:" + - unexpectedChar + "\n"); + public static void unexpectedChar(char unexpectedChar, int line, int position) { + errorLog = errorLog + ("Unexpected char found: '" + unexpectedChar + "'in line: "+ line + ":" + position + "\n"); + errorState = true; + } + + public static void expectedChar(char expectedChar, int line, int position) { + errorLog = errorLog + ("expected char missing: '" + expectedChar + "'in line: "+ line + ":" + position + "\n"); errorState = true; } diff --git a/src/java/Main.java b/src/java/Main.java index ae9059b..4349ecb 100644 --- a/src/java/Main.java +++ b/src/java/Main.java @@ -7,13 +7,17 @@ */ class Main { - public static void main(String[] args) { + public static void main(String[] args) throws IOException { - File filePath = openFile("../../bin/test"); + File filePath = openFile("Xpp-compiler/bin/test"); filePath = openFile(preprocessFile(filePath)); runTest(filePath); + + LexicalError.computeErrorLog(); + + System.out.println("Process terminated."); } /** @@ -28,8 +32,9 @@ private static File openFile(String path) { * Runs a test with the TokenGenerator to visually check if it's working. * * @param source file to run the token generator. + * @throws IOException if an error occurs during getNextToken(). */ - private static void runTest(File source) { + private static void runTest(File source) throws IOException { TokenGenerator Tokenizer = new TokenGenerator(source); Token currentToken = Tokenizer.getNextToken(); while (!currentToken.equals(TokenType.EOF)) { diff --git a/src/java/TokenGenerator.java b/src/java/TokenGenerator.java index 708e354..18900fb 100644 --- a/src/java/TokenGenerator.java +++ b/src/java/TokenGenerator.java @@ -1,6 +1,8 @@ import java.io.File; import java.io.FileNotFoundException; -import java.util.Scanner; +import java.io.FileReader; +import java.io.IOException; +import java.io.LineNumberReader; import static java.lang.Character.*; @@ -13,7 +15,7 @@ class TokenGenerator { /** * Scanner to parse the input. */ - private Scanner scanner; + private LineNumberReader lineNumberReader; /** * Line being evaluated. @@ -30,6 +32,10 @@ class TokenGenerator { */ private int currentLinePosition; + private int lexemeStartPosition; + + private char lastLineEndChar; + /** * Position of the last returned token. */ @@ -39,19 +45,22 @@ class TokenGenerator { * Lexeme of the last token returned. */ private String lastLexeme; - - private static final String OpString = "+-/%*"; + + private static final String OpString = "+-/%*"; /** * Constructor. * * @param sourceCode File reference. + * @throws IOException if an error occurs during bufferedReader readline. */ - TokenGenerator(File sourceCode) { + TokenGenerator(File sourceCode) throws IOException { try { - scanner = new Scanner(sourceCode); - assert scanner != null; - currentLine = scanner.nextLine(); + FileReader fileReader = new FileReader(sourceCode); + lineNumberReader = new LineNumberReader(fileReader); + + assert lineNumberReader != null; + currentLine = lineNumberReader.readLine(); currentLinePosition = 0; lastTokenEndPosition = 0; currentChar = currentLine.charAt(currentLinePosition); @@ -68,11 +77,17 @@ class TokenGenerator { /** * @return next Token in the input. + * @throws IOException if an error occurs during advanceInput(). */ - public Token getNextToken() { + public Token getNextToken() throws IOException { Token token = null; - + + while(isWhitespace(currentChar)){ + advanceInput(); + } + lexemeStartPosition = currentLinePosition; + if (isLetter(currentChar) || currentChar == '_') { advanceInput(); while (isLetterOrDigit(currentChar) || currentChar == '_') @@ -84,9 +99,6 @@ public Token getNextToken() { while (isDigit(currentChar)) { advanceInput(); } - if (!isWhitespace(currentChar) && !isOperator(currentChar)) { - LexicalError.unexpectedChar(currentChar); - } token = new Token(TokenType.INTEGER_LITERAL); } else if (currentChar == '<') { @@ -96,8 +108,10 @@ public Token getNextToken() { token = new Token(TokenType.REL_OP, TokenType.LESS_OR_EQUAL); } + else{ + token = new Token(TokenType.REL_OP, TokenType.LESS_THAN); + } - token = new Token(TokenType.REL_OP, TokenType.LESS_THAN); } else if (currentChar == '>') { advanceInput(); if (currentChar == '=') { @@ -105,16 +119,18 @@ public Token getNextToken() { token = new Token(TokenType.REL_OP, TokenType.GREATER_OR_EQUAL); } - - token = new Token(TokenType.REL_OP, TokenType.GREATER_THAN); + else { + token = new Token(TokenType.REL_OP, TokenType.GREATER_THAN); + } } else if (currentChar == '=') { advanceInput(); if (currentChar == '=') { advanceInput(); token = new Token(TokenType.REL_OP, TokenType.EQUAL); + } else { + token = new Token(TokenType.ATTRIB); } - token = new Token(TokenType.ATTRIB); } else if (currentChar == '!') { advanceInput(); if (currentChar == '=') { @@ -122,7 +138,7 @@ public Token getNextToken() { token = new Token(TokenType.NOT_EQUAL); } else { - LexicalError.unexpectedChar('!'); + LexicalError.expectedChar('=', lineNumberReader.getLineNumber(), currentLinePosition); } } else if (currentChar == '+') { advanceInput(); @@ -184,9 +200,11 @@ public Token getNextToken() { token = new Token(TokenType.DOUBLE_QUOTATION); } else if (currentChar == '$') { token = new Token(TokenType.EOF); - } else - token = new Token(TokenType.UNDEF); - + } else{ + LexicalError.unexpectedChar(currentChar, lineNumberReader.getLineNumber(), currentLinePosition); + advanceInput(); + token = new Token(TokenType.UNDEF); + } updateLexeme(); token.setLexeme(lastLexeme); return token; @@ -196,38 +214,32 @@ public Token getNextToken() { * Update lexeme according to the last valid token position. */ private void updateLexeme() { - lastTokenEndPosition = 0; - - if (lastLexeme != null && currentLine.contains(lastLexeme)) { - lastTokenEndPosition = currentLine.indexOf(lastLexeme) + lastLexeme.length(); + if(currentLinePosition != 0){ + lastTokenEndPosition = currentLinePosition; + lastLexeme = currentLine.substring(lexemeStartPosition, lastTokenEndPosition); } - - if (lastTokenEndPosition != 0 && lastTokenEndPosition == currentLine.length() - 1) - lastLexeme = "" + ';'; else - lastLexeme = currentLine.substring(lastTokenEndPosition, currentLinePosition); + lastLexeme = "" + lastLineEndChar; + + } /** - * Advance on input incrementing the line position and updating the current char. + * Advance on input incrementing the line position and updating the current + * char. + * + * @throws IOException if an error occurs during bufferedReader readline. */ - private void advanceInput() { + private void advanceInput() throws IOException { if (currentLinePosition + 1 != currentLine.length()) { currentLinePosition++; } else { - currentLine = scanner.nextLine(); + lastLineEndChar = currentChar; + currentLine = lineNumberReader.readLine(); currentLinePosition = 0; } currentChar = currentLine.charAt(currentLinePosition); - while (isWhitespace(currentChar)) - advanceInput(); - } - /** - * @param scanner reference. - */ - public void setScanner(Scanner scanner) { - this.scanner = scanner; } /** @@ -238,3 +250,4 @@ private boolean isOperator(char expectedOp) { return OpString.contains("" + expectedOp); } } + From d516ba5106ea2976b7683c02692431c67ebd7a7c Mon Sep 17 00:00:00 2001 From: Gabriel da Silva Chiquetto <201719050309@lab02-l115.dominio.facom.ufms.br> Date: Mon, 6 May 2019 17:03:01 -0400 Subject: [PATCH 16/22] Remove KEywords. Initial implementation of Parser update TOken Refacto tokenGenerator --- src/java/Keywords.java | 64 ------------------------------------ src/java/Parser.java | 44 ++++++++++++++++++++++++- src/java/Token.java | 8 +++-- src/java/TokenGenerator.java | 35 +++++++++++++++++++- src/java/TokenType.java | 58 ++++++++++++++++++++++++++++++++ 5 files changed, 141 insertions(+), 68 deletions(-) delete mode 100644 src/java/Keywords.java diff --git a/src/java/Keywords.java b/src/java/Keywords.java deleted file mode 100644 index 31aa9f4..0000000 --- a/src/java/Keywords.java +++ /dev/null @@ -1,64 +0,0 @@ -/** - * Enum which defines the language reserved keywords. - * - * @author Leonardo Rocha, Gabriel Chiquetto - */ -@SuppressWarnings("unused") -public enum Keywords { - /** - * Used for class declarations. - */ - CLASS, - /** - * Used for class inheritance. - */ - EXTENDS, - /** - * Primitive type declaration "integer". - */ - INT, - /** - * String reference declaration. - */ - STRING, - /** - * Terminates execution of current scope. - */ - BREAK, - /** - * Show a message in the standard output. - */ - PRINT, - /** - * Read from standard input. - */ - READ, - /** - * It causes program control to transfer back to the caller of the method. - */ - RETURN, - /** - * References the immediate parent of a class. - */ - SUPER, - /** - * Conditional statement that executes a block when the expression is true. - */ - IF, - /** - * Executes when the Boolean expression of the matching "if" is false. - */ - ELSE, - /** - * Iterative Loop control structure. - */ - FOR, - /** - * Memory allocation for a new object. - */ - NEW, - /** - * Method called when the object is allocated. - */ - CONSTRUCTOR -} diff --git a/src/java/Parser.java b/src/java/Parser.java index 91c1439..2852138 100644 --- a/src/java/Parser.java +++ b/src/java/Parser.java @@ -1,3 +1,45 @@ -class Parser { +import java.io.File; +import java.io.IOException; +public class Parser { + + private TokenGenerator tokenGenerator; + + private Token currentToken; + + private void advanceToken() throws IOException{ + currentToken = this.tokenGenerator.getNextToken(); + } + + public Parser(File sourceCode) throws IOException { + tokenGenerator = new TokenGenerator(sourceCode); + advanceToken(); + } + + public void match(TokenType type){ + if(type != currentToken.getAttribute()) + } + + public void program(){ + if(currentToken.equals(TokenType.CLASS)){ + classList(); + } + } + + private void classList(){ + classDecl(); + classListLinha(); + } + + private void classListLinha(){ + if(currentToken.equals(TokenType.CLASS)){ + classList(); + } + } + + private void classDecl(){ + match(CLASS); + match(IDENTIFIER); + classDeclLinha(); + } } \ No newline at end of file diff --git a/src/java/Token.java b/src/java/Token.java index f60f3c3..1d8c88a 100644 --- a/src/java/Token.java +++ b/src/java/Token.java @@ -12,7 +12,7 @@ class Token { /** * Optional value of a token. */ - private final TokenType attribute; + private TokenType attribute; /** * Sequence of characters that represents the token in the source code. */ @@ -71,7 +71,11 @@ public TokenType getTokenType() { public TokenType getAttribute() { return attribute; } - + + public void setAttribute(TokenType attribute){ + this.attribute = attribute; + } + /** * * @return lexeme of the token. diff --git a/src/java/TokenGenerator.java b/src/java/TokenGenerator.java index 18900fb..da4330f 100644 --- a/src/java/TokenGenerator.java +++ b/src/java/TokenGenerator.java @@ -94,6 +94,7 @@ public Token getNextToken() throws IOException { advanceInput(); token = new Token(TokenType.IDENTIFIER); + verifyKeywords(token); } else if (isDigit(currentChar)) { advanceInput(); while (isDigit(currentChar)) { @@ -249,5 +250,37 @@ private void advanceInput() throws IOException { private boolean isOperator(char expectedOp) { return OpString.contains("" + expectedOp); } -} + private void verifyKeywords(Token identifier){ + String lexeme = identifier.getLexeme(); + switch(lexeme){ + case "class": + identifier.setAttribute(TokenType.CLASS); + case "extends": + identifier.setAttribute(TokenType.EXTENDS); + case "int": + identifier.setAttribute(TokenType.INT); + case "String": + identifier.setAttribute(TokenType.STRING); + case "break": + identifier.setAttribute(TokenType.BREAK); + case "print": + identifier.setAttribute(TokenType.PRINT); + case "read": + identifier.setAttribute(TokenType.READ); + case "return": + identifier.setAttribute(TokenType.SUPER); + case "if": + identifier.setAttribute(TokenType.IF); + case "else": + identifier.setAttribute(TokenType.ELSE); + case "for": + identifier.setAttribute(TokenType.FOR); + case "new": + identifier.setAttribute(TokenType.NEW); + case "constructor": + identifier.setAttribute(TokenType.CONSTRUCTOR); + default: + } + } +} \ No newline at end of file diff --git a/src/java/TokenType.java b/src/java/TokenType.java index 4163183..3074ae6 100644 --- a/src/java/TokenType.java +++ b/src/java/TokenType.java @@ -1,3 +1,5 @@ +import com.sun.org.apache.bcel.internal.generic.IF_ACMPEQ; + /** * Enum used to group all possible token types. * @@ -12,6 +14,62 @@ enum TokenType { * A sequence of letters, numbers and underscores. */ IDENTIFIER, + /** + * Used for class declarations. + */ + CLASS, + /** + * Used for class inheritance. + */ + EXTENDS, + /** + * Primitive type declaration "integer". + */ + INT, + /** + * String reference declaration. + */ + STRING, + /** + * Terminates execution of current scope. + */ + BREAK, + /** + * Show a message in the standard output. + */ + PRINT, + /** + * Read from standard input. + */ + READ, + /** + * It causes program control to transfer back to the caller of the method. + */ + RETURN, + /** + * References the immediate parent of a class. + */ + SUPER, + /** + * Conditional statement that executes a block when the expression is true. + */ + IF, + /** + * Executes when the Boolean expression of the matching "if" is false. + */ + ELSE, + /** + * Iterative Loop control structure. + */ + FOR, + /** + * Memory allocation for a new object. + */ + NEW, + /** + * Method called when the object is allocated. + */ + CONSTRUCTOR, /** * A sequence of decimal digits. */ From b164b716b65dc8365f61c2dcfaa7fa94989d5dc7 Mon Sep 17 00:00:00 2001 From: Leonardo Cavalcante Almeida Rocha <201719050465@lab01-l46.dominio.facom.ufms.br> Date: Wed, 8 May 2019 13:49:10 -0400 Subject: [PATCH 17/22] Fix some bugs. Fix preprocessor removing blank lines. Modify some methods. Start TokenGenerator refactor. --- .vscode/launch.json | 23 ++++++++++++++++++++ bin/test | 3 ++- bin/test_preprocessed.txt | 5 ++++- src/java/Main.java | 10 ++++----- src/java/Parser.java | 12 +++++------ src/java/Preprocessor.java | 41 ++++++++++++++++++++++++++---------- src/java/Token.java | 19 +++++++++++++++-- src/java/TokenGenerator.java | 20 +++++++++++------- src/java/TokenType.java | 2 -- 9 files changed, 99 insertions(+), 36 deletions(-) create mode 100644 .vscode/launch.json diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..11eba31 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,23 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + + + { + "type": "java", + "name": "Debug (Launch) - Current File", + "request": "launch", + "mainClass": "${file}" + }, + { + "type": "java", + "name": "Debug (Launch)-Main", + "request": "launch", + "mainClass": "Main", + "projectName": "java" + } + ] +} \ No newline at end of file diff --git a/bin/test b/bin/test index 7f43c86..59392a8 100644 --- a/bin/test +++ b/bin/test @@ -1,9 +1,10 @@ //this is a simple comment to test x = /*fucking comment */ 10; -int x = 10; +int x = 10; //será que ignora? int y = 10; batata = 20; triangulo = 70; tri%%@ 43566¨ + === 1/**/2 \ No newline at end of file diff --git a/bin/test_preprocessed.txt b/bin/test_preprocessed.txt index 5ab02dd..a0bd0c5 100644 --- a/bin/test_preprocessed.txt +++ b/bin/test_preprocessed.txt @@ -1,8 +1,11 @@ + + x = 10; -int x = 10; +int x = 10; int y = 10; batata = 20; triangulo = 70; tri%%@ 43566¨ + === 12 $ diff --git a/src/java/Main.java b/src/java/Main.java index 4349ecb..df9c692 100644 --- a/src/java/Main.java +++ b/src/java/Main.java @@ -9,7 +9,7 @@ class Main { public static void main(String[] args) throws IOException { - File filePath = openFile("Xpp-compiler/bin/test"); + File filePath = openFile("bin/test"); filePath = openFile(preprocessFile(filePath)); @@ -35,11 +35,11 @@ private static File openFile(String path) { * @throws IOException if an error occurs during getNextToken(). */ private static void runTest(File source) throws IOException { - TokenGenerator Tokenizer = new TokenGenerator(source); - Token currentToken = Tokenizer.getNextToken(); - while (!currentToken.equals(TokenType.EOF)) { + TokenGenerator tokenizer = new TokenGenerator(source); + Token currentToken = tokenizer.getNextToken(); + while (!currentToken.equalsTokenType(TokenType.EOF)) { currentToken.showCase(); - currentToken = Tokenizer.getNextToken(); + currentToken = tokenizer.getNextToken(); } } diff --git a/src/java/Parser.java b/src/java/Parser.java index 2852138..688f49c 100644 --- a/src/java/Parser.java +++ b/src/java/Parser.java @@ -17,11 +17,11 @@ public Parser(File sourceCode) throws IOException { } public void match(TokenType type){ - if(type != currentToken.getAttribute()) + //if(type != currentToken.getAttribute()) } public void program(){ - if(currentToken.equals(TokenType.CLASS)){ + if(currentToken.equalsTokenType(TokenType.CLASS)){ classList(); } } @@ -32,14 +32,14 @@ private void classList(){ } private void classListLinha(){ - if(currentToken.equals(TokenType.CLASS)){ + if(currentToken.equalsTokenType(TokenType.CLASS)){ classList(); } } private void classDecl(){ - match(CLASS); - match(IDENTIFIER); - classDeclLinha(); + //match(CLASS); + //match(IDENTIFIER); + //classDeclLinha(); } } \ No newline at end of file diff --git a/src/java/Preprocessor.java b/src/java/Preprocessor.java index 4ac10e7..7d34687 100644 --- a/src/java/Preprocessor.java +++ b/src/java/Preprocessor.java @@ -61,8 +61,11 @@ private void removeCommentsAndAddEOF() throws IOException { while ((currentLine = lineNumberReader.readLine()) != null) { if (isBeginMultiLineComment()) { outputLineIndex = handleMultiLineComments(outputLineIndex); - } else if (isNotSingleLineComment() && !currentLine.isEmpty()) + } else if (isSingleLineComment()) { + outputLineIndex = handleSingleLineComments(outputLineIndex); + } else { output[outputLineIndex++] = currentLine; + } } output[outputLineIndex] = "" + EOF; } @@ -81,6 +84,22 @@ private int handleMultiLineComments(int outputLineIndex) throws IOException { return outputLineIndex; } + /** + * @param outputLineIndex last valid output line index. + * @return outputLineIndex. + */ + private int handleSingleLineComments(int outputLineIndex) { + String[] splitStrings; + String processedLine = ""; + if (currentLine.contains("//")){ + splitStrings = currentLine.split("//"); + processedLine = splitStrings[0]; + } + output[outputLineIndex++] = processedLine; + + return outputLineIndex; + } + /** * Print preprocessed code and write in a new file with the same name + _preprocessed.txt. * @@ -108,14 +127,7 @@ private void printOutputAndWriteToFile(File filePath) throws IOException { * @return true if the line is valid. */ private boolean isValidLine(String line) { - return line != null && !line.isEmpty(); - } - - /** - * @return true if the current line is NOT a single-line comment. - */ - private boolean isNotSingleLineComment() { - return currentLine != null && !currentLine.contains("//"); + return line != null; } /** @@ -134,6 +146,13 @@ private boolean isNotEndMultiLineComment() { return NotEndMultiLineComment && !currentLine.contains(EOF); } + /** + * @return true if the current line is a single-line comment. + */ + private boolean isSingleLineComment() { + return currentLine != null && currentLine.contains("//"); + } + /** * @return processed String[] without Multi-line comment. * @throws IOException if something goes wrong during buffer line reading or string splitting.. @@ -148,12 +167,12 @@ private String[] splitMultiLineComment() throws IOException { linesDistance = findEndMultiLineComment(linesDistance); if (currentLine == null) { - output[1] = ""; + output[1] = ""; } else if (linesDistance == 0) { splitStrings = currentLine.split("\\*/"); output[0] += getValidStatementAtPosition(splitStrings, 1); } else { - splitStrings = currentLine.split("\\*/"); + splitStrings = currentLine.split("\\*/"); output[1] = getValidStatementAtPosition(splitStrings, 1); } diff --git a/src/java/Token.java b/src/java/Token.java index 1d8c88a..dded045 100644 --- a/src/java/Token.java +++ b/src/java/Token.java @@ -43,8 +43,19 @@ public Token(TokenType tokenType) { * @param obj Object to evaluate. * @return true if they're equal. */ - public boolean equals(TokenType obj) { - return this.getTokenType() == obj; + @Override + public boolean equals(Object obj) { + Token token = (Token) obj; + return this.getTokenType() == token.getTokenType(); + } + + /** + * Compare token types. + * @param tokentype + * @return true if the type of this token equals the given token type. + */ + public boolean equalsTokenType(TokenType tokentype){ + return this.getTokenType() == tokentype; } /** @@ -72,6 +83,10 @@ public TokenType getAttribute() { return attribute; } + /** + * Set the value of the field attribute. + * @param attribute value to set. + */ public void setAttribute(TokenType attribute){ this.attribute = attribute; } diff --git a/src/java/TokenGenerator.java b/src/java/TokenGenerator.java index da4330f..0fea343 100644 --- a/src/java/TokenGenerator.java +++ b/src/java/TokenGenerator.java @@ -63,11 +63,7 @@ class TokenGenerator { currentLine = lineNumberReader.readLine(); currentLinePosition = 0; lastTokenEndPosition = 0; - currentChar = currentLine.charAt(currentLinePosition); - - if (isWhitespace(currentChar)) { - advanceInput(); - } + updateCurrentChar(); } catch (FileNotFoundException ex) { System.out.println("Unable to open file '" + sourceCode + "'"); } catch (NullPointerException e) { @@ -94,7 +90,6 @@ public Token getNextToken() throws IOException { advanceInput(); token = new Token(TokenType.IDENTIFIER); - verifyKeywords(token); } else if (isDigit(currentChar)) { advanceInput(); while (isDigit(currentChar)) { @@ -208,6 +203,9 @@ public Token getNextToken() throws IOException { } updateLexeme(); token.setLexeme(lastLexeme); + if (token.equalsTokenType(TokenType.IDENTIFIER)) { + verifyKeywords(token); + } return token; } @@ -222,7 +220,6 @@ private void updateLexeme() { else lastLexeme = "" + lastLineEndChar; - } /** @@ -239,8 +236,15 @@ private void advanceInput() throws IOException { currentLine = lineNumberReader.readLine(); currentLinePosition = 0; } - currentChar = currentLine.charAt(currentLinePosition); + updateCurrentChar(); + } + private void updateCurrentChar() { + if (currentLine != null && currentLine.isEmpty()) { + currentChar = ' '; + } else { + currentChar = currentLine.charAt(currentLinePosition); + } } /** diff --git a/src/java/TokenType.java b/src/java/TokenType.java index 3074ae6..77a1392 100644 --- a/src/java/TokenType.java +++ b/src/java/TokenType.java @@ -1,5 +1,3 @@ -import com.sun.org.apache.bcel.internal.generic.IF_ACMPEQ; - /** * Enum used to group all possible token types. * From a8b7d54994576df34e194097cc457f16f740e092 Mon Sep 17 00:00:00 2001 From: LeoRocha Date: Fri, 17 May 2019 13:13:18 -0400 Subject: [PATCH 18/22] Change main class to receive a file from the command line. --- src/java/Main.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/java/Main.java b/src/java/Main.java index df9c692..852b263 100644 --- a/src/java/Main.java +++ b/src/java/Main.java @@ -9,15 +9,19 @@ class Main { public static void main(String[] args) throws IOException { - File filePath = openFile("bin/test"); + if (args.length > 0) { + File filePath = openFile(args[0]); - filePath = openFile(preprocessFile(filePath)); + filePath = openFile(preprocessFile(filePath)); - runTest(filePath); + runTest(filePath); - LexicalError.computeErrorLog(); + LexicalError.computeErrorLog(); - System.out.println("Process terminated."); + System.out.println("Process terminated."); + } else { + System.out.println("Please, insert a valid file path."); + } } /** From ba1fbebf06176d4993d4a2bde8800ab35863d981 Mon Sep 17 00:00:00 2001 From: LeoRocha Date: Fri, 24 May 2019 19:23:29 -0400 Subject: [PATCH 19/22] Change file structure with packages. --- .idea/codeStyles/codeStyleConfig.xml | 5 +++++ .idea/runConfigurations/XPPCompiler.xml | 2 +- src/java/.classpath | 6 ------ src/java/.project | 17 ----------------- src/java/{ => core}/Main.java | 11 +++++++++-- src/java/{ => core}/Parser.java | 2 ++ src/java/{ => core}/Token.java | 4 +++- src/java/{ => core}/TokenGenerator.java | 6 +++++- src/java/{ => core}/TokenType.java | 2 ++ src/java/{ => utils}/LexicalError.java | 4 +++- src/java/{ => utils}/Preprocessor.java | 6 ++++-- 11 files changed, 34 insertions(+), 31 deletions(-) create mode 100644 .idea/codeStyles/codeStyleConfig.xml delete mode 100644 src/java/.classpath delete mode 100644 src/java/.project rename src/java/{ => core}/Main.java (92%) rename src/java/{ => core}/Parser.java (98%) rename src/java/{ => core}/Token.java (96%) rename src/java/{ => core}/TokenGenerator.java (99%) rename src/java/{ => core}/TokenType.java (99%) rename src/java/{ => utils}/LexicalError.java (95%) rename src/java/{ => utils}/Preprocessor.java (98%) diff --git a/.idea/codeStyles/codeStyleConfig.xml b/.idea/codeStyles/codeStyleConfig.xml new file mode 100644 index 0000000..a55e7a1 --- /dev/null +++ b/.idea/codeStyles/codeStyleConfig.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/.idea/runConfigurations/XPPCompiler.xml b/.idea/runConfigurations/XPPCompiler.xml index ac3582f..93a6ef6 100644 --- a/.idea/runConfigurations/XPPCompiler.xml +++ b/.idea/runConfigurations/XPPCompiler.xml @@ -1,7 +1,7 @@