Skip to content

PGtokenizer ParseError  #2050

@rtrier

Description

@rtrier

Describe the issue
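If the string to parse contains brackets inside a double-quoted section (the `nestedDoubleQuote` state), the resulting tokens are not correct: the delimiters that follow are no longer recognized.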

Driver Version?
Latest
Java Version?
Doesn't matter

OS Version?
Doesn't matter

PostgreSQL Version?
Doesn't matter

To Reproduce
Steps to reproduce the behaviour:

// Comma-delimited input whose fourth field, "f(10", contains an opening
// parenthesis inside double quotes that has no matching ')' inside those quotes.
PGtokenizer tokenizer = new PGtokenizer(",,d,\"f(10\",\"(mime,pdf,pdf)\",test,2018-10-11,1010", ',');

// Print every token together with its index.
for (int i=0, c = tokenizer.getSize(); i<c; i++) {
	System.out.println(i+"  "+tokenizer.getToken(i));
}

Results:

0  
1  
2  d
3  "f(10","(mime,pdf,pdf)",test,2018-10-11,1010

Expected behaviour

0  
1  
2  d
3  "f(10"
4  "(mime,pdf,pdf)"
5  test
6  2018-10-11
7  1010

**Solution**
A quick fix:

package de.gdiservice.wfs.test;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;

import org.postgresql.util.PGtokenizer;

public class Tokenizer extends PGtokenizer {
	
	static final Map<Character, Character> closing2OpeningCharacter = new HashMap<>();
	static {
		closing2OpeningCharacter.put(')', '(');
		closing2OpeningCharacter.put(']', '[');
		closing2OpeningCharacter.put('>', '<');
		closing2OpeningCharacter.put('"', '"');
	}

	public Tokenizer(String string, char delim) {
		super(string, delim);
		System.out.println(string);
	}
	
	  /**
	   * This resets this tokenizer with a new string and/or delimiter.
	   *
	   * @param string containing tokens
	   * @param delim single character to split the tokens
	   * @return number of tokens
	   */
	  public int tokenize(String string, char delim) {
	    tokens.clear();
	    
	    final Stack<Character> stack = new Stack<>();

	    // nest holds how many levels we are in the current token.
	    // if this is > 0 then we don't split a token when delim is matched.
	    //
	    // The Geometric datatypes use this, because often a type may have others
	    // (usualls PGpoint) imbedded within a token.
	    //
	    // Peter 1998 Jan 6 - Added < and > to the nesting rules
	    int nest = 0;
	    int p;
	    int s;
	    boolean skipChar = false;
	    boolean nestedDoubleQuote = false;
	    char c = (char)0;
	    for (p = 0, s = 0; p < string.length(); p++) {
	      c = string.charAt(p);

	      // increase nesting if an open character is found
	      if (c == '(' || c == '[' || c == '<' || (!nestedDoubleQuote && !skipChar && c == '"')) {
	        nest++;
	        stack.push(c);
	        if (c == '"') {
	          nestedDoubleQuote = true;
	          skipChar = true;
	        }
	      }

	      // decrease nesting if a close character is found
	      if (c == ')' || c == ']' || c == '>' || (nestedDoubleQuote && !skipChar && c == '"')) {
	    	  
	    	
	        if (c == '"') {
	        	while (stack.size()>0 && stack.peek().charValue()!='"') {  
		    		nest--;
		    		stack.pop();	
		    	}	
	        	nestedDoubleQuote = false;
	        	stack.pop();
		    	nest--;
	        } else {
	        	if (stack.size()>0 && stack.peek().charValue()==closing2OpeningCharacter.get(c).charValue()) {
	        		stack.pop();
			    	nest--;
	        	}
	        }
	      }

	      skipChar = c == '\\';

	      if (nest == 0 && c == delim) {
	        tokens.add(string.substring(s, p));
	        s = p + 1; // +1 to skip the delimiter
	      }

	    }

	    // Don't forget the last token ;-)
	    if (s < string.length()) {
	      tokens.add(string.substring(s));
	    }

	    // check for last token empty
	    if ( s == string.length() && c == delim) {
	      tokens.add("");
	    }

	    return tokens.size();
	  }		

	public static void main(String[] args) {
		
		PGtokenizer tokenizer = new Tokenizer(",,d,\"f(10\",\"(mime,pdf,pdf)\",test,2018-10-11,1010", ',');
		for (int i=0, c = tokenizer.getSize(); i<c; i++) {
			System.out.println(i+"  "+tokenizer.getToken(i));
		}
	}

}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions