/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.sequences;

import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.objectbank.ObjectBank;
import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import edu.stanford.nlp.util.AbstractIterator;
import edu.stanford.nlp.util.PaddedList;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

public class CoNLLDocumentReaderAndWriter
implements DocumentReaderAndWriter<CoreLabel> {
    private static final long serialVersionUID = 6281374154299530460L;
    public static final String BOUNDARY = "*BOUNDARY*";
    public static final String OTHER = "O";
    private static final boolean TREAT_FILE_AS_ONE_DOCUMENT = false;
    private static final Pattern docPattern = Pattern.compile("^\\s*-DOCSTART-\\s");
    private static final Pattern white = Pattern.compile("^\\s*$");
    private SeqClassifierFlags flags;

    @Override
    public void init(SeqClassifierFlags flags) {
        this.flags = flags;
    }

    public String toString() {
        return "CoNLLDocumentReaderAndWriter[entitySubclassification: " + this.flags.entitySubclassification + ", intern: " + this.flags.intern + ']';
    }

    @Override
    public Iterator<List<CoreLabel>> getIterator(Reader r) {
        return new CoNLLIterator(r);
    }

    private static Iterator<String> splitIntoDocs(Reader r) {
        ArrayList<String> docs = new ArrayList<String>();
        ObjectBank<String> ob = ObjectBank.getLineIterator(r);
        StringBuilder current = new StringBuilder();
        for (String line : ob) {
            if (docPattern.matcher(line).lookingAt() && current.length() > 0) {
                docs.add(current.toString());
                current = new StringBuilder();
            }
            current.append(line);
            current.append('\n');
        }
        if (current.length() > 0) {
            docs.add(current.toString());
        }
        return docs.iterator();
    }

    private List<CoreLabel> processDocument(String doc) {
        String[] lines;
        ArrayList<CoreLabel> lis = new ArrayList<CoreLabel>();
        for (String line : lines = doc.split("\n")) {
            if (this.flags.deleteBlankLines && white.matcher(line).matches()) continue;
            lis.add(this.makeCoreLabel(line));
        }
        this.entitySubclassify(lis, this.flags.entitySubclassification);
        return lis;
    }

    private void entitySubclassify(List<CoreLabel> tokens, String style) {
        CoreLabel c;
        int i;
        int how;
        if ("iob1".equalsIgnoreCase(style)) {
            how = 0;
        } else if ("iob2".equalsIgnoreCase(style)) {
            how = 1;
        } else if ("ioe1".equalsIgnoreCase(style)) {
            how = 2;
        } else if ("ioe2".equalsIgnoreCase(style)) {
            how = 3;
        } else if ("io".equalsIgnoreCase(style)) {
            how = 4;
        } else if ("sbieo".equalsIgnoreCase(style)) {
            how = 5;
        } else {
            System.err.println("entitySubclassify: unknown style: " + style);
            how = 4;
        }
        tokens = new PaddedList<CoreLabel>(tokens, new CoreLabel());
        int k = tokens.size();
        String[] newAnswers = new String[k];
        for (i = 0; i < k; ++i) {
            c = tokens.get(i);
            CoreLabel p = tokens.get(i - 1);
            CoreLabel n = tokens.get(i + 1);
            String cAns = (String)c.get(CoreAnnotations.AnswerAnnotation.class);
            if (cAns.length() > 1 && cAns.charAt(1) == '-') {
                String nAns;
                String pAns = (String)p.get(CoreAnnotations.AnswerAnnotation.class);
                if (pAns == null) {
                    pAns = OTHER;
                }
                if ((nAns = (String)n.get(CoreAnnotations.AnswerAnnotation.class)) == null) {
                    nAns = OTHER;
                }
                String base = cAns.substring(2, cAns.length());
                String pBase = pAns.length() > 2 ? pAns.substring(2, pAns.length()) : pAns;
                String nBase = nAns.length() > 2 ? nAns.substring(2, nAns.length()) : nAns;
                char prefix = cAns.charAt(0);
                int pPrefix = pAns.length() > 0 ? (int)pAns.charAt(0) : 32;
                int nPrefix = nAns.length() > 0 ? (int)nAns.charAt(0) : 32;
                boolean isStartAdjacentSame = base.equals(pBase) && (prefix == 'B' || prefix == 'S' || pPrefix == 69 || pPrefix == 83);
                boolean isEndAdjacentSame = base.equals(nBase) && (prefix == 'E' || prefix == 'S' || nPrefix == 66 || pPrefix == 83);
                boolean isFirst = !base.equals(pBase) || cAns.charAt(0) == 'B';
                boolean isLast = !base.equals(nBase) || nAns.charAt(0) == 'B';
                switch (how) {
                    case 0: {
                        if (isStartAdjacentSame) {
                            newAnswers[i] = this.intern("B-" + base);
                            break;
                        }
                        newAnswers[i] = this.intern("I-" + base);
                        break;
                    }
                    case 1: {
                        if (isFirst) {
                            newAnswers[i] = this.intern("B-" + base);
                            break;
                        }
                        newAnswers[i] = this.intern("I-" + base);
                        break;
                    }
                    case 2: {
                        if (isEndAdjacentSame) {
                            newAnswers[i] = this.intern("E-" + base);
                            break;
                        }
                        newAnswers[i] = this.intern("I-" + base);
                        break;
                    }
                    case 3: {
                        if (isLast) {
                            newAnswers[i] = this.intern("E-" + base);
                            break;
                        }
                        newAnswers[i] = this.intern("I-" + base);
                        break;
                    }
                    case 4: {
                        newAnswers[i] = this.intern("I-" + base);
                        break;
                    }
                    case 5: {
                        newAnswers[i] = isFirst && isLast ? this.intern("S-" + base) : (!isFirst && isLast ? this.intern("E-" + base) : (isFirst && !isLast ? this.intern("B-" + base) : this.intern("I-" + base)));
                    }
                }
                continue;
            }
            newAnswers[i] = cAns;
        }
        for (i = 0; i < k; ++i) {
            c = tokens.get(i);
            c.set(CoreAnnotations.AnswerAnnotation.class, newAnswers[i]);
        }
    }

    private CoreLabel makeCoreLabel(String line) {
        CoreLabel wi = new CoreLabel();
        String[] bits = line.split("\\s+");
        switch (bits.length) {
            case 0: 
            case 1: {
                wi.setWord(BOUNDARY);
                wi.set(CoreAnnotations.AnswerAnnotation.class, OTHER);
                break;
            }
            case 2: {
                wi.setWord(bits[0]);
                wi.set(CoreAnnotations.AnswerAnnotation.class, bits[1]);
                break;
            }
            case 3: {
                wi.setWord(bits[0]);
                wi.setTag(bits[1]);
                wi.set(CoreAnnotations.AnswerAnnotation.class, bits[2]);
                break;
            }
            case 4: {
                wi.setWord(bits[0]);
                wi.setTag(bits[1]);
                wi.set(CoreAnnotations.ChunkAnnotation.class, bits[2]);
                wi.set(CoreAnnotations.AnswerAnnotation.class, bits[3]);
                break;
            }
            case 5: {
                if (this.flags.useLemmaAsWord) {
                    wi.setWord(bits[1]);
                } else {
                    wi.setWord(bits[0]);
                }
                wi.set(CoreAnnotations.LemmaAnnotation.class, bits[1]);
                wi.setTag(bits[2]);
                wi.set(CoreAnnotations.ChunkAnnotation.class, bits[3]);
                wi.set(CoreAnnotations.AnswerAnnotation.class, bits[4]);
                break;
            }
            default: {
                throw new RuntimeIOException("Unexpected input (many fields): " + line);
            }
        }
        wi.set(CoreAnnotations.OriginalAnswerAnnotation.class, wi.get(CoreAnnotations.AnswerAnnotation.class));
        return wi;
    }

    private String intern(String s) {
        if (this.flags.intern) {
            return s.intern();
        }
        return s;
    }

    private void deEndify(List<CoreLabel> tokens) {
        CoreLabel c;
        int i;
        if (this.flags.retainEntitySubclassification) {
            return;
        }
        tokens = new PaddedList<CoreLabel>(tokens, new CoreLabel());
        int k = tokens.size();
        String[] newAnswers = new String[k];
        for (i = 0; i < k; ++i) {
            c = tokens.get(i);
            CoreLabel p = tokens.get(i - 1);
            if (((String)c.get(CoreAnnotations.AnswerAnnotation.class)).length() > 1 && ((String)c.get(CoreAnnotations.AnswerAnnotation.class)).charAt(1) == '-') {
                boolean isStart;
                String base = ((String)c.get(CoreAnnotations.AnswerAnnotation.class)).substring(2);
                String pBase = ((String)p.get(CoreAnnotations.AnswerAnnotation.class)).length() <= 2 ? (String)p.get(CoreAnnotations.AnswerAnnotation.class) : ((String)p.get(CoreAnnotations.AnswerAnnotation.class)).substring(2);
                boolean isSecond = base.equals(pBase);
                boolean bl = isStart = ((String)c.get(CoreAnnotations.AnswerAnnotation.class)).charAt(0) == 'B' || ((String)c.get(CoreAnnotations.AnswerAnnotation.class)).charAt(0) == 'S';
                if (isSecond && isStart) {
                    newAnswers[i] = this.intern("B-" + base);
                    continue;
                }
                newAnswers[i] = this.intern("I-" + base);
                continue;
            }
            newAnswers[i] = (String)c.get(CoreAnnotations.AnswerAnnotation.class);
        }
        for (i = 0; i < k; ++i) {
            c = tokens.get(i);
            c.set(CoreAnnotations.AnswerAnnotation.class, newAnswers[i]);
        }
    }

    @Override
    public void printAnswers(List<CoreLabel> doc, PrintWriter out2) {
        if (!"iob1".equalsIgnoreCase(this.flags.entitySubclassification)) {
            this.deEndify(doc);
        }
        for (CoreLabel fl : doc) {
            String word = fl.word();
            if (word == BOUNDARY) {
                out2.println();
                continue;
            }
            String gold = (String)fl.get(CoreAnnotations.OriginalAnswerAnnotation.class);
            if (gold == null) {
                gold = "";
            }
            String guess = (String)fl.get(CoreAnnotations.AnswerAnnotation.class);
            String pos = fl.tag();
            String chunk = fl.get(CoreAnnotations.ChunkAnnotation.class) == null ? "" : (String)fl.get(CoreAnnotations.ChunkAnnotation.class);
            out2.println(fl.word() + '\t' + pos + '\t' + chunk + '\t' + gold + '\t' + guess);
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException {
        CoNLLDocumentReaderAndWriter f = new CoNLLDocumentReaderAndWriter();
        f.init(new SeqClassifierFlags());
        int numDocs = 0;
        int numTokens = 0;
        int numEntities = 0;
        String lastAnsBase = "";
        Iterator<List<CoreLabel>> it = f.getIterator(new FileReader(args[0]));
        while (it.hasNext()) {
            List<CoreLabel> doc = it.next();
            ++numDocs;
            for (CoreLabel fl : doc) {
                String ansPrefix;
                String ansBase;
                if (fl.word().equals(BOUNDARY)) continue;
                String ans = (String)fl.get(CoreAnnotations.AnswerAnnotation.class);
                String[] bits = ans.split("-");
                if (bits.length == 1) {
                    ansBase = bits[0];
                    ansPrefix = "";
                } else {
                    ansBase = bits[1];
                    ansPrefix = bits[0];
                }
                ++numTokens;
                if (ansBase.equals(OTHER)) continue;
                if (ansBase.equals(lastAnsBase)) {
                    if (!ansPrefix.equals("B")) continue;
                    ++numEntities;
                    continue;
                }
                ++numEntities;
            }
        }
        System.out.println("File " + args[0] + " has " + numDocs + " documents, " + numTokens + " (non-blank line) tokens and " + numEntities + " entities.");
    }

    private class CoNLLIterator
    extends AbstractIterator<List<CoreLabel>> {
        private Iterator<String> stringIter;

        public CoNLLIterator(Reader r) {
            this.stringIter = CoNLLDocumentReaderAndWriter.splitIntoDocs(r);
        }

        @Override
        public boolean hasNext() {
            return this.stringIter.hasNext();
        }

        @Override
        public List<CoreLabel> next() {
            return CoNLLDocumentReaderAndWriter.this.processDocument(this.stringIter.next());
        }
    }
}

