/*
 * Decompiled with CFR 0.152.
 */
package org.corpus_tools.peppermodules.treetagger.model.serialization.deserializer;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.corpus_tools.pepper.modules.exceptions.PepperModuleException;
import org.corpus_tools.peppermodules.treetagger.model.AnnotatableElement;
import org.corpus_tools.peppermodules.treetagger.model.Annotation;
import org.corpus_tools.peppermodules.treetagger.model.Document;
import org.corpus_tools.peppermodules.treetagger.model.Span;
import org.corpus_tools.peppermodules.treetagger.model.Token;
import org.corpus_tools.peppermodules.treetagger.model.TreetaggerFactory;
import org.corpus_tools.peppermodules.treetagger.model.impl.Treetagger;
import org.corpus_tools.peppermodules.treetagger.model.serialization.deserializer.XMLUtils;
import org.eclipse.emf.common.util.URI;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Reads a TreeTagger file (one token per line, columns separated by tabs,
 * interleaved with SGML-like tag lines) and turns it into a list of
 * {@link Document}s.
 *
 * <p>Line handling, as implemented in {@link #mapLine(String, long)}:
 * <ul>
 * <li>blank lines and lines recognised by {@code XMLUtils} as processing
 * instructions are skipped,</li>
 * <li>a start/end tag whose name equals the configured meta tag name (ignoring
 * case) opens/closes a document,</li>
 * <li>any other start/end tag opens/closes a span,</li>
 * <li>every remaining line is treated as a token line.</li>
 * </ul>
 *
 * <p>Instances are created via {@link Builder}. The class keeps parsing state in
 * fields and performs no synchronisation.
 */
public class Deserializer {
    /** Annotation name used for columns that have no configured column name. */
    public static final String DEFAULT_ANNOTATION_NAME = "anyAnno";
    public static final String COLUMN_SEPARATOR = "\t";
    // NOTE(review): this has the same value as COLUMN_POS. The name of column 0 is
    // never used for an annotation in this class (column 0 becomes the token text),
    // so only the number of configured columns matters here - confirm the intended value.
    public static final String COLUMN_TOKEN_TEXT = "pos";
    public static final String COLUMN_POS = "pos";
    public static final String COLUMN_LEMMA = "lemma";
    private static final Logger logger = LoggerFactory.getLogger(Deserializer.class);
    /** The byte order mark as it appears after decoding (U+FEFF). */
    private static final String UTF8_BOM = "\ufeff";
    /** Number assigned to the first line of the input file. */
    private static final int FIRST_LINE_NUMBER = 1;
    private String fileEncoding = "UTF-8";
    private String metaTagName = "meta";
    private URI location = null;
    private List<Document> documents = new ArrayList<>();
    private Document currentDocument = null;
    /** Currently open spans; the most recently opened span is at index 0. */
    private final List<Span> openSpans = new ArrayList<>();
    /** Number of the line currently being processed (starts at 1 while reading). */
    int lineNumber = 0;
    /** True while a document start tag has been read without its end tag. */
    private boolean documentTagIsOpen = false;
    List<Integer> rowsWithTooMuchColumns = new ArrayList<>();
    List<Integer> rowsWithTooLessColumns = new ArrayList<>();
    List<String> columnNames = new ArrayList<>();

    Deserializer() {
        this.setDefaultColumnNames();
    }

    /** Resets the column names to token text, part of speech and lemma. */
    public void setDefaultColumnNames() {
        this.setColumnNames(Arrays.asList(COLUMN_TOKEN_TEXT, COLUMN_POS, COLUMN_LEMMA));
    }

    /**
     * @param metaTagName name of the tag that delimits a document
     */
    public void setMetaTagName(String metaTagName) {
        this.metaTagName = metaTagName;
    }

    /**
     * @param fileEncoding charset name used to decode the input file
     */
    public void setFileEncoding(String fileEncoding) {
        this.fileEncoding = fileEncoding;
    }

    /**
     * Sets the names of the data columns; index 0 corresponds to the token text.
     *
     * @param columnNames the column names, {@code null} is treated as an empty list
     */
    public void setColumnNames(List<String> columnNames) {
        this.columnNames = columnNames != null ? columnNames : new ArrayList<String>();
    }

    /**
     * @param location uri of the file to read
     */
    public void setLocation(URI location) {
        this.location = location;
    }

    /**
     * Reads the file at the configured location and returns the documents found in it.
     *
     * <p>If exactly one document was read it is named after the file (last uri segment
     * without extension), otherwise the documents are named {@code <file>_1},
     * {@code <file>_2}, ... Rows with an unexpected number of columns are collected
     * and reported once in a warning after reading.
     *
     * @return the documents read by this call
     * @throws PepperModuleException if no location is set, the location does not denote
     *                               a local file, or the file cannot be read
     */
    public List<Document> deserialize() {
        if (this.location == null) {
            throw new PepperModuleException("Cannot load any resource, because no uri is given.");
        }
        String filePath = this.location.toFileString();
        if (filePath == null) {
            // toFileString() yields null for uris that are not file uris; fail with a
            // meaningful message instead of a NullPointerException from FileInputStream.
            throw new PepperModuleException("Cannot read treetagger file '" + this.location + "', because the uri does not point to a local file.");
        }
        this.resetParsingState();
        // The stream is a resource of its own so that it is closed even if creating the
        // reader fails (e.g. because of an unsupported encoding).
        try (FileInputStream fileStream = new FileInputStream(filePath);
                BufferedReader fileReader = new BufferedReader(new InputStreamReader(fileStream, this.fileEncoding))) {
            String line;
            this.lineNumber = FIRST_LINE_NUMBER;
            while ((line = fileReader.readLine()) != null) {
                this.mapLine(line, this.lineNumber);
                ++this.lineNumber;
            }
            // Flush a document that is still open at the end of the file.
            this.endDocument();
        }
        catch (IOException e) {
            throw new PepperModuleException("Cannot read treetagger file '" + this.location + "'. ", e);
        }
        this.setAllDocumentNames();
        if (!this.rowsWithTooLessColumns.isEmpty()) {
            logger.warn("{} rows in input file had less data columns than expected! (Rows {})", this.rowsWithTooLessColumns.size(), this.rowsWithTooLessColumns);
        }
        if (!this.rowsWithTooMuchColumns.isEmpty()) {
            logger.warn("{} rows in input file had more data columns than expected! Additional data was ignored! (Rows {})", this.rowsWithTooMuchColumns.size(), this.rowsWithTooMuchColumns);
        }
        return this.documents;
    }

    /**
     * Clears everything a previous run may have left behind, so that a second call to
     * {@link #deserialize()} does not return the documents of the first one again.
     * A fresh document list is created, lists handed out earlier stay untouched.
     */
    private void resetParsingState() {
        this.documents = new ArrayList<>();
        this.currentDocument = null;
        this.documentTagIsOpen = false;
        this.openSpans.clear();
        this.rowsWithTooMuchColumns.clear();
        this.rowsWithTooLessColumns.clear();
    }

    /**
     * Returns the last segment of the uri without its file extension.
     *
     * @return the base name, or the unchanged last segment (possibly {@code null}) if it
     *         contains no dot
     */
    private String extractDocumentName(URI location) {
        String lastSegment = location.lastSegment();
        if (lastSegment != null && lastSegment.contains(".")) {
            return lastSegment.substring(0, lastSegment.lastIndexOf('.'));
        }
        return lastSegment;
    }

    /**
     * Dispatches a single input line: document tag, span tag or token line.
     *
     * @param line   the raw line as read from the file
     * @param lineNr number of the line, stored on created tokens
     */
    private void mapLine(String line, long lineNr) {
        // Strip the BOM before the blank check: a first line consisting only of a BOM
        // (plus whitespace) must be skipped as well instead of becoming a token.
        line = this.removeBOM(line);
        if (line.trim().isEmpty()) {
            return;
        }
        if (XMLUtils.isProcessingInstructionTag(line)) {
            return;
        }
        if (XMLUtils.isStartTag(line)) {
            String startTagName = XMLUtils.extractTagName(line);
            if (startTagName.equalsIgnoreCase(this.metaTagName)) {
                this.beginDocument(line);
            } else {
                this.beginSpan(startTagName, line);
            }
        } else if (XMLUtils.isEndTag(line)) {
            String endTagName = XMLUtils.extractTagName(line);
            if (endTagName.equalsIgnoreCase(this.metaTagName)) {
                // The document was closed properly, so endDocument() will keep it.
                this.documentTagIsOpen = false;
                this.endDocument();
            } else {
                this.endSpan(endTagName);
            }
        } else {
            if (this.currentDocument == null) {
                // Token outside of any document tag: open an implicit document.
                this.beginDocument(null);
            }
            Token token = this.createTokenFromLine(line, lineNr);
            this.connectTokenWithOpenSpans(token);
            this.currentDocument.getTokens().add(token);
        }
    }

    /**
     * Removes a leading byte order mark from the first line of the file.
     *
     * @return the line without BOM; lines other than the first are returned unchanged
     */
    private String removeBOM(String line) {
        // Line counting starts at FIRST_LINE_NUMBER, so that is the only line that can
        // carry the BOM.
        if (this.lineNumber == FIRST_LINE_NUMBER && line.startsWith(UTF8_BOM)) {
            line = line.substring(UTF8_BOM.length());
            logger.trace("recognised BOM and ignored for file '{}'", this.location);
        }
        return line;
    }

    /**
     * Names all documents after the input file; if there is not exactly one document,
     * a running number starting at 1 is appended.
     */
    private void setAllDocumentNames() {
        String documentName = this.extractDocumentName(this.location);
        if (this.documents.size() == 1) {
            this.documents.get(0).setName(documentName);
            return;
        }
        int documentNumber = 1;
        for (Document document : this.documents) {
            document.setName(documentName + "_" + documentNumber);
            ++documentNumber;
        }
    }

    /**
     * Starts a new document, finishing the current one first if there is any.
     *
     * @param startTag the document start tag whose attributes become annotations of the
     *                 document, or {@code null} for an implicit document without tag
     */
    private void beginDocument(String startTag) {
        if (this.currentDocument != null) {
            this.endDocument();
        }
        this.currentDocument = TreetaggerFactory.eINSTANCE.createDocument();
        this.documentTagIsOpen = startTag != null;
        if (this.documentTagIsOpen) {
            this.addAttributesAsAnnotations(startTag, this.currentDocument);
        }
    }

    /**
     * Finishes the current document, if any. Spans that are still open are detached
     * from their tokens and reported. The document is added to the result unless its
     * start tag is still waiting for an end tag, in which case it is dropped with a warning.
     */
    private void endDocument() {
        if (this.currentDocument != null) {
            if (!this.openSpans.isEmpty()) {
                StringBuilder openSpanNames = new StringBuilder();
                for (Span span : this.openSpans) {
                    if (openSpanNames.length() > 0) {
                        openSpanNames.append(',');
                    }
                    openSpanNames.append("</").append(span.getName()).append('>');
                    // Walk the span's tokens from the end and detach the span from each of
                    // them; stops at the first token that does not reference the span.
                    for (int tokenIndex = span.getTokens().size() - 1; tokenIndex >= 0; --tokenIndex) {
                        Token token = span.getTokens().get(tokenIndex);
                        if (!token.getSpans().contains(span)) {
                            break;
                        }
                        token.getSpans().remove(span);
                    }
                }
                logger.warn("input file '{}' (line {}): missing end tag(s) '{}'. tag(s) will be ignored!", this.location.lastSegment(), this.lineNumber, openSpanNames);
            }
            if (this.documentTagIsOpen) {
                logger.warn("input file '{}' (line {}): missing document end tag. document will be ignored!", this.location.lastSegment(), this.lineNumber);
            } else {
                this.documents.add(this.currentDocument);
            }
            this.currentDocument = null;
            this.documentTagIsOpen = false;
        }
        this.openSpans.clear();
    }

    /**
     * Adds one annotation per attribute of the given tag to the element. The entities
     * {@code &lt;}, {@code &gt;} and {@code &amp;} in attribute values are unescaped.
     */
    private void addAttributesAsAnnotations(String tag, AnnotatableElement annotatableElement) {
        Map<String, String> attributeValuePairs = XMLUtils.extractAttributeValuePairs(tag);
        for (Map.Entry<String, String> attributeValuePair : attributeValuePairs.entrySet()) {
            // "&amp;" is replaced last so that e.g. "&amp;lt;" ends up as "&lt;" and not "<".
            String value = attributeValuePair.getValue().replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&");
            Annotation annotation = TreetaggerFactory.eINSTANCE.createAnnotation(attributeValuePair.getKey(), value);
            annotatableElement.getAnnotations().add(annotation);
        }
    }

    /**
     * Opens a new span; an implicit document is started if none is open.
     *
     * @param spanName name of the span (the tag name)
     * @param startTag the complete start tag, its attributes become annotations of the span
     */
    private void beginSpan(String spanName, String startTag) {
        if (this.currentDocument == null) {
            this.beginDocument(null);
        }
        Span span = TreetaggerFactory.eINSTANCE.createSpan();
        // Newest span first, so endSpan() finds the innermost span of a name first.
        this.openSpans.add(0, span);
        span.setName(spanName);
        this.addAttributesAsAnnotations(startTag, span);
    }

    /**
     * Closes the most recently opened span with the given name (ignoring case). Logs a
     * warning if there is no open document, no matching open span, or the span is empty.
     */
    private void endSpan(String spanName) {
        if (this.currentDocument == null) {
            logger.warn("input file '{}' (line '{}'): end tag '</{}>' out of nowhere. tag will be ignored!", this.location.lastSegment(), this.lineNumber, spanName);
            return;
        }
        for (int i = 0; i < this.openSpans.size(); ++i) {
            Span openSpan = this.openSpans.get(i);
            if (!openSpan.getName().equalsIgnoreCase(spanName)) {
                continue;
            }
            if (openSpan.getTokens().isEmpty()) {
                logger.warn("input file '{}' (line {}): no tokens contained in span '<{}>'. span will be ignored!", this.location.lastSegment(), this.lineNumber, openSpan.getName());
            }
            this.openSpans.remove(i);
            return;
        }
        logger.warn("input file '{}' (line {}): no corresponding opening tag found for end tag '</{}>'. tag will be ignored!", this.location.lastSegment(), this.lineNumber, spanName);
    }

    /**
     * Builds a token from a tab separated line: column 0 is the token text, all further
     * columns become annotations.
     */
    private Token createTokenFromLine(String line, long lineNr) {
        String[] tuple = line.split(COLUMN_SEPARATOR);
        this.doesTupleHasExpectedNumOfColumns(tuple);
        Token token = Treetagger.buildToken().withLine(lineNr).withText(tuple[0].trim()).build();
        this.createAnnotationsForToken(token, tuple);
        return token;
    }

    /**
     * Records the current line number if the row has more or fewer columns than there
     * are configured column names. Nothing is thrown; the rows are reported at the end
     * of {@link #deserialize()}.
     */
    void doesTupleHasExpectedNumOfColumns(String ... tuple) {
        int expectedColumns = this.columnNames.size();
        if (tuple.length > expectedColumns) {
            this.rowsWithTooMuchColumns.add(this.lineNumber);
        } else if (tuple.length < expectedColumns) {
            this.rowsWithTooLessColumns.add(this.lineNumber);
        }
    }

    /** Links the token with every currently open span, in both directions. */
    void connectTokenWithOpenSpans(Token token) {
        for (Span span : this.openSpans) {
            token.getSpans().add(span);
            span.getTokens().add(token);
        }
    }

    /**
     * Adds one annotation to the token for every column after the first one; the
     * annotation name is looked up via {@link #findColumnName(int)}, the value is the
     * trimmed column content.
     */
    void createAnnotationsForToken(Token token, String ... tuple) {
        for (int columnNumber = 1; columnNumber < tuple.length; ++columnNumber) {
            Annotation anno = TreetaggerFactory.eINSTANCE.createAnnotation(this.findColumnName(columnNumber), tuple[columnNumber].trim());
            token.getAnnotations().add(anno);
        }
    }

    /**
     * @param colNumber zero based column index
     * @return the configured name of the column, or {@link #DEFAULT_ANNOTATION_NAME} if
     *         the index lies beyond the configured column names
     */
    String findColumnName(int colNumber) {
        if (colNumber >= this.columnNames.size()) {
            return DEFAULT_ANNOTATION_NAME;
        }
        return this.columnNames.get(colNumber);
    }

    /** Fluent configuration front end for a {@link Deserializer}. */
    public static class Builder {
        private final Deserializer deserializer = new Deserializer();

        public Builder withColumnNames(List<String> columnNames) {
            this.deserializer.setColumnNames(columnNames);
            return this;
        }

        public Builder withFileEncoding(String fileEncoding) {
            this.deserializer.setFileEncoding(fileEncoding);
            return this;
        }

        public Builder withMetaTagName(String metaTagName) {
            this.deserializer.setMetaTagName(metaTagName);
            return this;
        }

        /**
         * Reads the file at the given location with the configured settings.
         *
         * @param location uri of the file to read
         * @return the documents contained in the file
         */
        public List<Document> from(URI location) {
            this.deserializer.setLocation(location);
            return this.deserializer.deserialize();
        }
    }
}

