/*
 * Decompiled with CFR 0.152.
 */
package org.corpus_tools.peppermodules.nlpModules;

import com.neovisionaries.i18n.LanguageCode;
import java.util.HashSet;
import java.util.List;
import org.corpus_tools.pepper.common.DOCUMENT_STATUS;
import org.corpus_tools.pepper.impl.PepperManipulatorImpl;
import org.corpus_tools.pepper.impl.PepperMapperImpl;
import org.corpus_tools.pepper.modules.PepperMapper;
import org.corpus_tools.salt.common.SSequentialDS;
import org.corpus_tools.salt.common.STextualDS;
import org.corpus_tools.salt.common.tokenizer.AbbreviationDE;
import org.corpus_tools.salt.common.tokenizer.AbbreviationEN;
import org.corpus_tools.salt.common.tokenizer.AbbreviationFR;
import org.corpus_tools.salt.common.tokenizer.AbbreviationIT;
import org.corpus_tools.salt.common.tokenizer.Tokenizer;
import org.corpus_tools.salt.graph.Identifier;
import org.corpus_tools.salt.util.DataSourceSequence;
import org.eclipse.emf.common.util.URI;
import org.osgi.service.component.annotations.Component;

/**
 * Pepper manipulator that bundles tokens into sentences.
 *
 * <p>For every textual data source of a document the text is scanned for the
 * sentence terminators {@code '.'}, {@code '!'} and {@code '?'}. Each detected
 * sentence is materialised as a span over the tokens it covers, annotated
 * with {@code sentence=sentence}. Terminators that close a known abbreviation
 * (taken from Salt's abbreviation lists for German, English, French and
 * Italian) do not end a sentence.
 */
@Component(name="SentencerComponent", factory="PepperManipulatorComponentFactory")
public class Sentencer
extends PepperManipulatorImpl {
    /**
     * Registers the module's name, supplier contact, homepage and description.
     */
    public Sentencer() {
        this.setName("Sentencer");
        this.setSupplierContact(URI.createURI((String)"saltnpepper@lists.hu-berlin.de"));
        this.setSupplierHomepage(URI.createURI((String)"https://github.com/korpling/pepperModules-nlpModules"));
        this.setDesc("The sentencer is a Pepper module to bundle tokens to sentences. Therefore it creates a {@link SSpan} object for each sentence and connects that sentence with a set of tokens, belonging to the sentence. A sentence is identified as being determined by punctuations ('.', '!' and * '?'). The sentencer uses the abbreviation lists of Salt to identify abbreviations. ");
    }

    /**
     * Creates the mapper that performs the sentence detection.
     *
     * @param sElementId identifier of the element to map; not used here
     * @return a fresh {@link SentenceMapper}
     */
    public PepperMapper createPepperMapper(Identifier sElementId) {
        return new SentenceMapper();
    }

    /**
     * Mapper that creates one sentence span per detected sentence in every
     * non-empty textual data source of the document.
     */
    public static class SentenceMapper
    extends PepperMapperImpl {
        /**
         * Runs sentence detection over all textual data sources of the
         * document graph. Data sources without text are skipped.
         *
         * @return always {@link DOCUMENT_STATUS#COMPLETED}
         */
        public DOCUMENT_STATUS mapSDocument() {
            if (this.getDocument().getDocumentGraph() != null && this.getDocument().getDocumentGraph().getTextualDSs().size() > 0) {
                for (STextualDS textualDS : this.getDocument().getDocumentGraph().getTextualDSs()) {
                    String content = textualDS.getText();
                    if (content == null || content.isEmpty()) {
                        continue;
                    }
                    this.markSentences(textualDS, content);
                }
            }
            return DOCUMENT_STATUS.COMPLETED;
        }

        /**
         * Scans {@code content} and creates a sentence span for every stretch
         * of text that ends in a sentence terminator.
         *
         * @param textualDS the data source the text belongs to
         * @param content the non-empty text of {@code textualDS}
         */
        private void markSentences(STextualDS textualDS, String content) {
            HashSet<String> abbreviations = SentenceMapper.abbreviationsFor(Tokenizer.checkLanguage((String)content));
            char[] text = content.toCharArray();
            int startOfSentence = 0;
            // Characters seen since the last blank (or since the last sentence end),
            // including the current one; compared against the abbreviation list.
            StringBuilder word = new StringBuilder();
            for (int i = 0; i < text.length; ++i) {
                char current = text[i];
                word.append(current);
                if (' ' == current) {
                    word = new StringBuilder();
                    continue;
                }
                boolean terminator = '.' == current || '!' == current || '?' == current;
                if (!terminator || abbreviations.contains(word.toString())) {
                    continue;
                }
                // Raw types are kept as in the original to avoid pulling in
                // additional Salt imports.
                DataSourceSequence sequence = new DataSourceSequence();
                sequence.setDataSource((SSequentialDS)textualDS);
                sequence.setStart((Number)startOfSentence);
                sequence.setEnd((Number)(i + 1));
                List tokens = this.getDocument().getDocumentGraph().getTokensBySequence(sequence);
                // Nothing to bundle (e.g. the 2nd/3rd dot of "..."): keep the
                // sentence start where it is. Guarding the empty case avoids
                // dereferencing the result of createSpan for an empty token
                // list, which is presumably null -- TODO confirm against Salt.
                if (tokens == null || tokens.isEmpty()) {
                    continue;
                }
                this.getDocument().getDocumentGraph().createSpan(tokens).createAnnotation(null, "sentence", (Object)"sentence");
                startOfSentence = i + 1;
                word = new StringBuilder();
            }
        }

        /**
         * Selects the abbreviation list for the detected language.
         *
         * @param language the detected language; may be {@code null}
         * @return the abbreviation set for German, English, French or Italian,
         *         or an empty set for any other (or no) language so that the
         *         caller never has to deal with {@code null}
         */
        private static HashSet<String> abbreviationsFor(LanguageCode language) {
            if (LanguageCode.de.equals((Object)language)) {
                return AbbreviationDE.createAbbriviations();
            }
            if (LanguageCode.en.equals((Object)language)) {
                return AbbreviationEN.createAbbriviations();
            }
            if (LanguageCode.fr.equals((Object)language)) {
                return AbbreviationFR.createAbbriviations();
            }
            if (LanguageCode.it.equals((Object)language)) {
                return AbbreviationIT.createAbbriviations();
            }
            // Unsupported or undetected language: no abbreviations are known,
            // so every terminator ends a sentence.
            return new HashSet<String>();
        }
    }
}

