/*
 * Decompiled with CFR 0.152.
 */
package org.corpus_tools.peppermodules.nlpModules;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.corpus_tools.pepper.common.DOCUMENT_STATUS;
import org.corpus_tools.pepper.impl.PepperManipulatorImpl;
import org.corpus_tools.pepper.impl.PepperMapperImpl;
import org.corpus_tools.pepper.modules.PepperMapper;
import org.corpus_tools.pepper.modules.exceptions.PepperModuleDataException;
import org.corpus_tools.peppermodules.nlpModules.LemmatizerProperties;
import org.corpus_tools.salt.SaltFactory;
import org.corpus_tools.salt.common.SDocumentGraph;
import org.corpus_tools.salt.common.SToken;
import org.corpus_tools.salt.core.SAnnotation;
import org.corpus_tools.salt.core.SNode;
import org.corpus_tools.salt.graph.Identifier;
import org.eclipse.emf.common.util.URI;
import org.osgi.service.component.annotations.Component;

/**
 * Pepper manipulator that attaches a lemma annotation to the tokens of a document.
 *
 * <p>The constructor registers the module name, supplier contact, homepage, description
 * and a {@link LemmatizerProperties} instance. The per-document work is done by
 * {@link LemmaMapper}.
 */
@Component(name="LemmatizerComponent", factory="PepperManipulatorComponentFactory")
public class Lemmatizer
extends PepperManipulatorImpl {
    /** Sets the module metadata and installs the lemmatizer property set. */
    public Lemmatizer() {
        this.setName("Lemmatizer");
        this.setSupplierContact(URI.createURI("saltnpepper@lists.hu-berlin.de"));
        this.setSupplierHomepage(URI.createURI("https://github.com/korpling/pepperModules-nlpModules"));
        this.setDesc("The lemmatizer is a Pepper module to lemmatize tokens. By default English lemmatization is supported.");
        this.setProperties(new LemmatizerProperties());
    }

    /**
     * Creates the mapper that lemmatizes a single document.
     *
     * @param sElementId identifier of the element to be mapped (not used here)
     * @return a fresh {@link LemmaMapper}
     */
    public PepperMapper createPepperMapper(Identifier sElementId) {
        return new LemmaMapper();
    }

    /**
     * Mapper that looks up a lemma for every token and stores it as an annotation.
     *
     * <p>Lexicon keys are either {@code word} or {@code word<TAB>pos}; values are lemmas.
     * The bundled lexicon is loaded in {@link #initialize()}; a user lexicon named by the
     * {@code lemmatizer.lexiconFile} property replaces it on the first call to
     * {@link #mapSDocument()}.
     */
    public static class LemmaMapper
    extends PepperMapperImpl {
        private static final Logger LOGGER = Logger.getLogger(Lemmatizer.class.getName());
        private static final String DEFAULT_LEXICON_RESOURCE = "/en-lemmatizer.dict.txt";

        // Deliberately declared without an initializer.
        // NOTE(review): presumably initialize() is invoked from the PepperMapperImpl
        // constructor - confirm. If so, a field initializer here would run afterwards
        // and discard the lexicon that initialize() just loaded.
        private HashMap<String, String> wordPos2Lemma;
        // True once the user-supplied lexicon file has replaced the bundled one.
        private boolean userLexicon = false;

        /**
         * Loads the bundled default lexicon from the classpath.
         *
         * @throws IllegalStateException if the bundled lexicon resource cannot be found
         */
        protected void initialize() {
            InputStream is = this.getClass().getResourceAsStream(DEFAULT_LEXICON_RESOURCE);
            if (is == null) {
                // Fail with a message that names the resource instead of an anonymous
                // NullPointerException from inside InputStreamReader.
                throw new IllegalStateException("Bundled lexicon resource not found on classpath: " + DEFAULT_LEXICON_RESOURCE);
            }
            // NOTE(review): the reader uses the platform default charset - confirm the
            // encoding of the lexicon files before pinning one explicitly.
            this.wordPos2Lemma = this.getLexicon(new InputStreamReader(is));
        }

        /**
         * Adds a lemma annotation to every token of the current document.
         *
         * <p>Properties read (all prefixed {@code lemmatizer.}): {@code posAnno} (default
         * {@code "pos"}), {@code noLower}, {@code allowUnknown}, {@code makeUnknownLower},
         * {@code unknownString}, {@code lemmaName} (default {@code "lemma"}),
         * {@code lemmaNamespace} (default {@code "default_ns"}) and {@code lexiconFile}.
         *
         * <p>For a token whose lemma is not in the lexicon: if {@code unknownString} is set
         * it is used; otherwise, unless {@code allowUnknown} is true, the word form is used
         * (lower-cased when {@code makeUnknownLower} is true); otherwise the token gets no
         * lemma annotation. Lemmas found in the lexicon are never replaced.
         *
         * @return {@link DOCUMENT_STATUS#COMPLETED}
         * @throws PepperModuleDataException if the configured lexicon file does not exist
         */
        public DOCUMENT_STATUS mapSDocument() {
            String posAnno = this.stringSetting("lemmatizer.posAnno", "pos");
            boolean noLower = this.booleanSetting("lemmatizer.noLower");
            boolean allowUnknown = this.booleanSetting("lemmatizer.allowUnknown");
            boolean makeUnknownLower = this.booleanSetting("lemmatizer.makeUnknownLower");
            String unknownString = this.stringSetting("lemmatizer.unknownString", null);
            String lemmaName = this.stringSetting("lemmatizer.lemmaName", "lemma");
            String lemmaNamespace = this.stringSetting("lemmatizer.lemmaNamespace", "default_ns");
            String lexiconFile = this.stringSetting("lemmatizer.lexiconFile", null);

            // Load the user lexicon at most once per mapper instance.
            if (lexiconFile != null && !this.userLexicon) {
                FileInputStream is;
                try {
                    is = new FileInputStream(lexiconFile);
                }
                catch (FileNotFoundException e) {
                    throw new PepperModuleDataException((PepperMapper)this, "Lexicon file not found: " + lexiconFile);
                }
                // getLexicon closes the reader (and with it the file stream).
                this.wordPos2Lemma = this.getLexicon(new InputStreamReader(is));
                this.userLexicon = true;
            }

            SDocumentGraph graph = this.getDocument().getDocumentGraph();
            if (graph == null || graph.getTextualDSs().size() == 0) {
                return DOCUMENT_STATUS.COMPLETED;
            }

            List<SToken> tokens = graph.getTokens();
            for (SToken tok : tokens) {
                // No early exit: if several annotations carry the POS name, the one
                // visited last wins.
                String pos = null;
                Set<SAnnotation> tokAnnos = tok.getAnnotations();
                for (SAnnotation anno : tokAnnos) {
                    if (posAnno.equals(anno.getName())) {
                        pos = anno.getValue_STEXT();
                    }
                }

                String word = graph.getText((SNode)tok);
                if (word == null) {
                    // NOTE(review): assumes getText may return null for a token without
                    // covered text - confirm. Skipping avoids a NullPointerException in
                    // the lower-casing below.
                    continue;
                }

                String lemma = this.lookupLemma(word, pos, noLower);
                if (lemma == null) {
                    // Only unknown words receive the placeholder or the word-form
                    // fallback; a lemma found in the lexicon is left untouched.
                    if (unknownString != null) {
                        lemma = unknownString;
                    } else if (!allowUnknown) {
                        lemma = makeUnknownLower ? word.toLowerCase() : word;
                    }
                }
                if (lemma == null) {
                    continue;
                }

                SAnnotation lemmaAnno = SaltFactory.createSAnnotation();
                lemmaAnno.setName(lemmaName);
                lemmaAnno.setNamespace(lemmaNamespace);
                lemmaAnno.setValue((Object)lemma);
                tok.addAnnotation(lemmaAnno);
            }
            return DOCUMENT_STATUS.COMPLETED;
        }

        /**
         * Looks a word up in the lexicon, most specific key first: {@code word<TAB>pos},
         * lower-cased {@code word<TAB>pos}, {@code word}, lower-cased {@code word}.
         * The lower-cased variants are skipped when {@code noLower} is true, and the
         * POS-qualified variants when {@code pos} is null.
         *
         * @return the lemma, or null if no key matches
         */
        private String lookupLemma(String word, String pos, boolean noLower) {
            String lowered = noLower ? null : word.toLowerCase();
            String lemma = null;
            if (pos != null) {
                lemma = this.wordPos2Lemma.get(word + "\t" + pos);
                if (lemma == null && lowered != null) {
                    lemma = this.wordPos2Lemma.get(lowered + "\t" + pos);
                }
            }
            if (lemma == null) {
                lemma = this.wordPos2Lemma.get(word);
                if (lemma == null && lowered != null) {
                    lemma = this.wordPos2Lemma.get(lowered);
                }
            }
            return lemma;
        }

        /** Reads a property value as a string, or returns {@code defaultValue} if the key is absent. */
        private String stringSetting(String key, String defaultValue) {
            return (String)this.getProperties().getProperties().getOrDefault(key, defaultValue);
        }

        /** Reads a property as a boolean; anything other than the string "true" (any case) is false. */
        private boolean booleanSetting(String key) {
            return Boolean.parseBoolean(this.getProperties().getProperties().getProperty(key));
        }

        /**
         * Parses a tab-separated lexicon and closes the reader afterwards.
         *
         * <p>Lines with three or more columns are read as {@code word, pos, lemma} and
         * stored under both {@code word<TAB>pos} and {@code word}; lines with two columns
         * are read as {@code word, lemma}. Other lines are logged and ignored. Later
         * lines overwrite earlier entries with the same key.
         *
         * <p>An {@link IOException} is logged rather than propagated; the entries read up
         * to that point are returned.
         *
         * @param strm reader over the lexicon contents
         * @return the parsed lexicon, never null
         */
        private HashMap<String, String> getLexicon(InputStreamReader strm) {
            HashMap<String, String> lexicon = new HashMap<>();
            try (BufferedReader reader = new BufferedReader(strm)) {
                String line;
                while ((line = reader.readLine()) != null) {
                    String[] parts = line.split("\t");
                    if (parts.length > 2) {
                        lexicon.put(parts[0] + "\t" + parts[1], parts[2]);
                        lexicon.put(parts[0], parts[2]);
                    } else if (parts.length > 1) {
                        lexicon.put(parts[0], parts[1]);
                    } else {
                        LOGGER.log(Level.WARNING, "Lexicon warning - ignoring line: " + line);
                    }
                }
            }
            catch (IOException ex) {
                LOGGER.log(Level.SEVERE, "Failed to read lexicon", ex);
            }
            return lexicon;
        }
    }
}

