package de.linguatools.disco;

import ch.qos.logback.classic.net.SyslogAppender;
import de.linguatools.disco.DISCO;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import org.apache.commons.lang.CharEncoding;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.RAMDirectory;

/**
 * {@link DISCO} implementation that answers all queries from a Lucene index
 * stored in a directory on disk, optionally copied into RAM at construction.
 *
 * <p>Every word is one Lucene document. The fields read by this class are
 * {@code word}, {@code freq}, {@code kol}/{@code kolSig} (space-separated
 * feature list and the parallel list of values) and {@code dsb}/{@code dsbSim}
 * (space-separated similar-word list and the parallel list of values).
 * The directory must also contain the configuration read by {@link ConfigFile}.
 */
public class DISCOLuceneIndex extends DISCO {
    /*
     * Field names and separators are spelled out here instead of borrowing
     * equally-valued constants from unrelated library classes.
     */
    /** Index field holding the word itself; also the default query field. */
    private static final String FIELD_WORD = "word";
    /** Index field holding the word frequency as a decimal integer. */
    private static final String FIELD_FREQ = "freq";
    /** Index field holding the space-separated feature list. */
    private static final String FIELD_KOL = "kol";
    /** Index field holding the values parallel to {@link #FIELD_KOL}. */
    private static final String FIELD_KOL_SIG = "kolSig";
    /** Index field holding the space-separated similar-word list. */
    private static final String FIELD_DSB = "dsb";
    /** Index field holding the values parallel to {@link #FIELD_DSB}. */
    private static final String FIELD_DSB_SIM = "dsbSim";
    /** Separator between entries of the list-valued index fields. */
    private static final String LIST_SEPARATOR = " ";
    /** Column separator of the file written by {@link #wordFrequencyList(String)}. */
    private static final String COLUMN_SEPARATOR = "\t";
    /** Key prefix of the stop word line in the configuration file. */
    private static final String STOPWORDS_KEY = "stopwords=";
    /** Name of the configuration file inside the index directory. */
    private static final String CONFIG_FILE_NAME = "disco.config";

    /** Path of the directory that contains the Lucene index and the configuration file. */
    public String indexDir;
    private final ConfigFile config;
    /** In-memory copy of the index, or {@code null} when the index is read from disk. */
    public RAMDirectory indexRAM;
    private final IndexSearcher is;
    private final Analyzer analyzer;
    private final QueryParser parser;
    private final DISCO.WordspaceType wordspaceType;

    /**
     * Iterates over the {@code word} field of the documents with ids
     * {@code 0 .. numDocs-1}, using its own index reader.
     *
     * <p>The reader is closed as soon as the iteration is exhausted. An
     * iterator that is abandoned before that point keeps its reader open.
     */
    class VocabularyIterator implements Iterator<String> {
        private int nextDocId;
        private final int docCount;
        private final IndexReader reader;
        private boolean readerClosed;

        /**
         * Opens a reader on the RAM copy of the index if there is one,
         * otherwise on the index directory on disk.
         *
         * @throws IOException if the index cannot be opened
         */
        public VocabularyIterator() throws IOException {
            this.reader = DISCOLuceneIndex.this.openReader();
            this.docCount = this.reader.numDocs();
            this.nextDocId = 0;
        }

        /**
         * @return {@code true} while there are unread documents; the first
         *         {@code false} answer also releases the reader
         */
        @Override
        public boolean hasNext() {
            if (this.nextDocId < this.docCount) {
                return true;
            }
            closeReader();
            return false;
        }

        /**
         * Returns the word stored in the next document.
         *
         * @return the word, or the empty string if reading the document
         *         failed with an {@link IOException} (the error is reported
         *         on standard error and the iteration continues)
         * @throws NoSuchElementException if the iteration is exhausted
         */
        @Override
        public String next() {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            int docId = this.nextDocId++;
            try {
                return this.reader.document(docId).get(FIELD_WORD);
            } catch (IOException e) {
                System.err.println(DISCOLuceneIndex.class.getName() + ": word " + docId + ": " + e);
                return "";
            }
        }

        /** Does nothing; the index is never modified through this iterator. */
        @Override
        public void remove() {
        }

        /** Closes the reader once; a failure to close is only reported. */
        private void closeReader() {
            if (this.readerClosed) {
                return;
            }
            this.readerClosed = true;
            try {
                this.reader.close();
            } catch (IOException e) {
                System.err.println(DISCOLuceneIndex.class.getName() + ": closing index reader: " + e);
            }
        }
    }

    /**
     * Opens the index in directory {@code str} and reads its configuration.
     *
     * @param str path of the index directory
     * @param z   if {@code true} the whole index is copied into RAM and all
     *            queries are answered from that copy; otherwise the index is
     *            read from disk
     * @throws FileNotFoundException      declared for callers; raised by the
     *                                    underlying file access
     * @throws CorruptIndexException      if Lucene reports a corrupt index
     * @throws IOException                if the index cannot be read
     * @throws CorruptConfigFileException if the configuration cannot be read
     */
    public DISCOLuceneIndex(String str, boolean z) throws FileNotFoundException, CorruptIndexException, IOException, CorruptConfigFileException {
        this.indexDir = str;
        Path path = Paths.get(this.indexDir);
        this.analyzer = new WhitespaceAnalyzer();
        this.parser = new QueryParser(FIELD_WORD, this.analyzer);
        if (z) {
            // The on-disk directory is only needed while its files are copied.
            try (FSDirectory onDisk = FSDirectory.open(path)) {
                this.indexRAM = new RAMDirectory(onDisk, new IOContext());
            }
            this.is = new IndexSearcher(DirectoryReader.open(this.indexRAM));
        } else {
            this.indexRAM = null;
            this.is = new IndexSearcher(DirectoryReader.open(FSDirectory.open(path)));
        }
        this.config = new ConfigFile(this.indexDir);
        this.wordspaceType = this.config.dontCompute2ndOrder
                ? DISCO.WordspaceType.COL
                : DISCO.WordspaceType.SIM;
    }

    /**
     * Opens a fresh reader on the RAM copy of the index if there is one,
     * otherwise on the index directory on disk. The caller must close it.
     */
    private DirectoryReader openReader() throws IOException {
        if (this.indexRAM != null) {
            return DirectoryReader.open(this.indexRAM);
        }
        return DirectoryReader.open(FSDirectory.open(Paths.get(this.indexDir)));
    }

    /**
     * @return {@code COL} if the configuration has {@code dontCompute2ndOrder}
     *         set, {@code SIM} otherwise
     */
    @Override
    public DISCO.WordspaceType getWordspaceType() {
        return this.wordspaceType;
    }

    /** @return the vocabulary size recorded in the configuration */
    @Override
    public int numberOfWords() {
        return this.config.vocabularySize;
    }

    /** @return the number of feature words recorded in the configuration */
    @Override
    public int numberOfFeatureWords() {
        return this.config.numberFeatureWords;
    }

    /**
     * @return the number of similar words recorded in the configuration, or
     *         0 for a word space of type {@code COL}
     */
    @Override
    public int numberOfSimilarWords() {
        if (this.wordspaceType == DISCO.WordspaceType.COL) {
            return 0;
        }
        return this.config.numberOfSimilarWords;
    }

    /**
     * Looks up the stored frequency of a word.
     *
     * @param str the word
     * @return the value of the word's {@code freq} field, or 0 if the word
     *         is not found
     * @throws IOException if the index cannot be read
     */
    @Override
    public int frequency(String str) throws IOException {
        Document doc = searchIndex(str);
        if (doc == null) {
            return 0;
        }
        return Integer.parseInt(doc.get(FIELD_FREQ));
    }

    /**
     * Returns the stored similar words of a word together with their values.
     *
     * @param str the word
     * @return {@code null} if the word is not found; an empty result object
     *         if the word has no {@code dsb} field; otherwise the words and
     *         values in stored order. Values missing from the index are left
     *         at 0 and surplus values are ignored.
     * @throws IOException                 if the index cannot be read
     * @throws WrongWordspaceTypeException if the word space is not of type
     *                                     {@code SIM}
     */
    @Override
    public ReturnDataBN similarWords(String str) throws IOException, WrongWordspaceTypeException {
        if (this.wordspaceType != DISCO.WordspaceType.SIM) {
            throw new WrongWordspaceTypeException("This method can not be applied to word spaces of type " + this.wordspaceType);
        }
        Document doc = searchIndex(str);
        if (doc == null) {
            return null;
        }
        ReturnDataBN result = new ReturnDataBN();
        String similarWords = doc.get(FIELD_DSB);
        if (similarWords == null) {
            return result;
        }
        result.words = similarWords.split(LIST_SEPARATOR);
        result.values = new float[result.words.length];
        String similarities = doc.get(FIELD_DSB_SIM);
        if (similarities == null) {
            // Same treatment as a value list that is too short: values stay 0.
            return result;
        }
        String[] valueTokens = similarities.split(LIST_SEPARATOR);
        int count = Math.min(valueTokens.length, result.values.length);
        for (int i = 0; i < count; i++) {
            result.values[i] = Float.parseFloat(valueTokens[i]);
        }
        return result;
    }

    /**
     * Computes the similarity of two words with the given measure, applied
     * to their index documents.
     *
     * @param str              first word
     * @param str2             second word
     * @param vectorSimilarity similarity measure to apply
     * @return the similarity, or -2 if either word is not found
     * @throws IOException if the index cannot be read
     */
    @Override
    public float semanticSimilarity(String str, String str2, VectorSimilarity vectorSimilarity) throws IOException {
        Document first = searchIndex(str);
        Document second = searchIndex(str2);
        if (first == null || second == null) {
            return -2.0f;
        }
        return (float) vectorSimilarity.computeSimilarity(first, second);
    }

    /**
     * Computes the similarity of two words with the given measure, applied
     * to their second-order vectors.
     *
     * @param str              first word
     * @param str2             second word
     * @param vectorSimilarity similarity measure to apply
     * @return the similarity, or -2 if either word is not found
     * @throws IOException                 if the index cannot be read
     * @throws WrongWordspaceTypeException if the word space is not of type
     *                                     {@code SIM}
     */
    @Override
    public float secondOrderSimilarity(String str, String str2, VectorSimilarity vectorSimilarity) throws IOException, WrongWordspaceTypeException {
        if (this.wordspaceType != DISCO.WordspaceType.SIM) {
            throw new WrongWordspaceTypeException("This method can not be applied to word spaces of type " + this.wordspaceType);
        }
        Document first = searchIndex(str);
        Document second = searchIndex(str2);
        if (first == null || second == null) {
            return -2.0f;
        }
        return (float) vectorSimilarity.computeSimilarity(
                SparseVector.getSecondOrderMapVector(first),
                SparseVector.getSecondOrderMapVector(second));
    }

    /**
     * Returns the word vector of a word as a map from feature to value,
     * built from the parallel {@code kol} and {@code kolSig} fields.
     *
     * @param str the word
     * @return {@code null} if the word is not found; an empty map if either
     *         field is missing; otherwise one entry per feature. If the two
     *         lists differ in length the surplus entries are ignored.
     * @throws IOException if the index cannot be read
     */
    @Override
    public Map<String, Float> getWordvector(String str) throws IOException {
        Document doc = searchIndex(str);
        if (doc == null) {
            return null;
        }
        Map<String, Float> vector = new HashMap<>();
        String featureList = doc.get(FIELD_KOL);
        String valueList = doc.get(FIELD_KOL_SIG);
        if (featureList == null || valueList == null) {
            return vector;
        }
        String[] features = featureList.split(LIST_SEPARATOR);
        String[] values = valueList.split(LIST_SEPARATOR);
        int count = Math.min(features.length, values.length);
        for (int i = 0; i < count; i++) {
            vector.put(features[i], Float.valueOf(Float.parseFloat(values[i])));
        }
        return vector;
    }

    /**
     * Returns the second-order word vector of a word.
     *
     * @param str the word
     * @return the vector, or {@code null} if the word is not found
     * @throws WrongWordspaceTypeException if the word space is not of type
     *                                     {@code SIM}
     * @throws IOException                 if the index cannot be read
     */
    @Override
    public Map<String, Float> getSecondOrderWordvector(String str) throws WrongWordspaceTypeException, IOException {
        if (this.wordspaceType != DISCO.WordspaceType.SIM) {
            throw new WrongWordspaceTypeException("This method can not be applied to word spaces of type " + this.wordspaceType);
        }
        Document doc = searchIndex(str);
        if (doc == null) {
            return null;
        }
        return SparseVector.getSecondOrderMapVector(doc);
    }

    /**
     * Writes one line {@code word<TAB>frequency} per index document to a
     * UTF-8 file, printing a progress counter to standard output.
     *
     * <p>Documents that cannot be read are counted and skipped; a summary is
     * printed if there were any.
     *
     * @param str path of the output file
     * @return the number of documents written, or -1 if the index or the
     *         output file could not be opened or closed
     */
    @Override
    public int wordFrequencyList(String str) {
        try (DirectoryReader reader = openReader();
                PrintWriter out = new PrintWriter(str, CharEncoding.UTF_8)) {
            int numDocs = reader.numDocs();
            int corruptEntries = 0;
            int ioErrors = 0;
            for (int docId = 0; docId < numDocs; docId++) {
                try {
                    Document doc = reader.document(docId);
                    out.write(doc.get(FIELD_WORD) + COLUMN_SEPARATOR + Integer.parseInt(doc.get(FIELD_FREQ)) + "\n");
                    if (docId % 100 == 0) {
                        System.out.print("\r" + docId);
                    }
                } catch (CorruptIndexException e) {
                    corruptEntries++;
                } catch (IOException e) {
                    ioErrors++;
                }
            }
            System.out.println();
            if (corruptEntries > 0 || ioErrors > 0) {
                System.out.println("*** WARNING! ***");
                System.out.println("The language data packet \"" + this.indexDir + "\" has " + (corruptEntries + ioErrors) + " defect entries (" + corruptEntries + " corrupt, " + ioErrors + " IO errors)");
                System.out.println("All functioning words have been written to " + str);
            }
            return numDocs - corruptEntries - ioErrors;
        } catch (IOException e) {
            // Covers failure to open the index or the output file, and
            // failure to close the reader (CorruptIndexException included).
            System.out.println(DISCOLuceneIndex.class.getName() + ": " + e);
            return -1;
        }
    }

    /**
     * Reads the stop word list from the {@code disco.config} file in the
     * index directory.
     *
     * <p>The first line that, after trimming, starts with {@code stopwords=}
     * is used; the rest of that line is trimmed and split on whitespace.
     *
     * @return the stop words
     * @throws FileNotFoundException      if the configuration file does not exist
     * @throws IOException                if the file cannot be read
     * @throws CorruptConfigFileException if the file has no {@code stopwords=} line
     */
    @Override
    public String[] getStopwords() throws FileNotFoundException, IOException, CorruptConfigFileException {
        String configPath = this.indexDir + File.separator + CONFIG_FILE_NAME;
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(configPath)), CharEncoding.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.startsWith(STOPWORDS_KEY)) {
                    return trimmed.substring(STOPWORDS_KEY.length()).trim().split("\\s+");
                }
            }
        }
        throw new CorruptConfigFileException("ERROR: the stopwords could not be determined from the file " + configPath);
    }

    /** @return the token count recorded in the configuration */
    @Override
    public long getTokenCount() {
        return this.config.tokencount;
    }

    /** @return the minimum frequency recorded in the configuration */
    @Override
    public int getMinFreq() {
        return this.config.minFreq;
    }

    /** @return the maximum frequency recorded in the configuration */
    @Override
    public int getMaxFreq() {
        return this.config.maxFreq;
    }

    /**
     * @return a new iterator over all words in the index
     * @throws IOException if the index cannot be opened
     */
    @Override
    public Iterator<String> getVocabularyIterator() throws IOException {
        return new VocabularyIterator();
    }

    /**
     * Returns the word stored in the document with the given id.
     *
     * @param i document id
     * @return the word, or {@code null} if {@code i} is negative, is not
     *         smaller than the number of documents, or the document could
     *         not be read (the error is reported on standard error)
     * @throws IOException if the index cannot be opened or closed
     */
    @Override
    public String getWord(int i) throws IOException {
        if (i < 0) {
            return null;
        }
        try (DirectoryReader reader = openReader()) {
            if (i >= reader.numDocs()) {
                return null;
            }
            try {
                return reader.document(i).get(FIELD_WORD);
            } catch (IOException e) {
                System.err.println(DISCOLuceneIndex.class.getName() + ": word " + i + ": " + e);
                return null;
            }
        }
    }

    /**
     * Looks up the index document of a word.
     *
     * <p>{@code str} is parsed as a query with the classic Lucene query
     * parser (default field {@code word}, whitespace analyzer) and the top
     * hit is returned.
     * NOTE(review): because the input goes through the query parser, words
     * containing query syntax characters are presumably not matched
     * literally; confirm whether callers rely on that.
     *
     * @param str the word to look up
     * @return the best matching document, or {@code null} if there is no hit
     *         or the input cannot be parsed as a query
     * @throws IOException if the index cannot be read
     */
    public Document searchIndex(String str) throws IOException {
        try {
            TopDocs hits = this.is.search(this.parser.parse(str), 1);
            if (hits.totalHits == 0) {
                return null;
            }
            return this.is.doc(hits.scoreDocs[0].doc);
        } catch (ParseException ignored) {
            // An unparsable input is reported the same way as an unknown word.
            return null;
        }
    }

    /**
     * Returns the collocations of a word, sorted with {@link Arrays#sort(Object[])}.
     *
     * <p>Each entry of the {@code kol} field is cut at the last occurrence
     * of {@link DISCO#RELATION_SEPARATOR} (if any); the values from
     * {@code kolSig} of entries that become equal this way are summed.
     *
     * @param str the word
     * @return {@code null} if the word is not found; an empty array if
     *         either field is missing; otherwise the sorted collocations.
     *         If the two lists differ in length the surplus entries are
     *         ignored.
     * @throws IOException if the index cannot be read
     */
    @Override
    public ReturnDataCol[] collocations(String str) throws IOException {
        Document doc = searchIndex(str);
        if (doc == null) {
            return null;
        }
        String featureList = doc.get(FIELD_KOL);
        String valueList = doc.get(FIELD_KOL_SIG);
        if (featureList == null || valueList == null) {
            return new ReturnDataCol[0];
        }
        String[] features = featureList.split(LIST_SEPARATOR);
        String[] values = valueList.split(LIST_SEPARATOR);
        int count = Math.min(features.length, values.length);

        Map<String, Float> summed = new HashMap<>();
        for (int i = 0; i < count; i++) {
            String feature = features[i];
            int separatorPos = feature.lastIndexOf(DISCO.RELATION_SEPARATOR);
            String word = separatorPos == -1 ? feature : feature.substring(0, separatorPos);
            float value = Float.parseFloat(values[i]);
            Float previous = summed.get(word);
            if (previous != null) {
                value += previous.floatValue();
            }
            summed.put(word, Float.valueOf(value));
        }

        ReturnDataCol[] result = new ReturnDataCol[summed.size()];
        int next = 0;
        for (Map.Entry<String, Float> entry : summed.entrySet()) {
            result[next++] = new ReturnDataCol(entry.getKey(), entry.getValue().floatValue());
        }
        Arrays.sort(result);
        return result;
    }

    /**
     * Returns the larger of the value of {@code str2} among the collocations
     * of {@code str} and the value of {@code str} among the collocations of
     * {@code str2}.
     *
     * @param str  first word
     * @param str2 second word
     * @return the larger of the two values; a direction in which the word is
     *         not found, or is not among the collocations, counts as 0
     * @throws IOException if the index cannot be read
     */
    public float collocationalValue(String str, String str2) throws IOException {
        ReturnDataCol[] collocationsOfFirst = collocations(str);
        ReturnDataCol[] collocationsOfSecond = collocations(str2);
        float forward = valueOf(collocationsOfFirst, str2);
        float backward = valueOf(collocationsOfSecond, str);
        return forward > backward ? forward : backward;
    }

    /**
     * Returns the value of the first entry whose word equals {@code word},
     * or 0 if {@code entries} is {@code null} or has no such entry.
     */
    private static float valueOf(ReturnDataCol[] entries, String word) {
        if (entries == null) {
            return 0.0f;
        }
        for (ReturnDataCol entry : entries) {
            if (entry.word.equals(word)) {
                return entry.value;
            }
        }
        return 0.0f;
    }
}
