package de.linguatools.disco;

import de.linguatools.disco.DISCO;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.apache.commons.lang.CharEncoding;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.index.CorruptIndexException;

/* loaded from: input_file:de/linguatools/disco/Cluster.class */
/**
 * Helper routines that work on the neighbourhood lists and word vectors of a
 * {@link DISCO} word space: filtering a word's similar words, growing a set of
 * words, and exporting similarity graphs / word vectors to text files.
 */
public class Cluster {

    /** Prefix of the error raised when a method needs a word space of type SIM. */
    private static final String WRONG_TYPE_MESSAGE = "This method can not be applied to word spaces of type ";

    /** Separator written between the numbers of one output row (presumably a single space -- TODO confirm). */
    private static final String FIELD_SEPARATOR = ShingleFilter.DEFAULT_TOKEN_SEPARATOR;

    /** Character set used for all files written by this class. */
    private static final String OUTPUT_ENCODING = CharEncoding.UTF_8;

    /**
     * Rejects word spaces whose type is not {@code DISCO.WordspaceType.SIM}.
     *
     * @param disco the word space to check
     * @throws WrongWordspaceTypeException if the word space has any other type
     */
    private static void requireSimWordspace(DISCO disco) throws WrongWordspaceTypeException {
        if (disco.getWordspaceType() != DISCO.WordspaceType.SIM) {
            throw new WrongWordspaceTypeException(WRONG_TYPE_MESSAGE + disco.getWordspaceType());
        }
    }

    /**
     * Keeps those of the first {@code i} similar words of {@code str} that also
     * occur in the similar-words list of at least one of these first {@code i}
     * words. The relative order and the similarity values of the kept words are
     * taken from the similar-words list of {@code str}.
     *
     * @param disco word space of type SIM
     * @param str   the word whose similar words are filtered
     * @param i     number of leading similar words to consider; values below 1
     *              are treated as 1, values above the list length as the list length
     * @return the filtered words with their values, or {@code null} (after printing
     *         a message to standard output) if {@code str} has no entry
     * @throws IOException                 if the word space cannot be read
     * @throws WrongWordspaceTypeException if the word space is not of type SIM
     */
    public static ReturnDataBN filterOutliers(DISCO disco, String str, int i) throws IOException, WrongWordspaceTypeException {
        requireSimWordspace(disco);
        ReturnDataBN similarWords = disco.similarWords(str);
        if (similarWords == null) {
            System.out.println("Word \"" + str + "\" not found in index.");
            return null;
        }

        String[] neighbours = similarWords.words;
        // One bound for all passes, so that a neighbour without an index entry
        // can never extend the scan beyond the first i words.
        int limit = Math.min(Math.max(i, 1), neighbours.length);

        // Candidate word -> has it been seen in another candidate's list?
        Map<String, Boolean> confirmed = new HashMap<String, Boolean>();
        for (int pos = 0; pos < limit; pos++) {
            confirmed.put(neighbours[pos], Boolean.FALSE);
        }
        for (int pos = 0; pos < limit; pos++) {
            ReturnDataBN secondOrder = disco.similarWords(neighbours[pos]);
            if (secondOrder == null) {
                continue;
            }
            for (String candidate : secondOrder.words) {
                if (confirmed.containsKey(candidate)) {
                    confirmed.put(candidate, Boolean.TRUE);
                }
            }
        }

        // Entries are only flagged, never removed, so every lookup below succeeds.
        int kept = 0;
        for (int pos = 0; pos < limit; pos++) {
            if (Boolean.TRUE.equals(confirmed.get(neighbours[pos]))) {
                kept++;
            }
        }
        String[] keptWords = new String[kept];
        float[] keptValues = new float[kept];
        int out = 0;
        for (int pos = 0; pos < limit; pos++) {
            if (Boolean.TRUE.equals(confirmed.get(neighbours[pos]))) {
                keptWords[out] = neighbours[pos];
                keptValues[out] = similarWords.values[pos];
                out++;
            }
        }

        ReturnDataBN result = new ReturnDataBN();
        result.words = keptWords;
        result.values = keptValues;
        return result;
    }

    /**
     * Extends a set of words by every word that occurs in the similar-words list
     * of the input words at least {@code strArr.length} times in total and is not
     * itself part of the input. Input words without an index entry contribute
     * nothing to the counts.
     *
     * @param disco  word space of type SIM
     * @param strArr the input words
     * @return a new array holding the input words in their original order,
     *         followed by the added words
     * @throws IOException                 if the word space cannot be read
     * @throws WrongWordspaceTypeException if the word space is not of type SIM
     */
    public static String[] growSet(DISCO disco, String[] strArr) throws IOException, WrongWordspaceTypeException {
        requireSimWordspace(disco);

        Map<String, Boolean> inputWords = new HashMap<String, Boolean>();
        for (String word : strArr) {
            inputWords.put(word, Boolean.TRUE);
        }

        // How often each word shows up across all similar-words lists.
        Map<String, Integer> occurrences = new HashMap<String, Integer>();
        for (String word : strArr) {
            ReturnDataBN similarWords = disco.similarWords(word);
            if (similarWords == null) {
                continue;
            }
            for (String neighbour : similarWords.words) {
                Integer seen = occurrences.get(neighbour);
                occurrences.put(neighbour, Integer.valueOf(seen == null ? 1 : seen.intValue() + 1));
            }
        }

        ArrayList<String> additions = new ArrayList<String>();
        for (Map.Entry<String, Integer> entry : occurrences.entrySet()) {
            if (entry.getValue().intValue() >= strArr.length && !inputWords.containsKey(entry.getKey())) {
                additions.add(entry.getKey());
            }
        }

        String[] grown = new String[strArr.length + additions.size()];
        System.arraycopy(strArr, 0, grown, 0, strArr.length);
        int next = strArr.length;
        for (String addition : additions) {
            grown[next++] = addition;
        }
        return grown;
    }

    /**
     * Writes a similarity graph over the first {@code i} words of the vocabulary
     * into the directory {@code str}:
     * <ul>
     *   <li>{@code sparseGraph.dat} -- one line per word, holding pairs of
     *       "neighbour ID, similarity value" for the word's similar words;</li>
     *   <li>{@code rowLabels.dat} -- the word belonging to each line.</li>
     * </ul>
     * IDs are assigned in vocabulary-iteration order, starting at 1. A word
     * without a similar-words entry produces an empty line. Progress and a short
     * summary are printed to standard output. If {@code i} exceeds the number of
     * words in the word space, an error is printed and nothing is written.
     *
     * @param disco word space of type SIM
     * @param i     number of vocabulary words to export
     * @param f     minimum similarity value a neighbour needs to be written
     * @param str   existing output directory
     * @throws CorruptIndexException       declared for the word-space access
     * @throws IOException                 if the word space cannot be read or an
     *                                     output file cannot be created
     * @throws WrongWordspaceTypeException if the word space is not of type SIM
     */
    public void clutoClusterSimilarityGraph(DISCO disco, int i, float f, String str) throws CorruptIndexException, IOException, WrongWordspaceTypeException {
        requireSimWordspace(disco);
        int numberOfWords = disco.numberOfWords();
        if (i > numberOfWords) {
            System.out.println("Error: there are only " + numberOfWords + " words in the index.");
            return;
        }

        System.out.println("create word-ID mapping for first " + i + " words in index...");
        Map<String, Integer> wordIds = new HashMap<String, Integer>();
        Iterator<String> vocabulary = disco.getVocabularyIterator();
        int assigned = 0;
        while (vocabulary.hasNext() && assigned < i) {
            assigned++;
            // The running count doubles as the ID, so IDs start at 1.
            wordIds.put(vocabulary.next(), Integer.valueOf(assigned));
            if (assigned % 10 == 0) {
                System.out.print("\r" + assigned);
            }
        }
        System.out.println("   OK.");
        System.out.flush();

        // try-with-resources closes both files even if building a row fails.
        try (PrintWriter graphWriter = new PrintWriter(str + File.separator + "sparseGraph.dat", OUTPUT_ENCODING);
             PrintWriter labelWriter = new PrintWriter(str + File.separator + "rowLabels.dat", OUTPUT_ENCODING)) {
            System.out.println("create similarity graph for first " + i + " words...");
            int emptyRows = 0;
            int entries = 0;
            int rows = 0;
            Iterator<String> rowWords = disco.getVocabularyIterator();
            while (rowWords.hasNext() && rows < i) {
                String word = rowWords.next();
                ReturnDataBN similarWords = disco.similarWords(word);
                int entriesInRow = 0;
                if (similarWords != null) {
                    // NOTE(review): the row ends at the first neighbour that is below the
                    // threshold OR has no ID; later neighbours that do have an ID are not
                    // written. Confirm that stopping (rather than skipping) is intended.
                    for (int k = 0;
                            k < similarWords.words.length
                                    && similarWords.values[k] >= f
                                    && wordIds.containsKey(similarWords.words[k]);
                            k++) {
                        if (entriesInRow > 0) {
                            graphWriter.write(FIELD_SEPARATOR);
                        }
                        graphWriter.write(wordIds.get(similarWords.words[k]) + FIELD_SEPARATOR + similarWords.values[k]);
                        entriesInRow++;
                    }
                }
                // Always emit the line and its label so both files stay aligned.
                graphWriter.write("\n");
                labelWriter.write(word + "\n");
                entries += entriesInRow;
                if (entriesInRow == 0) {
                    emptyRows++;
                }
                rows++;
                if (rows % 10 == 0) {
                    System.out.print("\r" + rows);
                }
            }
            System.out.println("   OK.\nempty rows = " + emptyRows);
            System.out.println("numberOfVertices = " + i);
            System.out.println("numberOfEntries = " + entries);
        }
    }

    /**
     * Writes the word vectors of the given words into the directory {@code str}:
     * <ul>
     *   <li>{@code sparseMatrix.dat} -- one line per word, holding pairs of
     *       "column ID, value" for every entry of the word's vector;</li>
     *   <li>{@code rowLabels.dat} -- the word belonging to each line.</li>
     * </ul>
     * Column IDs start at 1 and are handed out in the order in which vector keys
     * are first encountered. Words for which {@code disco.getWordvector} returns
     * {@code null} are reported on standard output and skipped. After both files
     * are closed, the number of rows, columns and entries is printed; this method
     * does not write these numbers into the matrix file itself.
     *
     * @param disco     the word space to read vectors from
     * @param arrayList the words to export
     * @param str       existing output directory
     * @throws IOException if the word space cannot be read or an output file
     *                     cannot be created
     */
    public void clutoClusterVectors(DISCO disco, ArrayList<String> arrayList, String str) throws IOException {
        Map<String, Integer> columnIds = new HashMap<String, Integer>();
        int rowsWritten = 0;
        int nonZeroEntries = 0;
        int emptyRows = 0;

        // try-with-resources closes both files even if reading a vector fails.
        try (PrintWriter matrixWriter = new PrintWriter(str + File.separator + "sparseMatrix.dat", OUTPUT_ENCODING);
             PrintWriter labelWriter = new PrintWriter(str + File.separator + "rowLabels.dat", OUTPUT_ENCODING)) {
            System.out.println("Creating word vectors for " + arrayList.size() + " words");
            for (String word : arrayList) {
                Map<String, Float> wordvector = disco.getWordvector(word);
                if (wordvector == null) {
                    System.out.println("word " + word + " not found in index -- word ignored");
                    continue;
                }
                if (wordvector.isEmpty()) {
                    emptyRows++;
                }
                boolean firstInRow = true;
                for (Map.Entry<String, Float> entry : wordvector.entrySet()) {
                    Integer columnId = columnIds.get(entry.getKey());
                    if (columnId == null) {
                        // First sighting of this key: next free ID (IDs start at 1).
                        columnId = Integer.valueOf(columnIds.size() + 1);
                        columnIds.put(entry.getKey(), columnId);
                    }
                    if (!firstInRow) {
                        matrixWriter.write(FIELD_SEPARATOR);
                    }
                    matrixWriter.write(columnId + FIELD_SEPARATOR + entry.getValue());
                    firstInRow = false;
                    nonZeroEntries++;
                }
                matrixWriter.write("\n");
                labelWriter.write(word + "\n");
                rowsWritten++;
                if (rowsWritten % 10 == 0) {
                    System.out.print("\r" + rowsWritten);
                }
            }
        }

        System.out.println("\nSparse matrix and labels written (emptyRows = " + emptyRows + DefaultExpressionEngine.DEFAULT_INDEX_END);
        System.out.println("Please verify if the first line of the output file \"sparseMatrix.dat\" contains the following values:");
        // rowsWritten is incremented exactly once per written line, so it is the row count.
        System.out.println("NumberOfRows = " + rowsWritten);
        System.out.println("NumberOfColumns = " + columnIds.size());
        System.out.println("NumberOfNonZeroEntries = " + nonZeroEntries);
    }
}
