package de.linguatools.disco;

import ch.qos.logback.classic.net.SyslogAppender;
import ch.qos.logback.core.joran.util.beans.BeanUtil;
import de.linguatools.disco.Compositionality;
import de.linguatools.disco.DISCO;
import de.linguatools.disco.Rank;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Scanner;
import org.apache.lucene.index.CorruptIndexException;

/* loaded from: input_file:de/linguatools/disco/Main.class */
public class Main {
    /**
     * Separator printed between a word and its value in every list output.
     * The decompiled code referenced a logback constant here whose value is the tab character.
     */
    private static final String COLUMN_SEPARATOR = "\t";

    /** Message printed whenever an option is called with too few command line arguments. */
    private static final String TOO_FEW_ARGUMENTS = "Error: Too few arguments.";

    /** Message printed when an operation rejects the type of the opened word space. */
    private static final String WRONG_WORDSPACE_TYPE = "Error: Wrong wordspace type: only works with wordspaces of type SIM!";

    /** Highest list index (inclusive) that is printed by the options -rs and -rc. */
    private static final int MAX_RANK_INDEX = 100;

    /** Returned by {@link #batchModeFor(String)} for options that are not batch options. */
    private static final int NOT_A_BATCH_OPTION = -1;

    /** Prints the command line help text to standard output. */
    private static void printUsage() {
        System.out.println("disco V3.0 -- www.linguatools.de/disco");
        System.out.println("Usage: java -jar disco-3.0.jar <indexDir> <option>");
        System.out.println("Options:   NOTE THAT <w>, <w1>, <w2> have to be single tokens!");
        System.out.println("\t\t-f <w>\t\treturn corpus frequency of word <w>");
        System.out.println("\t\t-s <w1> <w2> <simMeasure>\treturn semantic similarity between words <w1> and <w2>");
        System.out.println("\t\t             simMeasure = {COSINE, KOLB}, default is COSINE.");
        System.out.println("\t\t-s2 <w1> <w2>\treturn second order similarity between words <w1> and <w2>");
        System.out.println("\t\t\t\tDoes not work with word spaces of type \"COL\"!");
        System.out.println("\t\t-cv <w1> <w2>\treturn collocational value between words <w1> and <w2>");
        System.out.println("\t\t\t\tonly works with DISCOLuceneIndex word spaces!");
        System.out.println("\t\t-bn <w> <n>\treturn the <n> most similar words for word <w>");
        System.out.println("\t\t\t\tDoes not work with word spaces of type \"COL\"!");
        System.out.println("\t\t-bs <w> <s>\treturn all words that are at least <s> similar to word <w>");
        System.out.println("\t\t\t\tDoes not work with word spaces of type \"COL\"!");
        System.out.println("\t\t-bc <w> <n>\treturn the <n> best collocations for word <w>");
        System.out.println("\t\t-cc <w1> <w2>\treturn the common context for <w1> and <w2>");
        System.out.println("\t\t-n\t\treturn the number of words in the index");
        System.out.println("\t\t-wl <file>\t\twrite word frequency list to file");
        System.out.println("\t\t-cs \"<p1>\" \"<p2>\"\tcompute semantic similarity between multi-word terms or phrases\n\t\t\t<p1> and <p2> using vector composition");
        System.out.println("\t\t-a \"<w1>\" \"<w2>\" \"<w3>\"\tcompute word x that completes the analogy \"<w1> is to <w2> as x is to <w3>\".");
        System.out.println("\t\t-ds <inputFile> <simMeasure>\toutput semantic similarity for all word pairs in input file");
        System.out.println("\t\t-dr <inputFile> <simMeasure>\toutput rank of w2 in similarity list of w1 for all word pairs in input file");
        System.out.println("\t\t-dbn <inputFile> <simMeasure>\toutput all words with similarity >= 0.01 for every word in input file");
        System.out.println("\t\t-dbc <inputFile> <simMeasure>\toutput collocations for every word in input file");
        System.out.println("\t\t-ts \"<text1>\" \"<text2>\" <simMeasure>\tcompute semantic relatedness between <text1> and <text2>");
        System.out.println("\t\t-tsd \"<text>\" \"<hypothesis>\" <simMeasure>\tcompute the directed semantic relatedness between the <text>\n\t\t\tand the <hypothesis>");
        System.out.println("\t\t-fo <w> <n>\tfilter outliers from the similar words of word <w>");
        System.out.println("\t\t\t\tDoes not work with word spaces of type \"COL\"!");
        System.out.println("\t\t-gs <w1> <w2> ... <wN>\tgrow set of input words");
        System.out.println("\t\t\t\tDoes not work with word spaces of type \"COL\"!");
        System.out.println("\t\t-rs <w1> <w2> ... <wN>\tfind words for which the input words rank highest (sim)");
        System.out.println("\t\t\t\tDoes not work with word spaces of type \"COL\"!");
        System.out.println("\t\t-rc <w1> <w2> ... <wN>\tfind words for which the input words rank highest (col)");
        System.out.println("\t\t-cg <n> <minSim> <outputDir>\tcreates sparse graph file (for the first n words) that\n\t\t\t can be clustered with CLUTO's scluster program.");
        System.out.println("\t\t\t\tDoes not work with word spaces of type \"COL\"!");
        System.out.println("\t\t-cu <wordlist> <outputDir>\tcreates sparse matrix file (with word vector for every\n\t\t\t word in wordlist) that can be clustered with CLUTO's vcluster program.");
    }

    /**
     * Batch worker behind the options -ds, -dbn, -dbc and -dr.
     *
     * <p>WARNING: the decompiler (JADX) failed to recover the body of this method
     * ("Code decompiled incorrectly", 925 instructions skipped). The body below is only a
     * placeholder that ALWAYS throws, so the batch options cannot work until the original
     * implementation is restored.
     *
     * @param r6 word space directory; {@code main} passes its first command line argument
     * @param r7 input file; {@code main} passes the file named by its third argument
     * @param r8 mode selector; {@code main} passes 1 for -ds, 3 for -dbn, 4 for -dbc and 5 for -dr
     * @param r9 similarity measure resolved from the fourth command line argument
     * @throws UnsupportedOperationException always, see the warning above
     */
    private static void readFile(java.lang.String r6, java.io.File r7, int r8, de.linguatools.disco.DISCO.SimilarityMeasure r9) throws java.io.IOException, java.io.FileNotFoundException, org.apache.lucene.index.CorruptIndexException, de.linguatools.disco.CorruptConfigFileException {
        /*
            Method dump skipped, instructions count: 925
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: de.linguatools.disco.Main.readFile(java.lang.String, java.io.File, int, de.linguatools.disco.DISCO$SimilarityMeasure):void");
    }

    /**
     * Command line entry point. Expects {@code <indexDir> <option> [arguments...]} and prints
     * the usage text when fewer than two arguments are given.
     *
     * <p>The batch options (-ds, -dbn, -dbc, -dr) are handed to {@code readFile} without
     * opening the word space here; every other option first opens the word space named by
     * the first argument and is then dispatched to its handler.
     *
     * @param strArr the command line arguments
     * @throws IOException if an I/O error escapes the batch reader
     * @throws WrongWordspaceTypeException if an operation rejects the word space type and the
     *         option does not report that case itself
     * @throws FileNotFoundException declared for compatibility with existing callers
     * @throws CorruptIndexException declared for compatibility with existing callers
     * @throws CorruptConfigFileException if the batch reader reports a corrupt config file
     */
    public static void main(String[] strArr) throws IOException, WrongWordspaceTypeException, FileNotFoundException, CorruptIndexException, CorruptConfigFileException {
        if (strArr.length < 2 || strArr[1] == null) {
            printUsage();
            return;
        }
        int batchMode = batchModeFor(strArr[1]);
        if (batchMode != NOT_A_BATCH_OPTION) {
            runBatch(strArr, batchMode);
            return;
        }
        try {
            DISCO disco = DISCO.open(strArr[0]);
            if (disco == null) {
                System.out.println("Error opening DISCO word space file " + strArr[0]);
                return;
            }
            runQuery(disco, strArr);
        } catch (CorruptConfigFileException | IOException e) {
            System.out.println(e);
        } catch (NumberFormatException e) {
            // A malformed <n>, <s> or weight argument is a usage error, not a crash.
            System.out.println("Error: invalid numeric argument: " + e.getMessage());
            printUsage();
        }
    }

    /** Maps a batch option to the mode number expected by readFile, or NOT_A_BATCH_OPTION. */
    private static int batchModeFor(String option) {
        switch (option) {
            case "-ds":
                return 1;
            case "-dbn":
                return 3;
            case "-dbc":
                return 4;
            case "-dr":
                return 5;
            default:
                return NOT_A_BATCH_OPTION;
        }
    }

    /** Validates the input file and similarity measure of a batch option and starts readFile. */
    private static void runBatch(String[] args, int mode) throws IOException, CorruptIndexException, CorruptConfigFileException {
        if (!hasArguments(args, 4)) {
            System.out.println(TOO_FEW_ARGUMENTS);
            printUsage();
            return;
        }
        File inputFile = new File(args[2]);
        if (!inputFile.canRead()) {
            System.out.println("Error: can't open file " + args[2]);
            printUsage();
            return;
        }
        DISCO.SimilarityMeasure measure = DISCO.getSimilarityMeasure(args[3]);
        if (measure == null) {
            System.out.println("Error: unknown similarity measure " + args[3] + " - use COSINE or KOLB.");
            return;
        }
        readFile(args[0], inputFile, mode, measure);
    }

    /** Returns true if at least {@code required} arguments exist and none from index 2 on is null. */
    private static boolean hasArguments(String[] args, int required) {
        if (args.length < required) {
            return false;
        }
        for (int i = 2; i < required; i++) {
            if (args[i] == null) {
                return false;
            }
        }
        return true;
    }

    /** Dispatches a non-batch option to its handler; unknown options print an error and the usage. */
    private static void runQuery(DISCO disco, String[] args) throws IOException, WrongWordspaceTypeException, CorruptConfigFileException {
        switch (args[1]) {
            case "-f":
                printFrequency(disco, args);
                break;
            case "-s":
                printSimilarity(disco, args);
                break;
            case "-s2":
                printSecondOrderSimilarity(disco, args);
                break;
            case "-cv":
                printCollocationalValue(disco, args);
                break;
            case "-bn":
                printMostSimilarWords(disco, args);
                break;
            case "-bs":
                printWordsAboveSimilarity(disco, args);
                break;
            case "-bc":
                printBestCollocations(disco, args);
                break;
            case "-cc":
                printCommonContext(disco, args);
                break;
            case "-n":
                System.out.println(disco.numberOfWords());
                break;
            case "-wl":
                writeWordFrequencyList(disco, args);
                break;
            case "-ts":
                printTextSimilarity(disco, args, false);
                break;
            case "-tsd":
                printTextSimilarity(disco, args, true);
                break;
            case "-fo":
                printFilteredOutliers(disco, args);
                break;
            case "-gs":
                printGrownSet(disco, args);
                break;
            case "-rs":
            case "-rc":
                printHighestRanking(disco, args);
                break;
            case "-cg":
                createSimilarityGraph(disco, args);
                break;
            case "-cu":
                createVectorMatrix(disco, args);
                break;
            case "-cs":
                printCompositionalSimilarity(disco, args);
                break;
            case "-a":
                solveAnalogies(disco, args);
                break;
            default:
                System.out.println("Error: unknown command line option: " + args[1]);
                printUsage();
                break;
        }
    }

    /** Prints the standard "word not found" message. */
    private static void printWordNotFound(String word) {
        System.out.println("The word \"" + word + "\" was not found.");
    }

    /** Option -f: prints the corpus frequency of a word. */
    private static void printFrequency(DISCO disco, String[] args) throws IOException, WrongWordspaceTypeException, CorruptConfigFileException {
        if (!hasArguments(args, 3)) {
            printUsage();
            return;
        }
        System.out.println(disco.frequency(args[2]));
    }

    /** Option -s: prints the semantic similarity of two words for the given measure. */
    private static void printSimilarity(DISCO disco, String[] args) throws IOException, WrongWordspaceTypeException, CorruptConfigFileException {
        if (!hasArguments(args, 5)) {
            printUsage();
            return;
        }
        DISCO.SimilarityMeasure measure = DISCOLuceneIndex.getSimilarityMeasure(args[4]);
        if (measure == null) {
            System.out.println("Error: unknown similarity measure: " + args[4]);
            return;
        }
        float similarity = disco.semanticSimilarity(args[2], args[3], DISCO.getVectorSimilarity(measure));
        // -2 and -3 are sentinel results that are reported as errors instead of similarities.
        if (similarity == -2.0f) {
            System.out.println("Error: Word not found in index.");
        } else if (similarity == -3.0f) {
            System.out.println("Error: unknown similarity measure: " + args[4]);
        } else {
            System.out.println(similarity);
        }
    }

    /** Option -s2: prints the second order similarity of two words (always uses KOLB). */
    private static void printSecondOrderSimilarity(DISCO disco, String[] args) throws IOException, WrongWordspaceTypeException, CorruptConfigFileException {
        if (!hasArguments(args, 4)) {
            printUsage();
            return;
        }
        float similarity = disco.secondOrderSimilarity(args[2], args[3], DISCO.getVectorSimilarity(DISCO.SimilarityMeasure.KOLB));
        // -1 is the sentinel result reported as "word not found".
        if (similarity == -1.0f) {
            System.out.println("Error: Word not found in index.");
        } else {
            System.out.println(similarity);
        }
    }

    /** Option -cv: prints the collocational value of two words (DISCOLuceneIndex only). */
    private static void printCollocationalValue(DISCO disco, String[] args) throws IOException, WrongWordspaceTypeException, CorruptConfigFileException {
        if (!hasArguments(args, 4)) {
            printUsage();
            return;
        }
        if (!(disco instanceof DISCOLuceneIndex)) {
            System.out.println("Error: option -cv only allowed with DISCOLuceneIndex!");
            return;
        }
        System.out.println(((DISCOLuceneIndex) disco).collocationalValue(args[2], args[3]));
    }

    /** Option -bn: prints at most n entries of the similar-words list of a word. */
    private static void printMostSimilarWords(DISCO disco, String[] args) throws IOException, WrongWordspaceTypeException, CorruptConfigFileException {
        if (!hasArguments(args, 4)) {
            printUsage();
            return;
        }
        ReturnDataBN similar = disco.similarWords(args[2]);
        if (similar == null) {
            printWordNotFound(args[2]);
            return;
        }
        // Clamp to the list length; a non-positive n prints nothing.
        int limit = Math.min(Integer.parseInt(args[3]), similar.words.length);
        for (int i = 0; i < limit; i++) {
            System.out.println(similar.words[i] + COLUMN_SEPARATOR + similar.values[i]);
        }
    }

    /** Option -bs: prints the leading similar words whose value is at least the threshold. */
    private static void printWordsAboveSimilarity(DISCO disco, String[] args) throws IOException, WrongWordspaceTypeException, CorruptConfigFileException {
        if (!hasArguments(args, 4)) {
            printUsage();
            return;
        }
        ReturnDataBN similar = disco.similarWords(args[2]);
        if (similar == null) {
            printWordNotFound(args[2]);
            return;
        }
        float threshold = Float.parseFloat(args[3]);
        // Stops at the first entry below the threshold.
        for (int i = 0; i < similar.words.length && similar.values[i] >= threshold; i++) {
            System.out.println(similar.words[i] + COLUMN_SEPARATOR + similar.values[i]);
        }
    }

    /** Option -bc: prints at most n entries of the collocation list of a word. */
    private static void printBestCollocations(DISCO disco, String[] args) throws IOException, WrongWordspaceTypeException, CorruptConfigFileException {
        if (!hasArguments(args, 4)) {
            printUsage();
            return;
        }
        ReturnDataCol[] collocations = disco.collocations(args[2]);
        if (collocations == null) {
            printWordNotFound(args[2]);
            return;
        }
        // Clamp to the list length; a non-positive n prints nothing.
        int limit = Math.min(Integer.parseInt(args[3]), collocations.length);
        for (int i = 0; i < limit; i++) {
            System.out.println(collocations[i].word + COLUMN_SEPARATOR + collocations[i].value);
        }
    }

    /**
     * Option -cc: prints the collocations of the second word that also occur among the
     * collocations of the first word, sorted, with the values of the second word.
     */
    private static void printCommonContext(DISCO disco, String[] args) throws IOException, WrongWordspaceTypeException, CorruptConfigFileException {
        if (!hasArguments(args, 4)) {
            printUsage();
            return;
        }
        ReturnDataCol[] firstCollocations = disco.collocations(args[2]);
        if (firstCollocations == null) {
            printWordNotFound(args[2]);
            return;
        }
        HashSet<String> firstWords = new HashSet<>();
        for (ReturnDataCol collocation : firstCollocations) {
            firstWords.add(collocation.word);
        }
        ReturnDataCol[] secondCollocations = disco.collocations(args[3]);
        if (secondCollocations == null) {
            printWordNotFound(args[3]);
            return;
        }
        // A map (not a list) so that a repeated word is reported once, with its last value.
        HashMap<String, Float> shared = new HashMap<>();
        for (ReturnDataCol collocation : secondCollocations) {
            if (firstWords.contains(collocation.word)) {
                shared.put(collocation.word, Float.valueOf(collocation.value));
            }
        }
        ReturnDataCol[] result = new ReturnDataCol[shared.size()];
        int next = 0;
        for (String word : shared.keySet()) {
            result[next++] = new ReturnDataCol(word, shared.get(word).floatValue());
        }
        Arrays.sort(result);
        for (ReturnDataCol entry : result) {
            System.out.println(entry.word + COLUMN_SEPARATOR + entry.value);
        }
    }

    /** Option -wl: writes the word frequency list to a file and reports how many words were written. */
    private static void writeWordFrequencyList(DISCO disco, String[] args) throws IOException, WrongWordspaceTypeException, CorruptConfigFileException {
        if (!hasArguments(args, 3)) {
            System.out.println(TOO_FEW_ARGUMENTS);
            printUsage();
            return;
        }
        System.out.println(disco.wordFrequencyList(args[2]) + " of " + disco.numberOfWords() + " words were written.");
    }

    /** Options -ts and -tsd: prints the (directed, if requested) similarity of two texts. */
    private static void printTextSimilarity(DISCO disco, String[] args, boolean directed) throws IOException, WrongWordspaceTypeException, CorruptConfigFileException {
        if (args.length < 5) {
            System.out.println(TOO_FEW_ARGUMENTS);
            printUsage();
            return;
        }
        if (args.length > 5) {
            // More than five arguments usually means that the texts were not quoted.
            if (directed) {
                System.out.println("Too many arguments (enclose TEXT1 and TEXT2 in double quotes)!");
            } else {
                System.out.println("Too many arguments (enclose each of TEXT1 and TEXT2 in double quotes)!");
            }
            printUsage();
            return;
        }
        DISCO.SimilarityMeasure measure = DISCOLuceneIndex.getSimilarityMeasure(args[4]);
        if (measure == null) {
            System.out.println("Error: unknown similarity measure: " + args[4]);
            return;
        }
        if (directed) {
            System.out.println(TextSimilarity.directedTextSimilarity(args[2], args[3], disco, measure));
        } else {
            System.out.println(TextSimilarity.textSimilarity(args[2], args[3], disco, measure));
        }
    }

    /** Option -fo: prints the result of Cluster.filterOutliers for a word. */
    private static void printFilteredOutliers(DISCO disco, String[] args) throws IOException, CorruptConfigFileException {
        try {
            if (args.length < 4) {
                System.out.println(TOO_FEW_ARGUMENTS);
                printUsage();
                return;
            }
            ReturnDataBN filtered = Cluster.filterOutliers(disco, args[2], Integer.parseInt(args[3]));
            if (filtered == null) {
                System.out.println("The word \"" + args[2] + "\" was not found in the index.");
                return;
            }
            for (int i = 0; i < filtered.words.length; i++) {
                System.out.println(filtered.words[i] + COLUMN_SEPARATOR + filtered.values[i]);
            }
        } catch (WrongWordspaceTypeException e) {
            System.out.println(WRONG_WORDSPACE_TYPE);
        }
    }

    /** Option -gs: prints the result of Cluster.growSet for the input words. */
    private static void printGrownSet(DISCO disco, String[] args) throws IOException, CorruptConfigFileException {
        try {
            if (args.length < 3) {
                System.out.println(TOO_FEW_ARGUMENTS);
                printUsage();
                return;
            }
            String[] inputWords = Arrays.copyOfRange(args, 2, args.length);
            for (String word : Cluster.growSet(disco, inputWords)) {
                System.out.println(word);
            }
        } catch (WrongWordspaceTypeException e) {
            System.out.println(WRONG_WORDSPACE_TYPE);
        }
    }

    /** Options -rs and -rc: prints the words for which the input words rank highest. */
    private static void printHighestRanking(DISCO disco, String[] args) throws IOException, CorruptConfigFileException {
        if (args.length < 3) {
            System.out.println(TOO_FEW_ARGUMENTS);
            printUsage();
            return;
        }
        HashSet<String> inputWords = new HashSet<>(Arrays.asList(args).subList(2, args.length));
        try {
            Rank rank = new Rank();
            List<Rank.WordAndRank> ranking;
            if (args[1].equals("-rs")) {
                ranking = rank.highestRankingSim(disco, inputWords);
            } else {
                if (!(disco instanceof DISCOLuceneIndex)) {
                    System.out.println("Error: -rc does not support DenseMatrix wordspaces!");
                    return;
                }
                ranking = rank.highestRankingCol((DISCOLuceneIndex) disco, inputWords);
            }
            // Prints the entries at indices 0..MAX_RANK_INDEX, exactly as before.
            for (int i = 0; i < ranking.size() && i <= MAX_RANK_INDEX; i++) {
                System.out.println(ranking.get(i).word + COLUMN_SEPARATOR + ranking.get(i).rank);
            }
        } catch (WrongWordspaceTypeException e) {
            System.out.println(WRONG_WORDSPACE_TYPE);
        }
    }

    /** Option -cg: calls Cluster.clutoClusterSimilarityGraph with n, minSim and the output directory. */
    private static void createSimilarityGraph(DISCO disco, String[] args) throws IOException, CorruptConfigFileException {
        if (!hasArguments(args, 5)) {
            System.out.println(TOO_FEW_ARGUMENTS);
            printUsage();
            return;
        }
        try {
            new Cluster().clutoClusterSimilarityGraph(disco, Integer.parseInt(args[2]), Float.parseFloat(args[3]), args[4]);
        } catch (WrongWordspaceTypeException e) {
            System.out.println(WRONG_WORDSPACE_TYPE);
        }
    }

    /** Option -cu: reads a word list (one trimmed word per line) and calls Cluster.clutoClusterVectors. */
    private static void createVectorMatrix(DISCO disco, String[] args) throws IOException, WrongWordspaceTypeException, CorruptConfigFileException {
        if (!hasArguments(args, 4)) {
            System.out.println(TOO_FEW_ARGUMENTS);
            printUsage();
            return;
        }
        System.out.print("Reading word list... ");
        ArrayList<String> words = new ArrayList<>();
        // try-with-resources closes the reader even if reading fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(args[2]))) {
            String line;
            while ((line = reader.readLine()) != null) {
                words.add(line.trim());
            }
        }
        System.out.println(words.size() + " words read.");
        System.out.flush();
        new Cluster().clutoClusterVectors(disco, words, args[3]);
    }

    /**
     * Option -cs: prints the compositional similarity of two phrases.
     *
     * <p>Reads ten arguments in total: the two phrases, the composition method
     * (add, mult, combi, dilat, extrem), the similarity measure (kolb, cos) and four float
     * values that are passed on unchanged to Compositionality.compositionalSemanticSimilarity.
     */
    private static void printCompositionalSimilarity(DISCO disco, String[] args) throws IOException, WrongWordspaceTypeException, CorruptConfigFileException {
        // Indices up to 9 are read below, so all ten arguments must be present.
        if (!hasArguments(args, 10)) {
            System.out.println(TOO_FEW_ARGUMENTS);
            printUsage();
            return;
        }
        Compositionality.VectorCompositionMethod method;
        if (args[4].equalsIgnoreCase("add")) {
            method = Compositionality.VectorCompositionMethod.ADDITION;
        } else if (args[4].equalsIgnoreCase("mult")) {
            method = Compositionality.VectorCompositionMethod.MULTIPLICATION;
        } else if (args[4].equalsIgnoreCase("combi")) {
            method = Compositionality.VectorCompositionMethod.COMBINED;
        } else if (args[4].equalsIgnoreCase("dilat")) {
            method = Compositionality.VectorCompositionMethod.DILATION;
        } else if (args[4].equalsIgnoreCase("extrem")) {
            method = Compositionality.VectorCompositionMethod.EXTREMA;
        } else {
            System.out.println("Error: Unknown composition method \"" + args[4] + "\".");
            printUsage();
            return;
        }
        DISCO.SimilarityMeasure measure;
        if (args[5].equalsIgnoreCase("kolb")) {
            measure = DISCO.SimilarityMeasure.KOLB;
        } else if (args[5].equalsIgnoreCase("cos")) {
            measure = DISCO.SimilarityMeasure.COSINE;
        } else {
            System.out.println("Error: Unknown similarity measure \"" + args[5] + "\".");
            printUsage();
            return;
        }
        System.out.println(Compositionality.compositionalSemanticSimilarity(args[2], args[3], method, measure, disco, Float.valueOf(Float.parseFloat(args[6])), Float.valueOf(Float.parseFloat(args[7])), Float.valueOf(Float.parseFloat(args[8])), Float.valueOf(Float.parseFloat(args[9]))));
    }

    /**
     * Option -a: solves the analogy given on the command line, then keeps prompting for new
     * word triples on standard input until an empty first word or the end of input is read.
     */
    private static void solveAnalogies(DISCO disco, String[] args) throws IOException, WrongWordspaceTypeException, CorruptConfigFileException {
        if (!hasArguments(args, 5)) {
            System.out.println(TOO_FEW_ARGUMENTS);
            printUsage();
            return;
        }
        String w1 = args[2];
        String w2 = args[3];
        String w3 = args[4];
        // One scanner for the whole session: a scanner buffers its input, so creating a new
        // one per round can lose lines. It is deliberately not closed (it wraps System.in).
        Scanner scanner = new Scanner(System.in);
        while (true) {
            List<ReturnDataCol> solutions = Compositionality.solveAnalogy(w1, w2, w3, disco);
            if (solutions == null) {
                System.out.println("one of the input words was not found in the DISCO index.");
            } else if (solutions.isEmpty()) {
                System.out.println("No x solving the analogy was found.");
            } else {
                for (ReturnDataCol solution : solutions) {
                    System.out.println(solution.word + COLUMN_SEPARATOR + solution.value);
                }
            }
            System.out.print("\nPlay again? (empty input to quit)\nw1: ");
            w1 = readTrimmedLine(scanner);
            if (w1 == null || w1.isEmpty()) {
                return;
            }
            System.out.print("w2: ");
            w2 = readTrimmedLine(scanner);
            if (w2 == null) {
                return;
            }
            System.out.print("w3: ");
            w3 = readTrimmedLine(scanner);
            if (w3 == null) {
                return;
            }
        }
    }

    /** Returns the next input line without surrounding whitespace, or null at the end of input. */
    private static String readTrimmedLine(Scanner scanner) {
        return scanner.hasNextLine() ? scanner.nextLine().trim() : null;
    }
}
