package com.rtg.reader;

import com.rtg.launcher.AbstractCli;
import com.rtg.launcher.CommonFlags;
import com.rtg.mode.DNA;
import com.rtg.mode.Protein;
import com.rtg.mode.SequenceType;
import com.rtg.reference.ReferenceGenome;
import com.rtg.reference.ReferenceSequence;
import com.rtg.reference.Sex;
import com.rtg.sam.SamUtils;
import com.rtg.taxonomy.Taxonomy;
import com.rtg.taxonomy.TaxonomyUtils;
import com.rtg.util.MathUtils;
import com.rtg.util.StringUtils;
import com.rtg.util.Utils;
import com.rtg.util.cli.AnonymousFlag;
import com.rtg.util.cli.CFlags;
import com.rtg.util.cli.CommonFlagCategories;
import com.rtg.util.cli.Validator;
import com.rtg.util.diagnostic.ErrorType;
import com.rtg.util.diagnostic.NoTalkbackSlimException;
import com.rtg.util.diagnostic.SlimException;
import com.rtg.vcf.VcfRecord;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.fastq.FastqConstants;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.MissingResourceException;

/* loaded from: input_file:com/rtg/reader/SdfStatistics.class */
/**
 * Command line module ({@code sdfstats}) that prints statistics describing one or more SDF
 * directories.
 *
 * <p>Depending on the flags supplied, each SDF is reported either as BED regions, as a SAM
 * header, or as a human readable summary that can optionally be followed by sequence
 * names/lengths, per-sex reference listings and taxonomy counts.
 */
public final class SdfStatistics extends AbstractCli {
    private static final String MODULE_NAME = "sdfstats";
    private static final String SDF_FLAG = "SDF";
    private static final String NS_FLAG = "unknowns";
    private static final String POSITIONS_FLAG = "position";
    private static final String QS_FLAG = "quality";
    private static final String SEX_FLAG = "sex";
    private static final String TAXONOMY_FLAG = "taxonomy";
    private static final String NAMES_AND_LENGTHS_FLAG = "lengths";
    private static final String SAM_FLAG = "Xsam-header";
    private static final String BED_FLAG = "Xbed";
    private static final String SPECIFIED = "Xspecified";

    /** Width that histogram row labels are right-aligned to. */
    private static final int HISTOGRAM_LABEL_WIDTH = 18;

    /**
     * Zero-based histogram row whose label gets {@link FastqConstants#QUALITY_HEADER} appended.
     * NOTE(review): presumably marks an open-ended final bucket - confirm against the SDF writer.
     */
    private static final int HISTOGRAM_MARKED_INDEX = 1000;

    /**
     * Validates every SDF given on the command line, plus the extra requirements that
     * {@code --sex} and {@code --taxonomy} place on each of them.
     */
    private static final Validator VALIDATOR = new Validator() {
        @Override
        public boolean isValid(CFlags cFlags) {
            for (final Object value : cFlags.getAnonymousValues(0)) {
                final File sdf = (File) value;
                if (!CommonFlags.validateSDF(sdf)) {
                    return false;
                }
                if (cFlags.isSet(SEX_FLAG) && !CommonFlags.validateSexTemplateReference(cFlags, SEX_FLAG, (String) null, sdf)) {
                    return false;
                }
                if (cFlags.isSet(TAXONOMY_FLAG)
                        && !(hasTaxonomyFile(cFlags, sdf, TaxonomyUtils.TAXONOMY_FILE)
                                && hasTaxonomyFile(cFlags, sdf, TaxonomyUtils.TAXONOMY_TO_SEQUENCE_FILE))) {
                    return false;
                }
            }
            return true;
        }
    };

    /**
     * Checks that a taxonomy support file exists inside an SDF directory, recording a parse
     * message on the flags when it does not.
     *
     * @param flags flags object that receives the parse message on failure
     * @param sdf SDF directory being validated
     * @param fileName name of the file expected inside {@code sdf}
     * @return {@code true} if the file exists and is a regular file
     */
    private static boolean hasTaxonomyFile(CFlags flags, File sdf, String fileName) {
        if (new File(sdf, fileName).isFile()) {
            return true;
        }
        flags.setParseMessage("--taxonomy was specified but " + sdf + " is missing a '" + fileName + "'");
        return false;
    }

    @Override
    public String moduleName() {
        return MODULE_NAME;
    }

    @Override
    public String description() {
        return "print statistics about an SDF";
    }

    @Override
    protected void initFlags() {
        initFlags(this.mFlags);
    }

    /**
     * Registers the flags understood by this module on the supplied flags object.
     *
     * @param cFlags flags object to populate
     * @throws SlimException wrapping any {@link MissingResourceException} raised during registration
     */
    public void initFlags(CFlags cFlags) {
        cFlags.setDescription("Print statistics that describe a directory of SDF formatted data.");
        CommonFlagCategories.setCategories(cFlags);
        try {
            // One or more SDF directories are accepted as anonymous arguments.
            AnonymousFlag sdfFlag = cFlags.registerRequired(File.class, SDF_FLAG, "SDF directories");
            sdfFlag.setMinCount(1);
            sdfFlag.setMaxCount(Integer.MAX_VALUE);
            sdfFlag.setCategory(CommonFlagCategories.INPUT_OUTPUT);
            cFlags.registerOptional('n', NS_FLAG, "display info about unknown bases (Ns)").setCategory(CommonFlagCategories.REPORTING);
            cFlags.registerOptional('p', POSITIONS_FLAG, "only display info about unknown bases (Ns) by read position").setCategory(CommonFlagCategories.REPORTING);
            cFlags.registerOptional('q', QS_FLAG, "display mean of quality").setCategory(CommonFlagCategories.REPORTING);
            cFlags.registerOptional(SEX_FLAG, Sex.class, "sex", "display reference sequence list for the given sex, if defined").setCategory(CommonFlagCategories.REPORTING).setMaxCount(Integer.MAX_VALUE).enableCsv();
            cFlags.registerOptional(TAXONOMY_FLAG, "display information about taxonomy").setCategory(CommonFlagCategories.REPORTING);
            cFlags.registerOptional(NAMES_AND_LENGTHS_FLAG, "print out the name and length of each sequence. (Not recommended for read sets)").setCategory(CommonFlagCategories.REPORTING);
            cFlags.registerOptional(SAM_FLAG, "print out a SAM format header corresponding to this SDF").setCategory(CommonFlagCategories.REPORTING);
            cFlags.registerOptional(BED_FLAG, "print out BED format regions corresponding to sequences in this SDF").setCategory(CommonFlagCategories.REPORTING);
            cFlags.registerOptional(SPECIFIED, "BED/SAM output should only include sequences specified in the reference configuration").setCategory(CommonFlagCategories.REPORTING);
            cFlags.setValidator(VALIDATOR);
        } catch (MissingResourceException e) {
            throw new SlimException(e);
        }
    }

    /**
     * Prints the human readable report for a single SDF.
     *
     * <p>A common header (location, parameters, comment, read group, SDF version) is always
     * printed. When {@code z2} is set only the N position histogram follows; otherwise the full
     * summary is printed. Any README text held by the reader is printed last in both cases.
     *
     * @param annotatedSequencesReader reader opened on the SDF
     * @param file the SDF directory, used only for the reported location
     * @param printStream destination for the report
     * @param z whether to include block and histogram statistics for unknown bases (Ns)
     * @param z2 whether to print only the histogram of N positions instead of the summary
     * @param z3 whether to include quality statistics
     * @throws IOException if the reader or the output fails
     */
    public static void performStatistics(AnnotatedSequencesReader annotatedSequencesReader, File file, PrintStream printStream, boolean z, boolean z2, boolean z3) throws IOException {
        printHeader(annotatedSequencesReader, file, printStream);
        if (z2) {
            printPositionBlock(annotatedSequencesReader, printStream);
        } else {
            printSummary(annotatedSequencesReader, printStream, z, z3);
        }
        printReadMe(annotatedSequencesReader, printStream);
    }

    /** Writes {@code label}, {@code value} and a line separator to {@code out}. */
    private static void printField(PrintStream out, String label, CharSequence value) {
        out.append(label).append(value).append(StringUtils.LS);
    }

    /** Prints the header lines shared by every report; optional fields are skipped when null. */
    private static void printHeader(AnnotatedSequencesReader reader, File sdf, PrintStream out) {
        printField(out, "Location           : ", sdf.getAbsolutePath());
        if (reader.commandLine() != null) {
            printField(out, "Parameters         : ", reader.commandLine());
        }
        if (reader.comment() != null) {
            printField(out, "Comment            : ", reader.comment());
        }
        if (reader.samReadGroup() != null) {
            printField(out, "SAM read group     : ", reader.samReadGroup());
        }
        printField(out, "SDF Version        : ", Long.toString(reader.sdfVersion()));
    }

    /** Prints the full summary section: type information, lengths, metadata and residue counts. */
    private static void printSummary(AnnotatedSequencesReader reader, PrintStream out, boolean showNs, boolean showQuality) throws IOException {
        printField(out, "Type               : ", reader.type().toString());
        printField(out, "Source             : ", reader.getPrereadType().toString());
        printField(out, "Paired arm         : ", reader.getArm().toString());
        final SdfId sdfId = reader.getSdfId();
        if (sdfId.available()) {
            printField(out, "SDF-ID             : ", sdfId.toString());
        }
        printField(out, "Number of sequences: ", Long.toString(reader.numberSequences()));
        final long maxLength = reader.maxLength();
        final long minLength = reader.minLength();
        // Lengths are only reported when the pair is consistent (max not below min).
        if (maxLength >= minLength) {
            printField(out, "Maximum length     : ", Long.toString(maxLength));
            printField(out, "Minimum length     : ", Long.toString(minLength));
        }
        printField(out, "Sequence names     : ", reader.hasNames() ? "yes" : "no");
        printField(out, "Sex metadata       : ", ReferenceGenome.hasReferenceFile(reader) ? "yes" : "no");
        printField(out, "Taxonomy metadata  : ", TaxonomyUtils.hasTaxonomyInfo(reader) ? "yes" : "no");

        final long[] residueCounts = reader.residueCounts();
        final boolean isDna = reader.type() == SequenceType.DNA;
        long totalResidues = 0;
        for (int i = 0; i < residueCounts.length; i++) {
            // The count at index i is labelled with the residue at the same index.
            final String residue = isDna ? DNA.values()[i].toString() : Protein.values()[i].toString();
            out.append(residue);
            printField(out, "                  : ", Long.toString(residueCounts[i]));
            totalResidues += residueCounts[i];
        }
        printField(out, "Total residues     : ", Long.toString(totalResidues));
        if (showQuality) {
            printQualityHistogram(reader, out);
        }
        printField(out, "Residue qualities  : ", (reader.hasQualityData() && reader.hasHistogram()) ? "yes" : "no");
        if (showNs) {
            printNBlocks(reader, out);
        }
        out.append(StringUtils.LS);
    }

    /**
     * Returns the stored name of a sequence, or {@code sequence_<index>} when the SDF holds no
     * names.
     */
    private static String sequenceName(SequencesReader reader, long index) throws IOException {
        return reader.hasNames() ? reader.name(index) : "sequence_" + index;
    }

    /**
     * Writes a coordinate-sorted SAM header that has one sequence record per SDF sequence.
     *
     * @param sequencesReader reader opened on the SDF
     * @param appendable destination for the header text
     * @param z if {@code true}, sequences that the reference configuration knows about but does
     *     not mark as specified are omitted
     * @throws IOException if the reader or the output fails
     */
    static void printSAMHeader(SequencesReader sequencesReader, Appendable appendable, boolean z) throws IOException {
        final ReferenceGenome genome = new ReferenceGenome(sequencesReader, ReferenceGenome.SEX_ALL, ReferenceGenome.ReferencePloidy.AUTO);
        final SAMFileHeader header = new SAMFileHeader();
        header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
        SamUtils.addProgramRecord(header);
        final int[] lengths = sequencesReader.sequenceLengths(0L, sequencesReader.numberSequences());
        for (int i = 0; i < lengths.length; i++) {
            final String name = sequenceName(sequencesReader, i);
            final ReferenceSequence ref = z ? genome.sequence(name) : null;
            if (ref == null || ref.isSpecified()) {
                header.addSequence(new SAMSequenceRecord(name, lengths[i]));
            }
        }
        if (sequencesReader.getSdfId().available()) {
            header.addComment(SamUtils.TEMPLATE_SDF_ATTRIBUTE + sequencesReader.getSdfId());
        }
        // Render the header by creating and immediately closing a SAM writer over an
        // in-memory buffer, then copy the buffered text to the destination.
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        new SAMFileWriterFactory().makeSAMWriter(header, true, (OutputStream) buffer).close();
        appendable.append(buffer.toString());
    }

    /**
     * Prints one BED line ({@code name<TAB>0<TAB>length}) per sequence in the SDF.
     *
     * @param sequencesReader reader opened on the SDF
     * @param printStream destination for the BED lines
     * @param z if {@code true}, sequences that the reference configuration knows about but does
     *     not mark as specified are omitted
     * @throws IOException if the reader fails
     */
    static void printBed(SequencesReader sequencesReader, PrintStream printStream, boolean z) throws IOException {
        final ReferenceGenome genome = new ReferenceGenome(sequencesReader, ReferenceGenome.SEX_ALL, ReferenceGenome.ReferencePloidy.AUTO);
        for (long i = 0; i < sequencesReader.numberSequences(); i++) {
            final String name = sequenceName(sequencesReader, i);
            final ReferenceSequence ref = z ? genome.sequence(name) : null;
            if (ref == null || ref.isSpecified()) {
                printStream.println(name + "\t0\t" + sequencesReader.length(i));
            }
        }
    }

    /**
     * Prints the reference genome description that applies to the given sex.
     *
     * @param sequencesReader reader opened on the SDF
     * @param sex sex for which the reference genome is built
     * @param appendable destination for the listing
     * @throws IOException if the reader or the output fails
     */
    static void printReferenceSequences(SequencesReader sequencesReader, Sex sex, Appendable appendable) throws IOException {
        final ReferenceGenome genome = new ReferenceGenome(sequencesReader, sex);
        appendable.append("Sequences for sex=").append(String.valueOf(sex)).append(VcfRecord.FORMAT_AND_SAMPLE_SEPARATOR).append(StringUtils.LS);
        appendable.append(genome.toString()).append(StringUtils.LS);
        // Two further line separators follow the listing.
        appendable.append(StringUtils.LS);
        appendable.append(StringUtils.LS);
    }

    /** Prints the histogram of N positions, or a notice when the SDF has no histogram. */
    private static void printPositionBlock(SequencesReader reader, Appendable out) throws IOException {
        if (!reader.hasHistogram()) {
            out.append("Histogram of N position frequencies is not available for this SDF").append(StringUtils.LS);
            return;
        }
        final long[] positions = reader.posHistogram();
        out.append("Histogram of N position frequencies");
        out.append(StringUtils.LS);
        // Row labels for this histogram start at 1.
        out.append(printHistogram(positions, true));
    }

    /** Prints the global and per-position average quality, or a notice when unavailable. */
    private static void printQualityHistogram(SequencesReader reader, Appendable out) throws IOException {
        if (!(reader.hasQualityData() && reader.hasHistogram())) {
            out.append("Quality statistics are not available for this SDF").append(StringUtils.LS);
            return;
        }
        out.append("Average quality    : ").append(Utils.realFormat(MathUtils.phred(reader.globalQualityAverage()), 1));
        out.append(StringUtils.LS);
        out.append("Average qual / pos : ");
        out.append(StringUtils.LS);
        out.append(printQualityHistogram(reader.positionQualityAverage()));
    }

    /**
     * Formats per-position quality averages, one row per position, up to and including the last
     * non-zero entry. Each value is passed through {@link MathUtils#phred(double)} before being
     * formatted to one decimal place. Row labels start at 1.
     */
    private static String printQualityHistogram(double[] averages) {
        int last = -1;
        for (int i = averages.length - 1; i >= 0; i--) {
            if (averages[i] != 0.0d) {
                last = i;
                break;
            }
        }
        final StringBuilder sb = new StringBuilder();
        for (int i = 0; i <= last; i++) {
            appendRowLabel(sb, i, 1L);
            sb.append(Utils.realFormat(MathUtils.phred(averages[i]), 1));
            sb.append(StringUtils.LS);
        }
        return sb.toString();
    }

    /** Prints N block statistics and the N frequency histogram, or a notice when unavailable. */
    private static void printNBlocks(SequencesReader reader, Appendable out) throws IOException {
        if (!reader.hasHistogram()) {
            out.append("N counts are not available on this SDF").append(StringUtils.LS);
            return;
        }
        // The block statistics are only looked up once there is something to report.
        final long blockCount = reader.nBlockCount();
        final long longestBlock = reader.longestNBlock();
        out.append("Blocks of Ns       : ");
        out.append(Long.toString(blockCount));
        out.append(StringUtils.LS);
        out.append("Longest block of Ns: ");
        out.append(Long.toString(longestBlock));
        out.append(StringUtils.LS);
        out.append("Histogram of N frequencies");
        out.append(StringUtils.LS);
        // Row labels for this histogram start at 0.
        out.append(printHistogram(reader.histogram(), false));
    }

    /**
     * Prints a {@code Sequence lengths:} heading followed by one {@code name<TAB>length} line per
     * sequence.
     *
     * @param sequencesReader reader opened on the SDF
     * @param printStream destination for the listing
     * @param z if {@code true}, sequences that the reference configuration knows about but does
     *     not mark as specified are omitted
     * @throws IOException if the reader fails
     */
    static void printSequenceNameAndLength(SequencesReader sequencesReader, PrintStream printStream, boolean z) throws IOException {
        final ReferenceGenome genome = new ReferenceGenome(sequencesReader, ReferenceGenome.SEX_ALL, ReferenceGenome.ReferencePloidy.AUTO);
        printStream.println("Sequence lengths: ");
        for (long i = 0; i < sequencesReader.numberSequences(); i++) {
            final String name = sequenceName(sequencesReader, i);
            final ReferenceSequence ref = z ? genome.sequence(name) : null;
            if (ref == null || ref.isSpecified()) {
                printStream.println(name + "\t" + sequencesReader.length(i));
            }
        }
    }

    /**
     * Prints the README text held by the reader, if any, under an {@code Additional Info:}
     * heading and followed by a blank line.
     *
     * @param sequencesReader reader opened on the SDF
     * @param printStream destination for the text
     * @throws IOException if the reader fails
     */
    static void printReadMe(SequencesReader sequencesReader, PrintStream printStream) throws IOException {
        final String readMe = sequencesReader.getReadMe();
        if (readMe == null) {
            return;
        }
        printStream.println("Additional Info:");
        printStream.println(readMe);
        printStream.println();
    }

    /**
     * Prints the taxonomy size, the number of distinct values in the taxonomy mapping, and the
     * difference between the two.
     *
     * @param sequencesReader reader opened on an SDF that carries taxonomy information
     * @param printStream destination for the counts
     * @throws IOException if the taxonomy files cannot be loaded
     */
    static void printTaxonomyStatistics(SequencesReader sequencesReader, PrintStream printStream) throws IOException {
        final Taxonomy taxonomy = TaxonomyUtils.loadTaxonomy(sequencesReader);
        final int taxonomyNodes = taxonomy.size();
        printField(printStream, "Taxonomy nodes     : ", Integer.toString(taxonomyNodes));
        // Collapse the mapping's values into a set so each distinct value is counted once.
        final int sequenceNodes = new HashSet<>(TaxonomyUtils.loadTaxonomyMapping(sequencesReader).values()).size();
        printField(printStream, "Sequence nodes     : ", Integer.toString(sequenceNodes));
        printField(printStream, "Other nodes        : ", Integer.toString(taxonomyNodes - sequenceNodes));
    }

    /**
     * Appends a right-aligned histogram row label followed by {@code " : "}.
     *
     * @param sb buffer to append to
     * @param index zero-based row index
     * @param offset value added to {@code index} to form the displayed number
     */
    private static void appendRowLabel(StringBuilder sb, int index, long offset) {
        String label = Long.toString(index + offset);
        if (index == HISTOGRAM_MARKED_INDEX) {
            label = label + FastqConstants.QUALITY_HEADER;
        }
        for (int pad = HISTOGRAM_LABEL_WIDTH - label.length(); pad > 0; pad--) {
            sb.append(' ');
        }
        sb.append(label);
        sb.append(" : ");
    }

    /**
     * Formats a histogram of counts, one row per bucket, up to and including the last non-zero
     * bucket.
     *
     * @param counts bucket counts
     * @param oneBased whether row labels start at 1 rather than 0
     * @return the formatted rows, or an empty string when every bucket is zero
     */
    private static String printHistogram(long[] counts, boolean oneBased) {
        int last = -1;
        for (int i = counts.length - 1; i >= 0; i--) {
            if (counts[i] != 0) {
                last = i;
                break;
            }
        }
        final long offset = oneBased ? 1L : 0L;
        final StringBuilder sb = new StringBuilder();
        for (int i = 0; i <= last; i++) {
            appendRowLabel(sb, i, offset);
            sb.append(Long.toString(counts[i]));
            sb.append(StringUtils.LS);
        }
        return sb.toString();
    }

    /**
     * Command line entry point.
     *
     * @param strArr command line arguments
     */
    public static void main(String[] strArr) {
        new SdfStatistics().mainExit(strArr);
    }

    /**
     * Reports on every SDF named on the command line. A paired-end directory is expanded into
     * its left and right arms, which are reported separately.
     *
     * @param outputStream stream that receives all reports; flushed but not closed here
     * @param printStream not used by this module
     * @return 0 on success
     * @throws IOException if an SDF cannot be read or the output fails
     */
    @Override
    protected int mainExec(OutputStream outputStream, PrintStream printStream) throws IOException {
        final List<File> sdfs = new LinkedList<>();
        for (final Object value : this.mFlags.getAnonymousValues(0)) {
            final File dir = (File) value;
            if (ReaderUtils.isPairedEndDirectory(dir)) {
                sdfs.add(ReaderUtils.getLeftEnd(dir));
                sdfs.add(ReaderUtils.getRightEnd(dir));
            } else {
                sdfs.add(dir);
            }
        }
        if (sdfs.isEmpty()) {
            throw new NoTalkbackSlimException(ErrorType.NO_VALID_INPUTS, new String[0]);
        }
        final PrintStream out = new PrintStream(outputStream);
        try {
            for (final File sdf : sdfs) {
                // try-with-resources guarantees the reader is closed even when reporting fails.
                try (AnnotatedSequencesReader reader = SequencesReaderFactory.createDefaultSequencesReader(sdf)) {
                    report(reader, sdf, out);
                }
            }
            return 0;
        } finally {
            out.flush();
        }
    }

    /**
     * Produces the output selected by the flags for one SDF. BED output takes precedence over
     * the SAM header, which takes precedence over the statistics report and its optional extras.
     */
    private void report(AnnotatedSequencesReader reader, File sdf, PrintStream out) throws IOException {
        if (this.mFlags.isSet(BED_FLAG)) {
            printBed(reader, out, this.mFlags.isSet(SPECIFIED));
            return;
        }
        if (this.mFlags.isSet(SAM_FLAG)) {
            printSAMHeader(reader, out, this.mFlags.isSet(SPECIFIED));
            return;
        }
        performStatistics(reader, sdf, out, this.mFlags.isSet(NS_FLAG), this.mFlags.isSet(POSITIONS_FLAG), this.mFlags.isSet(QS_FLAG));
        if (this.mFlags.isSet(NAMES_AND_LENGTHS_FLAG)) {
            printSequenceNameAndLength(reader, out, this.mFlags.isSet(SPECIFIED));
        }
        if (this.mFlags.isSet(SEX_FLAG)) {
            for (final Object sex : this.mFlags.getValues(SEX_FLAG)) {
                printReferenceSequences(reader, (Sex) sex, out);
            }
        }
        if (this.mFlags.isSet(TAXONOMY_FLAG)) {
            if (!TaxonomyUtils.hasTaxonomyInfo(reader)) {
                throw new NoTalkbackSlimException("The supplied SDF does not contain taxonomy information");
            }
            printTaxonomyStatistics(reader, out);
        }
    }
}
