package juicebox.tools.clt.juicer;

import htsjdk.tribble.bed.BEDCodec;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import juicebox.data.ChromosomeHandler;
import juicebox.data.HiCFileTools;
import juicebox.data.anchor.MotifAnchor;
import juicebox.data.anchor.MotifAnchorParser;
import juicebox.data.anchor.MotifAnchorTools;
import juicebox.data.feature.GenomeWideList;
import juicebox.tools.clt.CommandLineParserForJuicer;
import juicebox.tools.clt.JuicerCLT;
import juicebox.track.feature.Feature2DList;
import juicebox.track.feature.Feature2DParser;
import juicebox.track.feature.FeatureFilter;

/* loaded from: input_file:juicebox/tools/clt/juicer/MotifFinder.class */
/**
 * Juicer command-line tool registered with the usage string
 * {@code motifs <genomeID> <bed_file_dir> <looplist> [custom_global_motif_list]}.
 *
 * <p>It loads a loop list, runs a "unique" motif pass followed by an "inferred" motif pass
 * over it (both delegated to {@link MotifAnchorTools}), and exports the resulting feature
 * list to a file whose name is derived from the loop list path with the suffix
 * {@code _with_motifs.bedpe}.
 *
 * <p>The BED file directory is expected to contain two sub-folders, {@code unique} and
 * {@code inferred}, each holding at least one file with the BED extension.
 */
public class MotifFinder extends JuicerCLT {
    /** Suffix appended to the (extension-stripped) loop list path to form the output path. */
    private static final String OUTPUT_SUFFIX = "_with_motifs.bedpe";

    /**
     * The cached global motif list is only reused when its {@code size()} is at least this
     * value; otherwise the motifs are loaded again. The threshold is carried over unchanged.
     */
    private static final int MIN_CACHED_LIST_SIZE = 10;

    private final List<String> tierOneFiles;
    private final List<String> tierTwoFiles;
    private final List<String> tierThreeFiles;
    private String outputPath;
    private String loopListPath;
    private String genomeID;
    private List<String> proteinsForUniqueMotifPaths;
    private List<String> proteinsForInferredMotifPaths;
    private String globalMotifListPath;
    private GenomeWideList<MotifAnchor> genomeWideAnchorsList;

    /**
     * Creates the tool with empty tier lists and an empty motif cache.
     *
     * <p>Side effect: sets the static flag
     * {@code MotifAnchor.uniquenessShouldSupercedeConvergentRule} to {@code true}.
     */
    public MotifFinder() {
        super("motifs <genomeID> <bed_file_dir> <looplist> [custom_global_motif_list]");
        this.tierOneFiles = new ArrayList<>();
        this.tierTwoFiles = new ArrayList<>();
        this.tierThreeFiles = new ArrayList<>();
        this.genomeWideAnchorsList = new GenomeWideList<>();
        MotifAnchor.uniquenessShouldSupercedeConvergentRule = true;
    }

    /**
     * Loads the first BED file and intersects it, in list order, with every remaining file.
     *
     * @param chromosomeHandler chromosome handler passed through to the BED parser
     * @param bedFiles          paths of the BED files to intersect; must not be empty
     * @return the list loaded from the first file after all intersections were applied to it
     * @throws IllegalArgumentException if {@code bedFiles} is {@code null} or empty
     */
    private static GenomeWideList<MotifAnchor> getIntersectionOfBEDFiles(ChromosomeHandler chromosomeHandler, List<String> bedFiles) {
        if (bedFiles == null || bedFiles.isEmpty()) {
            // Fail with a clear message instead of an opaque NPE / IndexOutOfBoundsException.
            throw new IllegalArgumentException("No BED files provided for intersection");
        }
        GenomeWideList<MotifAnchor> intersection = MotifAnchorParser.loadFromBEDFile(chromosomeHandler, bedFiles.get(0));
        for (String bedFile : bedFiles.subList(1, bedFiles.size())) {
            MotifAnchorTools.intersectLists(intersection, MotifAnchorParser.loadFromBEDFile(chromosomeHandler, bedFile), false);
        }
        return intersection;
    }

    /**
     * Derives the output path from the loop list path: a trailing {@code .txt} or
     * {@code .bedpe} is removed before {@code _with_motifs.bedpe} is appended.
     */
    private static String buildOutputPath(String loopListPath) {
        for (String extension : new String[]{".txt", ".bedpe"}) {
            if (loopListPath.endsWith(extension)) {
                return loopListPath.substring(0, loopListPath.length() - extension.length()) + OUTPUT_SUFFIX;
            }
        }
        return loopListPath + OUTPUT_SUFFIX;
    }

    /**
     * Reads the positional arguments: {@code args[1]} is the genome ID, {@code args[2]} the
     * BED file directory, {@code args[3]} the loop list path and the optional {@code args[4]}
     * a custom global motif list. Prints usage when the argument count is neither 4 nor 5.
     * Exits with status 54 when the BED files cannot be collected.
     */
    @Override // juicebox.tools.clt.JuicerCLT
    protected void readJuicerArguments(String[] args, CommandLineParserForJuicer parser) {
        if (args.length != 4 && args.length != 5) {
            printUsageAndExit();
        }
        this.genomeID = args[1];
        String bedFileDir = args[2];
        this.loopListPath = args[3];
        if (args.length == 5) {
            this.globalMotifListPath = args[4];
        }
        this.outputPath = buildOutputPath(this.loopListPath);
        try {
            retrieveAllBEDFiles(bedFileDir);
        } catch (Exception e) {
            System.err.println("Unable to locate BED files");
            // Surface the underlying reason rather than dropping it.
            System.err.println("Reason: " + e.getMessage());
            System.err.println("All BED files should include the '.bed' extension");
            System.err.println("BED files for locating unique motifs should be located in " + bedFileDir + "/unique/*.bed");
            System.err.println("BED files for locating inferred motifs should be located in " + bedFileDir + "/inferred/*.bed");
            System.exit(54);
        }
    }

    /**
     * Loads the chromosomes and loop list, runs the unique and inferred motif passes on the
     * loops, and exports the feature list to the output path.
     */
    @Override // juicebox.tools.clt.JuiceboxCLT
    public void run() {
        ChromosomeHandler handler = HiCFileTools.loadChromosomes(this.genomeID);
        Feature2DList features = Feature2DParser.loadFeatures(this.loopListPath, handler, true, (FeatureFilter) null, true);
        findUniqueMotifs(handler, features);
        findInferredMotifs(handler, features);
        features.exportFeatureList(new File(this.outputPath), false, Feature2DList.ListFormat.NA);
        System.out.println("Motif Finder complete");
    }

    /**
     * Runs the inferred-motif pass twice over the anchors extracted from {@code features}:
     * once with the directional anchors selected by {@code true} and once with those selected
     * by {@code false}. Each pass starts from a fresh intersection of the "inferred" BED files
     * and a fresh copy of the global motif list.
     */
    private void findInferredMotifs(ChromosomeHandler chromosomeHandler, Feature2DList features) {
        GenomeWideList<MotifAnchor> featureAnchors = MotifAnchorTools.extractAnchorsFromIntrachromosomalFeatures(features, true, chromosomeHandler);

        // First pass: directional anchors selected with 'true', applied with orientation 1.
        GenomeWideList<MotifAnchor> inferredProteins = getIntersectionOfBEDFiles(chromosomeHandler, this.proteinsForInferredMotifPaths);
        GenomeWideList<MotifAnchor> globalAnchors = retrieveFreshMotifs();
        MotifAnchorTools.retainProteinsInLocus(inferredProteins, MotifAnchorTools.extractDirectionalAnchors(featureAnchors, true), false, true);
        MotifAnchorTools.retainBestMotifsInLocus(globalAnchors, inferredProteins);
        MotifAnchorTools.updateOriginalFeatures(globalAnchors, false, 1);

        // Second pass: reset both lists, then use the directional anchors selected with 'false'.
        inferredProteins = getIntersectionOfBEDFiles(chromosomeHandler, this.proteinsForInferredMotifPaths);
        globalAnchors = retrieveFreshMotifs();
        MotifAnchorTools.retainProteinsInLocus(inferredProteins, MotifAnchorTools.extractDirectionalAnchors(featureAnchors, false), false, true);
        MotifAnchorTools.retainBestMotifsInLocus(globalAnchors, inferredProteins);
        // NOTE(review): the first pass only applies orientation 1, whereas this pass applies
        // both 1 and -1. The asymmetry looks unintentional (the call with 1 may be a leftover),
        // but the behavior is preserved here — confirm against MotifAnchorTools before removing.
        MotifAnchorTools.updateOriginalFeatures(globalAnchors, false, 1);
        MotifAnchorTools.updateOriginalFeatures(globalAnchors, false, -1);
    }

    /**
     * Sorts the "unique" BED files into three tiers:
     * tier one if the path contains {@code ctcf} or the file name starts with {@code 1},
     * tier two if the path contains {@code rad21} or the file name starts with {@code 2},
     * tier three otherwise (all comparisons on the lower-cased text).
     *
     * <p>The numeric-prefix test is applied to the file name because the stored entries are
     * absolute paths, for which a leading {@code 1}/{@code 2} test on the whole path would
     * never match. The tier lists are cleared first so repeated calls do not add duplicates.
     */
    private void setUpThreeTieredFiltration() {
        this.tierOneFiles.clear();
        this.tierTwoFiles.clear();
        this.tierThreeFiles.clear();
        for (String path : this.proteinsForUniqueMotifPaths) {
            String lowerCasePath = path.toLowerCase();
            String lowerCaseName = new File(path).getName().toLowerCase();
            if (lowerCasePath.contains("ctcf") || lowerCaseName.startsWith("1")) {
                this.tierOneFiles.add(path);
            } else if (lowerCasePath.contains("rad21") || lowerCaseName.startsWith("2")) {
                this.tierTwoFiles.add(path);
            } else {
                this.tierThreeFiles.add(path);
            }
        }
    }

    /**
     * Builds the protein track used by the unique-motif pass: the intersection of the tier-one
     * files restricted to {@code featureAnchors}, then combined with the tier-two and
     * tier-three intersections (when those tiers have files and their intersection is
     * non-empty) via {@code MotifAnchorTools.preservativeIntersectLists}.
     *
     * <p>Exits with status 55 when there are no tier-one files.
     *
     * @return the filtered tier-one track ({@code null} is only reachable if
     *         {@code System.exit} returns)
     */
    private GenomeWideList<MotifAnchor> getThreeTierFilteredProteinTrack(ChromosomeHandler chromosomeHandler, GenomeWideList<MotifAnchor> featureAnchors) {
        if (this.tierOneFiles.isEmpty()) {
            System.err.println("No CTCF files provided");
            System.exit(55);
            return null;
        }
        GenomeWideList<MotifAnchor> tierOneProteins = getIntersectionOfBEDFiles(chromosomeHandler, this.tierOneFiles);
        MotifAnchorTools.retainProteinsInLocus(tierOneProteins, featureAnchors, true, true);
        preservativelyIntersectWithTier(chromosomeHandler, tierOneProteins, this.tierTwoFiles);
        preservativelyIntersectWithTier(chromosomeHandler, tierOneProteins, this.tierThreeFiles);
        return tierOneProteins;
    }

    /**
     * Applies {@code preservativeIntersectLists} between {@code baseTrack} and the intersection
     * of {@code tierFiles}; does nothing when the tier has no files or an empty intersection.
     */
    private static void preservativelyIntersectWithTier(ChromosomeHandler chromosomeHandler, GenomeWideList<MotifAnchor> baseTrack, List<String> tierFiles) {
        if (tierFiles.isEmpty()) {
            return;
        }
        GenomeWideList<MotifAnchor> tierProteins = getIntersectionOfBEDFiles(chromosomeHandler, tierFiles);
        if (tierProteins.size() > 0) {
            MotifAnchorTools.preservativeIntersectLists(baseTrack, tierProteins, false);
        }
    }

    /**
     * Runs the unique-motif pass: classifies the "unique" BED files into tiers, builds the
     * filtered protein track for the anchors of {@code features}, retains the best motifs of a
     * fresh global motif list within that track, and writes them back to the original features.
     */
    private void findUniqueMotifs(ChromosomeHandler chromosomeHandler, Feature2DList features) {
        setUpThreeTieredFiltration();
        GenomeWideList<MotifAnchor> featureAnchors = MotifAnchorTools.extractAnchorsFromIntrachromosomalFeatures(features, false, chromosomeHandler);
        GenomeWideList<MotifAnchor> proteinTrack = getThreeTierFilteredProteinTrack(chromosomeHandler, featureAnchors);
        GenomeWideList<MotifAnchor> globalAnchors = retrieveFreshMotifs();
        MotifAnchorTools.retainBestMotifsInLocus(globalAnchors, proteinTrack);
        MotifAnchorTools.updateOriginalFeatures(globalAnchors, true, 0);
    }

    /**
     * Returns a global motif list the caller may modify freely.
     *
     * <p>When the cached list is large enough, a copy of it is returned. Otherwise the motifs
     * are loaded — from the genome ID when no custom list was given, from a URL when the custom
     * list starts with {@code http://} or {@code https://}, or from a local file — a copy is
     * stored as the new cache, and the freshly loaded list is returned.
     */
    private GenomeWideList<MotifAnchor> retrieveFreshMotifs() {
        if (this.genomeWideAnchorsList.size() >= MIN_CACHED_LIST_SIZE) {
            return new GenomeWideList<>(this.genomeWideAnchorsList);
        }
        GenomeWideList<MotifAnchor> loadedMotifs;
        if (this.globalMotifListPath == null || this.globalMotifListPath.isEmpty()) {
            loadedMotifs = MotifAnchorParser.loadMotifsFromGenomeID(this.genomeID, null);
        } else if (this.globalMotifListPath.startsWith("http://") || this.globalMotifListPath.startsWith("https://")) {
            // Prefix check: a local path that merely contains "http" must not be fetched as a URL.
            loadedMotifs = MotifAnchorParser.loadMotifsFromURL(this.globalMotifListPath, this.genomeID, null);
        } else {
            loadedMotifs = MotifAnchorParser.loadMotifsFromLocalFile(this.globalMotifListPath, this.genomeID, null);
        }
        this.genomeWideAnchorsList = new GenomeWideList<>(loadedMotifs);
        return loadedMotifs;
    }

    /**
     * Collects the BED files from the {@code unique} and {@code inferred} sub-folders of
     * {@code bedFileDir} into the corresponding fields.
     *
     * @throws IOException if {@code bedFileDir} is not a directory, or a sub-folder cannot be
     *                     listed or contains no BED files
     */
    private void retrieveAllBEDFiles(String bedFileDir) throws IOException {
        File baseDir = new File(bedFileDir);
        if (!baseDir.isDirectory()) {
            throw new IOException("BED files directory not valid");
        }
        // File(parent, child) copes with a trailing separator on the parent path.
        String uniqueDir = new File(baseDir, "unique").getPath();
        String inferredDir = new File(baseDir, "inferred").getPath();
        this.proteinsForUniqueMotifPaths = retrieveBEDFilesByExtensionInFolder(uniqueDir, "Unique");
        this.proteinsForInferredMotifPaths = retrieveBEDFilesByExtensionInFolder(inferredDir, "Inferred");
    }

    /**
     * Lists the absolute paths of all regular files in {@code folderPath} whose path ends with
     * {@code BEDCodec.BED_EXTENSION}.
     *
     * @param folderPath folder to scan (not recursive)
     * @param label      human-readable category used as the prefix of error messages
     * @return a non-empty list of absolute BED file paths
     * @throws IOException if the folder cannot be listed or holds no matching file
     */
    private List<String> retrieveBEDFilesByExtensionInFolder(String folderPath, String label) throws IOException {
        File[] entries = new File(folderPath).listFiles();
        if (entries == null) {
            // listFiles() yields null when the path is not a directory or an I/O error occurs.
            throw new IOException(label + " BED files folder could not be read: " + folderPath);
        }
        List<String> bedFilePaths = new ArrayList<>();
        for (File entry : entries) {
            if (!entry.isFile()) {
                continue;
            }
            String absolutePath = entry.getAbsolutePath();
            if (absolutePath.endsWith(BEDCodec.BED_EXTENSION)) {
                bedFilePaths.add(absolutePath);
            }
        }
        if (bedFilePaths.isEmpty()) {
            throw new IOException(label + " BED files not found");
        }
        return bedFilePaths;
    }
}
