% Modified version of the Yarpiz Team's DBSCAN algorithm. The modifications
% aim to improve the computational and memory efficiency.
% Before running DBSCAN, the data are sorted by their "x" and "y"
% coordinates. Then, for every data point, the sorted data are searched for
% the other data points that fall within a square of half-width epsilon
% (2*epsilon x 2*epsilon) centred on the given data point. The Euclidean
% distance is then calculated only for these candidate data points.


function [IDX, isnoise]=DBSCAN_boundaryIndexPrecalculation(X,epsilon,MinPts)
% DBSCAN_boundaryIndexPrecalculation  DBSCAN clustering with a pre-sorted
% bounding-box neighbour search.
%
%   [IDX, isnoise] = DBSCAN_boundaryIndexPrecalculation(X, epsilon, MinPts)
%
%   Inputs:
%     X        n-by-d matrix with one data point per row. Column 1 ("x")
%              and column 2 ("y") are used for the bounding-box
%              pre-selection; the squared distance is summed over all
%              columns of X.
%     epsilon  neighbourhood radius (compared as epsilon^2 against the
%              squared Euclidean distance).
%     MinPts   minimum number of points within epsilon for a point to seed
%              or extend a cluster. The query point normally counts itself,
%              because its distance to itself is 0.
%
%   Outputs:
%     IDX      n-by-1 vector of cluster numbers (1,2,...); 0 means the
%              point belongs to no cluster.
%     isnoise  n-by-1 logical vector, true for the points that ended up in
%              no cluster. A point that is first classified as noise and is
%              later reached from a cluster (border point) is NOT flagged.
%
%   Side effects: prints the elapsed time (tic/toc) and a progress message
%   for each of the three stages to the command window.
%
%   Depends on sortedPointillisticData_divide.slideBoundaryIndex, which is
%   defined outside this file.
%   NOTE(review): from the way its result is used below, it presumably
%   returns one [firstIndex, lastIndex] row per bounds row, addressing the
%   sorted values that lie inside the bounds - confirm against that class.

    C=0;                    % number of clusters found so far
    
    n=size(X,1);
    IDX=zeros(n,1);
    
    % squared radius, so that no square root is needed in RegionQuery
    epsilon2 = epsilon^2;

    tic()
    % sorting the "x" coordinates:
    [sortedData_x, sortingOrder_x]=sort(X(:,1));
    % inverse permutation: original index -> position in the "x" sorted data
    [~, inverseOrder_x]=sort(sortingOrder_x);
    
    % sorting the "y" coordinates after sorting them in "x"
    % ascending order:
    y_coord_xSorted=X(sortingOrder_x,2);
    [sortedData_y, sortingOrder_y]=sort(y_coord_xSorted);
    % inverse permutation: position in the "x" sorted data -> position in
    % the "y" sorted data
    [~, inverseOrder_y]=sort(sortingOrder_y);
    toc()
    fprintf('Subtask: Sorting is finished.\n')
    
    tic()
    % [lower, upper] coordinate limits of the search window of every point
    xBounds_xSorted = [sortedData_x-epsilon, sortedData_x+epsilon];
    yBounds_ySorted = [sortedData_y-epsilon, sortedData_y+epsilon];

    lowerIndex_starting = 1;
    indexRanges_xSorted = sortedPointillisticData_divide.slideBoundaryIndex(sortedData_x, xBounds_xSorted, lowerIndex_starting);
    indexRanges_ySorted = sortedPointillisticData_divide.slideBoundaryIndex(sortedData_y, yBounds_ySorted, lowerIndex_starting);
    toc()
    fprintf('Subtask: finding the boundaries is finished.\n')
    tic()
    
    visited=false(n,1);
    isnoise=false(n,1);
    % true for every point that has already been put on the work list of
    % some cluster expansion; shared with ExpandCluster
    queued=false(n,1);
    
    for i=1:n
        if ~visited(i)
            visited(i)=true;
            
            Neighbors=RegionQuery(i);
            if numel(Neighbors)<MinPts
                % X(i,:) is NOISE (may still become a border point later)
                isnoise(i)=true;
            else
                C=C+1;
                ExpandCluster(i,Neighbors,C);
            end
            
        end
    
    end
    
    toc()
    fprintf('DBSCAN clusterization is finished.\n') 
    
    function ExpandCluster(i,Neighbors,C)
        % Labels point i and everything density-reachable from it with the
        % cluster number C. "Neighbors" is the work list: it starts as the
        % neighbourhood of i and grows while new core points are found.
        IDX(i)=C;
        
        % Points that were on the work list of an earlier expansion are
        % already visited and labelled, so processing them again would
        % change nothing - leave them out.
        Neighbors=Neighbors(~queued(Neighbors));
        queued(Neighbors)=true;
        
        k = 1;
        % guarded loop: also safe when the work list is empty
        while k <= numel(Neighbors)
            j = Neighbors(k);
            
            if ~visited(j)
                visited(j)=true;
                Neighbors2=RegionQuery(j);
                if numel(Neighbors2)>=MinPts
                    % j is a core point: enqueue only those of its
                    % neighbours that have not been enqueued yet, so the
                    % work list holds every point at most once
                    newMembers=Neighbors2(~queued(Neighbors2));
                    queued(newMembers)=true;
                    Neighbors=[Neighbors; newMembers];   %#ok<AGROW>
                end
            end
            if IDX(j)==0
                IDX(j)=C;
                % a point previously marked as noise is a border point of
                % this cluster, keep "isnoise" consistent with "IDX"
                isnoise(j)=false;
            end
            
            k = k + 1;
        end
    end


    function Neighbors=RegionQuery(i)
        % Returns the original (row of X) indices of all the points whose
        % squared Euclidean distance from X(i,:) is at most epsilon2.
        % The candidates are pre-selected with the precalculated index
        % ranges, the distance is calculated only on them.

        % position of the point in the "x" and in the "y" sorted data
        xSortedIndex = inverseOrder_x(i);
        ySortedIndex = inverseOrder_y(xSortedIndex);

        % lower and upper indices of the "x" sorted coordinates that
        % fall between the given "x" bounds:
        indexRanges_xDivision=indexRanges_xSorted(xSortedIndex,:);
        
        % lower and upper indices of the "y" sorted coordinates that
        % fall between the given "y" bounds:
        indexRanges_yDivision=indexRanges_ySorted(ySortedIndex,:);

        % "x" sorted indices of the points inside the "y" bounds
        xSortedIndices_yCut = sortingOrder_y(indexRanges_yDivision(1):indexRanges_yDivision(2));

        % keep those that are inside the "x" bounds too
        xSortedIndices_xyCut = xSortedIndices_yCut(xSortedIndices_yCut>=indexRanges_xDivision(1) & xSortedIndices_yCut<=indexRanges_xDivision(2));
        
        % the original indices of the data points falling in the
        % search window of the current point:
        originalDataIndices_xyCut=sortingOrder_x(xSortedIndices_xyCut);

        % selected region
        X_region=X(originalDataIndices_xyCut,:);

        % squared Euclidean distances from the query point
        D2=sum((X_region-X(i,:)).^2, 2);

        % find coordinates closer than epsilon
        Neighbors_inRegion=D2<=epsilon2;

        % appropriate coordinates in the original X dataset
        Neighbors=originalDataIndices_xyCut(Neighbors_inRegion);

    end


end



% Project Code: YPML110
% Project Title: Implementation of DBSCAN Clustering in MATLAB
% Publisher: Yarpiz (www.yarpiz.com)
%
% Developer: S. Mostapha Kalami Heris (Member of Yarpiz Team)
%
% Contact Info: sm.kalami@gmail.com, info@yarpiz.com
%
%
% Copyright (c) 2015, Yarpiz (www.yarpiz.com)
% All rights reserved.
%
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions are
% met:
%
%     * Redistributions of source code must retain the above copyright
%       notice, this list of conditions and the following disclaimer.
%
%     * Redistributions in binary form must reproduce the above copyright
%       notice, this list of conditions and the following disclaimer in
%       the documentation and/or other materials provided with the distribution
%
% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
% AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
% IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
% ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
% LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
% CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
% SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
% CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
% ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
% POSSIBILITY OF SUCH DAMAGE.
