/**
 * [SCstream.java] for Subspace MOA
 * 
 * SCStream: Main class
 * 
 * @author Stephan Wels
 * Data Management and Data Exploration Group, RWTH Aachen University
 */

package moa.clusterers.scstream;

import java.util.ArrayList;
import java.util.List;
import java.util.Vector;

import moa.cluster.Cluster;
import moa.cluster.Clustering;
import moa.cluster.SubspaceClustering;
import moa.clusterers.AbstractSubspaceClusterer;
import moa.core.Measurement;
import moa.options.FloatOption;
import moa.options.IntOption;
import weka.core.DenseInstance;
import weka.core.Instance;

/**
 * Adapted DenStream online clustering part. Additional parameters have been
 * added which are used by the offline algorithm.
 * Offline algorithm used: PreDeCon / IncPreDeCon.
 */

/**
 * DenStream: Density-based clustering over an evolving data stream with noise
 * (2006)
 * 
 * This implementation only creates a microclustering. For now all
 * microclustering algorithms will be clustered in RunVisualizer with the
 * general kMeans-microclustering implementation to make all clustering
 * approaches comparable.
 * 
 * TODO: see KMeans.java: a MacroCluster interface is needed.
 * TODO: implement DBScan as a general macro clustering approach.
 * 
 */
public class SCstream extends AbstractSubspaceClusterer {

	private static final long serialVersionUID = 1L;

	/*
	 * Earlier default parameterization, kept for reference: horizon=1000,
	 * epsilon=0.01, minPoints=2, beta=0.001, mu=2, lambda=0.25,
	 * initPoints=1000, tau=2, kappa=1, delta=1 (min 0.001), offline=2.
	 */

	// NOTE(review): horizonOption is only reported by getParameterString();
	// lambda is read directly from lambdaOption.
	public IntOption horizonOption = new IntOption("horizon", 'h',
			"Range of the window.", 1);
	public FloatOption epsilonOption = new FloatOption("epsilon", 'e',
			"Defines the epsilon neighbourhood", 16, 0, 50);
	// NOTE(review): minPointsOption is only reported by getParameterString();
	// resetLearningImpl() derives minPoints from muOption instead.
	public IntOption minPointsOption = new IntOption("minPoints", 'p',
			"Minimal number of points cluster has to contain.", 10);

	public FloatOption betaOption = new FloatOption("beta", 'b', "", 0.2, 0, 10);
	public FloatOption muOption = new FloatOption("mu", 'm', "", 10, 0,
			Double.MAX_VALUE);

	public FloatOption lambdaOption = new FloatOption("lambda", 'l', "", 0.25,
			0, 1);

	public IntOption initPointsOption = new IntOption("initPoints", 'i',
			"Number of points to use for initialization.", 2000);

	public IntOption tauOption = new IntOption("tau", 't',
			"Number of maximal subspace dimensionality", 30);

	public IntOption kappaOption = new IntOption("kappa", 'k',
			"Parameter to define preference weighted vector", 20);

	public FloatOption deltaOption = new FloatOption("delta", 'd',
			"defines the threshold for the variance.", 0.001, 0.001, Double.MAX_VALUE);

	public FloatOption offlineOption = new FloatOption("offline", 'o',
			"offline multiplier for epsilion.", 2, 2, 20);

	// Only referenced by the disabled horizon-based lambda formula.
	private double weightThreshold = 0.01;

	// Working copies of the option values, refreshed in resetLearningImpl().
	double lambda;
	double epsilon;
	int minPoints;
	double mu;
	double beta;
	int tau;
	int kappa;
	double delta;

	/** Potential micro-clusters that are already known to the offline step. */
	Clustering p_micro_cluster;
	/** Outlier micro-clusters; never handed to the offline step. */
	Clustering o_micro_cluster;
	/** Potential micro-clusters promoted since the last offline update. */
	Clustering new_pMicroClusters;
	/** Potential micro-clusters pruned since the last offline update. */
	Clustering removed_pMicroClusters;

	/** Points collected before the initial DBSCAN pass. */
	ArrayList<DenPoint> initBuffer;

	boolean initialized;
	private long timestamp = 0;
	Timestamp currentTimestamp;
	/** Pruning period: clusters are checked every {@code tp} time steps. */
	long tp;

	IncPreDeCon predecon;
	/** True once {@link #predecon} has been created for the current run. */
	boolean init;
	Clustering clustering;
	/** Set after each pruning pass; tells getClusteringResult() to refresh. */
	boolean READY;

	/**
	 * Stream point that additionally carries the "covered" flag used by the
	 * initial DBSCAN pass. Declared static so that it does not retain a hidden
	 * reference to the enclosing clusterer.
	 */
	private static class DenPoint extends DenseInstance {
		private static final long serialVersionUID = 1L;

		/** True while the point is assigned to, or being tested for, a cluster. */
		protected boolean covered;

		/**
		 * @param nextInstance instance whose values and dataset are copied
		 * @param timestamp    arrival time of the instance; currently not stored
		 */
		public DenPoint(Instance nextInstance, long timestamp) {
			super(nextInstance);
			this.setDataset(nextInstance.dataset());
		}
	}

	/**
	 * Re-reads all options and discards every piece of learned state,
	 * including the logical clock and the offline clusterer, so that a reset
	 * learner behaves like a freshly created one.
	 */
	@Override
	public void resetLearningImpl() {
		currentTimestamp = new Timestamp();
		timestamp = 0;

		lambda = lambdaOption.getValue();
		epsilon = epsilonOption.getValue();
		// mu is truncated to an integer and also serves as minPoints;
		// minPointsOption is deliberately not read here.
		minPoints = (int) muOption.getValue();
		mu = (int) muOption.getValue();
		beta = betaOption.getValue();
		tau = tauOption.getValue();
		kappa = kappaOption.getValue();
		delta = deltaOption.getValue();

		initialized = false;
		p_micro_cluster = new MicroClusterList();
		o_micro_cluster = new MicroClusterList();

		new_pMicroClusters = new Clustering();
		removed_pMicroClusters = new Clustering();

		initBuffer = new ArrayList<DenPoint>();

		// The offline clusterer is bound to the previous p_micro_cluster
		// instance; force getClusteringResult() to rebuild it.
		predecon = null;
		clustering = null;
		init = false;
		READY = false;

		// NOTE(review): the formula is only well defined for beta * mu > 1 and
		// lambda > 0; other settings yield NaN/Infinity before rounding.
		tp = Math.round(1 / lambda * Math.log((beta * mu) / (beta * mu - 1))) + 1;
	}

	/**
	 * Builds the initial potential micro-clusters from the buffered points
	 * with a DBSCAN-like pass: every uncovered point with more than
	 * {@code minPoints} uncovered epsilon-neighbours seeds a micro-cluster
	 * that is then expanded over its neighbourhood.
	 */
	public void initialDBScan() {
		for (int p = 0; p < initBuffer.size(); p++) {
			DenPoint point = initBuffer.get(p);
			if (point.covered) {
				continue;
			}
			// Mark first so the point is excluded from its own neighbourhood.
			point.covered = true;
			List<Integer> neighbourhood = getNeighbourhoodIDs(point,
					initBuffer, epsilon);
			if (neighbourhood.size() > minPoints) {
				MicroCluster mc = new MicroCluster(point,
						point.numAttributes(), timestamp, lambda,
						currentTimestamp);
				expandCluster(mc, initBuffer, neighbourhood);
				p_micro_cluster.add(mc);
			} else {
				// Not a core point: release it so another seed may absorb it.
				point.covered = false;
			}
		}
	}

	/**
	 * Processes one stream instance. Until {@code initPoints} instances have
	 * arrived they are only buffered; afterwards each instance is merged into
	 * the nearest potential micro-cluster, else into the nearest outlier
	 * micro-cluster, else it starts a new outlier micro-cluster. Every
	 * {@code tp} time steps decayed micro-clusters are pruned.
	 *
	 * @param inst the instance to learn from
	 */
	@Override
	public void trainOnInstanceImpl(Instance inst) {
		timestamp++;
		currentTimestamp.setTimestamp(timestamp);
		DenPoint point = new DenPoint(inst, timestamp);

		if (!initialized) {
			initBuffer.add(point);
			if (initBuffer.size() >= initPointsOption.getValue()) {
				initialDBScan();
				initialized = true;
			}
			return;
		}

		boolean merged = mergeIntoPotentialCluster(point);
		if (!merged) {
			merged = mergeIntoOutlierCluster(point);
		}
		if (!merged) {
			// Outlier micro-clusters are irrelevant for the final clustering.
			double[] values = point.toDoubleArray();
			o_micro_cluster.getClustering().add(
					new MicroCluster(values, values.length, timestamp, lambda,
							currentTimestamp));
		}

		if (timestamp % tp == 0) {
			pruneMicroClusters();
		}
	}

	/**
	 * Tries to merge the point into the nearest potential micro-cluster,
	 * considering both the already clustered ones and those promoted since
	 * the last offline update.
	 *
	 * @return true if the point was absorbed
	 */
	private boolean mergeIntoPotentialCluster(DenPoint point) {
		Clustering candidates = new Clustering();
		for (Cluster c : p_micro_cluster.getClustering()) {
			candidates.add(c);
		}
		for (Cluster c : new_pMicroClusters.getClustering()) {
			candidates.add(c);
		}
		if (candidates.size() == 0) {
			return false;
		}
		return insertIfWithinEpsilon(nearestCluster(point, candidates), point);
	}

	/**
	 * Tries to merge the point into the nearest outlier micro-cluster and
	 * promotes that cluster to a potential micro-cluster once its weight
	 * exceeds {@code beta * mu}.
	 *
	 * @return true if the point was absorbed
	 */
	private boolean mergeIntoOutlierCluster(DenPoint point) {
		if (o_micro_cluster.getClustering().size() == 0) {
			return false;
		}
		MicroCluster nearest = nearestCluster(point, o_micro_cluster);
		if (!insertIfWithinEpsilon(nearest, point)) {
			return false;
		}
		if (nearest.getWeight() > beta * mu) {
			o_micro_cluster.getClustering().remove(nearest);
			// Buffer the promoted micro-cluster so it can be inserted into
			// the final clustering later. Adding it to p_micro_cluster right
			// away would make it hard to tell apart from the micro-clusters
			// that have already been clustered.
			new_pMicroClusters.getClustering().add(nearest);
		}
		return true;
	}

	/**
	 * Inserts the point into {@code target} only if the resulting radius
	 * stays within epsilon; the check is performed on a copy first.
	 *
	 * @return true if the point was inserted
	 */
	private boolean insertIfWithinEpsilon(MicroCluster target, DenPoint point) {
		MicroCluster trial = target.copy();
		trial.insert(point, timestamp);
		if (trial.getRadius(timestamp) <= epsilon) {
			target.insert(point, timestamp);
			return true;
		}
		return false;
	}

	/**
	 * Periodic clean-up: drops potential micro-clusters whose weight fell
	 * below {@code beta * mu} and outlier micro-clusters whose weight fell
	 * below the age-dependent lower bound xsi, then flags the offline
	 * clustering as due for an update.
	 */
	private void pruneMicroClusters() {
		double potentialThreshold = beta * mu;

		List<MicroCluster> decayedPotential = new ArrayList<MicroCluster>();
		for (Cluster c : p_micro_cluster.getClustering()) {
			MicroCluster mc = (MicroCluster) c;
			if (mc.getWeight() < potentialThreshold) {
				decayedPotential.add(mc);
			}
		}
		for (Cluster c : new_pMicroClusters.getClustering()) {
			MicroCluster mc = (MicroCluster) c;
			if (mc.getWeight() < potentialThreshold) {
				decayedPotential.add(mc);
			}
		}
		for (MicroCluster mc : decayedPotential) {
			boolean wasClustered = p_micro_cluster.getClustering().remove(mc);
			new_pMicroClusters.getClustering().remove(mc);
			// Buffer the deleted micro-cluster so that it can be removed from
			// the final clustering later. Clusters that only lived in the
			// "new" buffer never reached the final clustering.
			if (wasClustered) {
				removed_pMicroClusters.add(mc);
			}
		}

		// Outlier micro-clusters are irrelevant for the final clustering.
		List<MicroCluster> decayedOutliers = new ArrayList<MicroCluster>();
		double xsiDenominator = Math.pow(2, -lambda * tp) - 1;
		for (Cluster c : o_micro_cluster.getClustering()) {
			MicroCluster mc = (MicroCluster) c;
			long t0 = mc.getCreationTime();
			double xsiNumerator = Math.pow(2,
					(-lambda * (timestamp - t0 + tp))) - 1;
			double xsi = xsiNumerator / xsiDenominator;
			if (mc.getWeight() < xsi) {
				decayedOutliers.add(mc);
			}
		}
		for (MicroCluster mc : decayedOutliers) {
			o_micro_cluster.getClustering().remove(mc);
		}

		READY = true;
	}

	/**
	 * Recursively absorbs all uncovered points of {@code neighbourhood} into
	 * {@code mc}; a point whose own neighbourhood holds more than
	 * {@code minPoints} uncovered points is expanded in turn.
	 *
	 * @param mc            micro-cluster being grown
	 * @param points        the point set the neighbourhood indices refer to
	 * @param neighbourhood indices into {@code points}
	 */
	private void expandCluster(MicroCluster mc, ArrayList<DenPoint> points,
			List<Integer> neighbourhood) {
		for (int p : neighbourhood) {
			DenPoint npoint = points.get(p);
			if (npoint.covered) {
				continue;
			}
			npoint.covered = true;
			mc.insert(npoint, timestamp);
			List<Integer> neighbourhood2 = getNeighbourhoodIDs(npoint,
					points, epsilon);
			// The density test must look at the neighbourhood of the point
			// that was just added, not at the one currently being iterated.
			if (neighbourhood2.size() > minPoints) {
				expandCluster(mc, points, neighbourhood2);
			}
		}
	}

	/**
	 * Collects the indices of all uncovered points lying strictly closer than
	 * {@code eps} to {@code point}.
	 *
	 * @return indices into {@code points}; empty if there are none
	 */
	private List<Integer> getNeighbourhoodIDs(DenPoint point,
			ArrayList<DenPoint> points, double eps) {
		List<Integer> neighbourIDs = new ArrayList<Integer>();
		double[] origin = point.toDoubleArray();
		for (int p = 0; p < points.size(); p++) {
			DenPoint npoint = points.get(p);
			if (npoint.covered) {
				continue;
			}
			if (distance(origin, npoint.toDoubleArray()) < eps) {
				neighbourIDs.add(p);
			}
		}
		return neighbourIDs;
	}

	/**
	 * Finds the micro-cluster whose boundary (centre distance minus radius) is
	 * closest to the point.
	 *
	 * @return the nearest micro-cluster, or null if {@code cl} is empty
	 */
	private MicroCluster nearestCluster(DenPoint p, Clustering cl) {
		MicroCluster nearest = null;
		// Start at +infinity so that the first candidate always qualifies; a
		// start value of 0 would only ever accept clusters containing p.
		double minDist = Double.POSITIVE_INFINITY;
		double[] coordinates = p.toDoubleArray();
		for (int c = 0; c < cl.size(); c++) {
			MicroCluster candidate = (MicroCluster) cl.get(c);
			if (nearest == null) {
				// Fallback in case no distance compares as smaller (e.g. NaN).
				nearest = candidate;
			}
			double dist = distance(coordinates, candidate.getCenter())
					- candidate.getRadius(timestamp);
			if (dist < minDist) {
				minDist = dist;
				nearest = candidate;
			}
		}
		return nearest;
	}

	/**
	 * Euclidean distance between two points; iterates over the length of
	 * {@code pointA}.
	 */
	private double distance(double[] pointA, double[] pointB) {
		double sumOfSquares = 0.0;
		for (int i = 0; i < pointA.length; i++) {
			double diff = pointA[i] - pointB[i];
			sumOfSquares += diff * diff;
		}
		return Math.sqrt(sumOfSquares);
	}

	/**
	 * Returns the subspace clustering produced by the offline IncPreDeCon
	 * step. The offline clusterer is created on first use and is only
	 * re-run after a pruning pass has flagged new data as ready.
	 *
	 * @return the current subspace clustering
	 */
	public SubspaceClustering getClusteringResult() {

		if (!init) {
			predecon = new IncPreDeCon(p_micro_cluster, delta, tau, (int) mu,
					offlineOption.getValue() * epsilon, kappa);
			clustering = new Clustering();
		}

		// Update the clustering only every "tp"-th time step.
		if (READY || !init) {
			init = true;
			// p_micro_cluster is updated inside this call.
			clustering = predecon.getClustering(new_pMicroClusters,
					removed_pMicroClusters);

			READY = false;
			new_pMicroClusters = new Clustering();
			removed_pMicroClusters = new Clustering();
		}
		return new SubspaceClustering(clustering);
	}

	@Override
	public boolean implementsMicroClusterer() {
		return true;
	}

	/**
	 * Returns the potential micro-clusters after folding the pending buffers
	 * into them.
	 *
	 * NOTE(review): this also re-adds the clusters from
	 * removed_pMicroClusters and clears both buffers, so a later
	 * getClusteringResult() call no longer sees these changes. Behaviour is
	 * kept as is; confirm whether that is intended.
	 *
	 * @return the potential micro-cluster list (live object, not a copy)
	 */
	@Override
	public Clustering getMicroClusteringResult() {
		for (Cluster c : new_pMicroClusters.getClustering()) {
			p_micro_cluster.add(c);
		}
		for (Cluster c : removed_pMicroClusters.getClustering()) {
			p_micro_cluster.add(c);
		}
		new_pMicroClusters = new Clustering();
		removed_pMicroClusters = new Clustering();
		return p_micro_cluster;
	}

	@Override
	protected Measurement[] getModelMeasurementsImpl() {
		throw new UnsupportedOperationException("Not supported yet.");
	}

	@Override
	public void getModelDescription(StringBuilder out, int indent) {
	}

	public boolean isRandomizable() {
		return true;
	}

	/** Voting is not supported by this clusterer; always returns null. */
	public double[] getVotesForInstance(Instance inst) {
		return null;
	}

	/**
	 * Renders the class name followed by the current option values in CLI
	 * notation ("-c value"), separated by single spaces.
	 *
	 * NOTE(review): offlineOption is not part of the string.
	 *
	 * @return the parameter string, without a trailing space
	 */
	public String getParameterString() {
		StringBuilder sb = new StringBuilder();
		sb.append(this.getClass().getSimpleName()).append(' ');

		appendOption(sb, horizonOption.getCLIChar(),
				horizonOption.getValueAsCLIString());
		appendOption(sb, epsilonOption.getCLIChar(),
				epsilonOption.getValueAsCLIString());
		appendOption(sb, minPointsOption.getCLIChar(),
				minPointsOption.getValueAsCLIString());
		appendOption(sb, betaOption.getCLIChar(),
				betaOption.getValueAsCLIString());
		appendOption(sb, muOption.getCLIChar(),
				muOption.getValueAsCLIString());
		appendOption(sb, lambdaOption.getCLIChar(),
				lambdaOption.getValueAsCLIString());
		appendOption(sb, initPointsOption.getCLIChar(),
				initPointsOption.getValueAsCLIString());
		appendOption(sb, tauOption.getCLIChar(),
				tauOption.getValueAsCLIString());
		appendOption(sb, kappaOption.getCLIChar(),
				kappaOption.getValueAsCLIString());

		// NO " " at the end! results in errors on windows systems
		sb.append('-').append(deltaOption.getCLIChar()).append(' ');
		sb.append(deltaOption.getValueAsCLIString());

		return sb.toString();
	}

	/** Appends "-c value " (including the trailing space) to the builder. */
	private static void appendOption(StringBuilder sb, char cliChar,
			String value) {
		sb.append('-').append(cliChar).append(' ').append(value).append(' ');
	}
}
