package com.yahoo.labs.samoa.instances;

/*
 * #%L
 * SAMOA
 * %%
 * Copyright (C) 2013 Yahoo! Inc.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.io.StreamTokenizer;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * 
 * @author abifet
 */
public class ArffLoader implements Serializable {

  protected InstanceInformation instanceInformation;

  transient protected StreamTokenizer streamTokenizer;

  protected Reader reader;

  protected int size;

  protected int classAttribute;

  public ArffLoader() {
  }

  public ArffLoader(Reader reader, int size, int classAttribute) {
    this.reader = reader;
    this.size = size;
    this.classAttribute = classAttribute;
    initStreamTokenizer(reader);
  }

  public InstanceInformation getStructure() {
    return this.instanceInformation;
  }

  public Instance readInstance(Reader reader) {
    if (streamTokenizer == null) {
      initStreamTokenizer(reader);
    }
    while (streamTokenizer.ttype == StreamTokenizer.TT_EOL) {
      try {
        streamTokenizer.nextToken();
      } catch (IOException ex) {
        Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex);
      }
    }
    if (streamTokenizer.ttype == '{') {
      return readInstanceSparse();
      // return readDenseInstanceSparse();
    } else {
      return readInstanceDense();
    }

  }

  public Instance readInstanceDense() {
    Instance instance = new DenseInstance(this.instanceInformation.numAttributes() + 1);
    // System.out.println(this.instanceInformation.numAttributes());
    int numAttribute = 0;
    try {
      while (numAttribute == 0 && streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
        // For each line
        while (streamTokenizer.ttype != StreamTokenizer.TT_EOL
            && streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
          // For each item
          if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) {
            // System.out.println(streamTokenizer.nval + "Num ");
            this.setValue(instance, numAttribute, streamTokenizer.nval, true);
            numAttribute++;

          } else if (streamTokenizer.sval != null && (streamTokenizer.ttype == StreamTokenizer.TT_WORD
              || streamTokenizer.ttype == 34)) {
            // System.out.println(streamTokenizer.sval + "Str");
            boolean isNumeric = attributes.get(numAttribute).isNumeric();
            double value;
            if ("?".equals(streamTokenizer.sval)) {
              value = Double.NaN; // Utils.missingValue();
            } else if (isNumeric == true) {
              value = Double.valueOf(streamTokenizer.sval).doubleValue();
            } else {
              value = this.instanceInformation.attribute(numAttribute).indexOfValue(streamTokenizer.sval);
            }

            this.setValue(instance, numAttribute, value, isNumeric);
            numAttribute++;
          }
          streamTokenizer.nextToken();
        }
        streamTokenizer.nextToken();
        // System.out.println("EOL");
      }

    } catch (IOException ex) {
      Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex);
    }
    return (numAttribute > 0) ? instance : null;
  }

  private void setValue(Instance instance, int numAttribute, double value, boolean isNumber) {
    double valueAttribute;
    if (isNumber && this.instanceInformation.attribute(numAttribute).isNominal) {
      valueAttribute = this.instanceInformation.attribute(numAttribute).indexOfValue(Double.toString(value));
      // System.out.println(value +"/"+valueAttribute+" ");

    } else {
      valueAttribute = value;
      // System.out.println(value +"/"+valueAttribute+" ");
    }
    if (this.instanceInformation.classIndex() == numAttribute) {
      instance.setClassValue(valueAttribute);
      // System.out.println(value
      // +"<"+this.instanceInformation.classIndex()+">");
    } else {
      instance.setValue(numAttribute, valueAttribute);
    }
  }

  private Instance readInstanceSparse() {
    // Return a Sparse Instance
    Instance instance = new SparseInstance(1.0, null); // (this.instanceInformation.numAttributes()
                                                       // + 1);
    // System.out.println(this.instanceInformation.numAttributes());
    int numAttribute;
    ArrayList<Double> attributeValues = new ArrayList<Double>();
    List<Integer> indexValues = new ArrayList<Integer>();
    try {
      // while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
      streamTokenizer.nextToken(); // Remove the '{' char
      // For each line
      while (streamTokenizer.ttype != StreamTokenizer.TT_EOL
          && streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
        while (streamTokenizer.ttype != '}') {
          // For each item
          // streamTokenizer.nextToken();
          // while (streamTokenizer.ttype != '}'){
          // System.out.println(streamTokenizer.nval +"-"+
          // streamTokenizer.sval);
          // numAttribute = (int) streamTokenizer.nval;
          if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) {
            numAttribute = (int) streamTokenizer.nval;
          } else {
            numAttribute = Integer.parseInt(streamTokenizer.sval);
          }
          streamTokenizer.nextToken();

          if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) {
            // System.out.print(streamTokenizer.nval + " ");
            this.setSparseValue(instance, indexValues, attributeValues, numAttribute, streamTokenizer.nval, true);
            // numAttribute++;

          } else if (streamTokenizer.sval != null && (streamTokenizer.ttype == StreamTokenizer.TT_WORD
              || streamTokenizer.ttype == 34)) {
            // System.out.print(streamTokenizer.sval + "-");
            if (attributes.get(numAttribute).isNumeric()) {
              this.setSparseValue(instance, indexValues, attributeValues, numAttribute,
                  Double.valueOf(streamTokenizer.sval).doubleValue(), true);
            } else {
              this.setSparseValue(instance, indexValues, attributeValues, numAttribute, this.instanceInformation
                  .attribute(numAttribute).indexOfValue(streamTokenizer.sval), false);
            }
          }
          streamTokenizer.nextToken();
        }
        streamTokenizer.nextToken(); // Remove the '}' char
      }
      streamTokenizer.nextToken();
      // System.out.println("EOL");
      // }

    } catch (IOException ex) {
      Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex);
    }
    int[] arrayIndexValues = new int[attributeValues.size()];
    double[] arrayAttributeValues = new double[attributeValues.size()];
    for (int i = 0; i < arrayIndexValues.length; i++) {
      arrayIndexValues[i] = indexValues.get(i).intValue();
      arrayAttributeValues[i] = attributeValues.get(i).doubleValue();
    }
    instance.addSparseValues(arrayIndexValues, arrayAttributeValues, this.instanceInformation.numAttributes());
    return instance;

  }

  private void setSparseValue(Instance instance, List<Integer> indexValues, List<Double> attributeValues,
      int numAttribute, double value, boolean isNumber) {
    double valueAttribute;
    if (isNumber && this.instanceInformation.attribute(numAttribute).isNominal) {
      valueAttribute = this.instanceInformation.attribute(numAttribute).indexOfValue(Double.toString(value));
    } else {
      valueAttribute = value;
    }
    if (this.instanceInformation.classIndex() == numAttribute) {
      instance.setClassValue(valueAttribute);
    } else {
      // instance.setValue(numAttribute, valueAttribute);
      indexValues.add(numAttribute);
      attributeValues.add(valueAttribute);
    }
    // System.out.println(numAttribute+":"+valueAttribute+","+this.instanceInformation.classIndex()+","+value);
  }

  private Instance readDenseInstanceSparse() {
    // Returns a dense instance
    Instance instance = new DenseInstance(this.instanceInformation.numAttributes() + 1);
    // System.out.println(this.instanceInformation.numAttributes());
    int numAttribute;
    try {
      // while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
      streamTokenizer.nextToken(); // Remove the '{' char
      // For each line
      while (streamTokenizer.ttype != StreamTokenizer.TT_EOL
          && streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
        while (streamTokenizer.ttype != '}') {
          // For each item
          // streamTokenizer.nextToken();
          // while (streamTokenizer.ttype != '}'){
          // System.out.print(streamTokenizer.nval+":");
          numAttribute = (int) streamTokenizer.nval;
          streamTokenizer.nextToken();

          if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) {
            // System.out.print(streamTokenizer.nval + " ");
            this.setValue(instance, numAttribute, streamTokenizer.nval, true);
            // numAttribute++;

          } else if (streamTokenizer.sval != null && (streamTokenizer.ttype == StreamTokenizer.TT_WORD
              || streamTokenizer.ttype == 34)) {
            // System.out.print(streamTokenizer.sval +
            // "/"+this.instanceInformation.attribute(numAttribute).indexOfValue(streamTokenizer.sval)+" ");
            if (attributes.get(numAttribute).isNumeric()) {
              this.setValue(instance, numAttribute, Double.valueOf(streamTokenizer.sval).doubleValue(), true);
            } else {
              this.setValue(instance, numAttribute,
                  this.instanceInformation.attribute(numAttribute).indexOfValue(streamTokenizer.sval), false);
              // numAttribute++;
            }
          }
          streamTokenizer.nextToken();
        }
        streamTokenizer.nextToken(); // Remove the '}' char
      }
      streamTokenizer.nextToken();
      // System.out.println("EOL");
      // }

    } catch (IOException ex) {
      Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex);
    }
    return instance;
  }

  protected List<Attribute> attributes;

  private InstanceInformation getHeader() {

    String relation = "file stream";
    // System.out.println("RELATION " + relation);
    attributes = new ArrayList<Attribute>();
    try {
      streamTokenizer.nextToken();
      while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
        // For each line
        // if (streamTokenizer.ttype == '@') {
        if (streamTokenizer.ttype == StreamTokenizer.TT_WORD && streamTokenizer.sval.startsWith("@") == true) {
          // streamTokenizer.nextToken();
          String token = streamTokenizer.sval.toUpperCase();
          if (token.startsWith("@RELATION")) {
            streamTokenizer.nextToken();
            relation = streamTokenizer.sval;
            // System.out.println("RELATION " + relation);
          } else if (token.startsWith("@ATTRIBUTE")) {
            streamTokenizer.nextToken();
            String name = streamTokenizer.sval;
            // System.out.println("* " + name);
            if (name == null) {
              name = Double.toString(streamTokenizer.nval);
            }
            streamTokenizer.nextToken();
            String type = streamTokenizer.sval;
            // System.out.println("* " + name + ":" + type + " ");
            if (streamTokenizer.ttype == '{') {
              streamTokenizer.nextToken();
              List<String> attributeLabels = new ArrayList<String>();
              while (streamTokenizer.ttype != '}') {

                if (streamTokenizer.sval != null) {
                  attributeLabels.add(streamTokenizer.sval);
                  // System.out.print(streamTokenizer.sval + ",");
                } else {
                  attributeLabels.add(Double.toString(streamTokenizer.nval));
                  // System.out.print(streamTokenizer.nval + ",");
                }

                streamTokenizer.nextToken();
              }
              // System.out.println();
              attributes.add(new Attribute(name, attributeLabels));
            } else {
              // Add attribute
              attributes.add(new Attribute(name));
            }

          } else if (token.startsWith("@DATA")) {
            // System.out.print("END");
            streamTokenizer.nextToken();
            break;
          }
        }
        streamTokenizer.nextToken();
      }

    } catch (IOException ex) {
      Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex);
    }
    return new InstanceInformation(relation, attributes);
  }

  private void initStreamTokenizer(Reader reader) {
    BufferedReader br = new BufferedReader(reader);

    // Init streamTokenizer
    streamTokenizer = new StreamTokenizer(br);

    streamTokenizer.resetSyntax();
    streamTokenizer.whitespaceChars(0, ' ');
    streamTokenizer.wordChars(' ' + 1, '\u00FF');
    streamTokenizer.whitespaceChars(',', ',');
    streamTokenizer.commentChar('%');
    streamTokenizer.quoteChar('"');
    streamTokenizer.quoteChar('\'');
    streamTokenizer.ordinaryChar('{');
    streamTokenizer.ordinaryChar('}');
    streamTokenizer.eolIsSignificant(true);

    this.instanceInformation = this.getHeader();
    if (classAttribute < 0) {
      this.instanceInformation.setClassIndex(this.instanceInformation.numAttributes() - 1);
      // System.out.print(this.instanceInformation.classIndex());
    } else if (classAttribute > 0) {
      this.instanceInformation.setClassIndex(classAttribute - 1);
    }
  }
}
