from pyspark import SparkContext
from pyspark.mllib.clustering import KMeans
from numpy import array
sc = SparkContext()
# 12 records with height, weight data
data = array([185,72, 170,56, 168,60, 179,68, 182,72, 188,77,
              180,71, 180,70, 183,84, 180,88, 180,67, 177,76]).reshape(12, 2)
# Train a KMeans model with k=2 clusters
# (the 'runs' parameter was deprecated in Spark 1.6 and removed in 2.0, so it is omitted here)
model = KMeans.train(sc.parallelize(data), 2, initializationMode="random")
model.save(sc, "savedModelDir")
This will create a directory, _savedModelDir_, with two subdirectories, _data_ and _metadata_, where the model is stored.

**Using an Already Trained Model for Predicting Clusters**

Now, let's load the trained model and use it. We need to import KMeansModel in order to load the model from disk.
from pyspark import SparkContext
from pyspark.mllib.clustering import KMeans, KMeansModel
from numpy import array
sc = SparkContext()
# Load the previously saved KMeans model
model = KMeansModel.load(sc, "savedModelDir")
# Print out the cluster of each data point
print(model.predict(array([185, 71])))
print(model.predict(array([170, 56])))
print(model.predict(array([168, 60])))
print(model.predict(array([179, 68])))
print(model.predict(array([182, 72])))
print(model.predict(array([188, 77])))
print(model.predict(array([180, 71])))
print(model.predict(array([180, 70])))
print(model.predict(array([183, 84])))
print(model.predict(array([180, 88])))
print(model.predict(array([180, 67])))
print(model.predict(array([177, 76])))
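As a quick sanity check (not part of the original listing), you can also inspect the learned centroids and the model's clustering cost. The `clusterCenters` property and `computeCost` method are standard on `KMeansModel`; the `points` variable below is just an illustrative name for the re-created training RDD.

# Re-create the sample points as an RDD (the same 12 records used for training)
points = sc.parallelize(array([185,72, 170,56, 168,60, 179,68, 182,72, 188,77,
                               180,71, 180,70, 183,84, 180,88, 180,67, 177,76]).reshape(12, 2))

# Inspect the two learned centroids (height, weight)
print(model.clusterCenters)

# Within Set Sum of Squared Errors: lower values mean tighter clusters
print(model.computeCost(points))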