260 lines
5.0 KiB
Go
260 lines
5.0 KiB
Go
// pam 用Go来写PAM
|
|
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"log"
|
|
"math"
|
|
"math/rand"
|
|
"time"
|
|
)
|
|
|
|
//先设置一些参数
|
|
|
|
const (
|
|
dataN = 50
|
|
dim = 3
|
|
clusterN = 5
|
|
)
|
|
|
|
type Ctx struct {
|
|
data [][]float64
|
|
SetS map[int]interface{}
|
|
}
|
|
|
|
func initData() [][]float64 { //初始数据集
|
|
src := rand.NewSource(time.Now().UnixNano())
|
|
rnd := rand.New(src)
|
|
r := make([][]float64, dataN)
|
|
for n := 0; n < dataN; n++ {
|
|
r[n] = make([]float64, dim)
|
|
for d := 0; d < dim; d++ {
|
|
r[n][d] = rnd.Float64()
|
|
}
|
|
}
|
|
|
|
return r
|
|
}
|
|
|
|
func copyIntMap(src map[int]interface{}, dst *map[int]interface{}) {
|
|
for k, v := range src {
|
|
(*dst)[k] = v
|
|
}
|
|
}
|
|
|
|
func minDistance2Mat(vec []float64, mat [][]float64) float64 {
|
|
var r float64
|
|
r = 1e20
|
|
var t float64
|
|
for m := 0; m < len(mat); m++ {
|
|
t = 0
|
|
for v := 0; v < len(vec); v++ {
|
|
t += math.Pow(vec[v]-mat[m][v], 2)
|
|
}
|
|
t = math.Sqrt(t)
|
|
if t < r {
|
|
r = t
|
|
}
|
|
}
|
|
return r
|
|
}
|
|
|
|
func minDistance2Vec(vec []float64, mat []float64) float64 {
|
|
t := make([][]float64, 1)
|
|
t[0] = mat
|
|
return minDistance2Mat(vec, t)
|
|
}
|
|
|
|
func min2minDistance(vec []float64, mat [][]float64) (min float64, min2 float64) {
|
|
var r float64
|
|
r = 1e20
|
|
var r2 float64
|
|
r2 = r
|
|
var t float64
|
|
for m := 0; m < len(mat); m++ {
|
|
t = 0
|
|
for v := 0; v < len(vec); v++ {
|
|
t += math.Pow(vec[v]-mat[m][v], 2)
|
|
}
|
|
t = math.Sqrt(t)
|
|
if t < r {
|
|
r2 = r
|
|
r = t
|
|
} else if t < r2 {
|
|
r2 = t
|
|
}
|
|
|
|
}
|
|
min = r
|
|
min2 = r2
|
|
return
|
|
}
|
|
|
|
func subDataMap(set map[int]interface{}, ctx *Ctx) [][]float64 {
|
|
r := make([][]float64, len(set))
|
|
ind := 0
|
|
for k, _ := range set {
|
|
r[ind] = ctx.data[k]
|
|
ind++
|
|
}
|
|
return r
|
|
}
|
|
|
|
func subDataInt(i int, ctx *Ctx) []float64 {
|
|
return ctx.data[i]
|
|
}
|
|
|
|
func ones(m int, n int, val float64) [][]float64 {
|
|
r := make([][]float64, n)
|
|
for i := 0; i < n; i++ {
|
|
r[i] = make([]float64, m)
|
|
for j := 0; j < m; j++ {
|
|
r[i][j] = val
|
|
}
|
|
}
|
|
return r
|
|
}
|
|
|
|
func maxInd(vec []float64) int {
|
|
var r float64
|
|
r = -1.1e20
|
|
var ri int
|
|
for i, v := range vec {
|
|
if v > r {
|
|
r = v
|
|
ri = i
|
|
}
|
|
}
|
|
return ri
|
|
}
|
|
|
|
func selectInitCluster(ctx *Ctx) {
|
|
for clusterI := 0; clusterI < clusterN-1; clusterI++ {
|
|
maxG := ones(dataN, 1, -100) //等价于maxG=-100*ones(dataN,1);
|
|
for cluster := 0; cluster < dataN; cluster++ {
|
|
if _, has := ctx.SetS[cluster]; has {
|
|
continue
|
|
}
|
|
//选一个候选数据
|
|
cddtI := cluster
|
|
cadSetS := make(map[int]interface{})
|
|
copyIntMap(ctx.SetS, &cadSetS) //src,dst
|
|
cadSetS[cddtI] = nil
|
|
Cij := 0.0
|
|
for J := 0; J < dataN; J++ {
|
|
if _, has := cadSetS[J]; has {
|
|
continue
|
|
}
|
|
d := ctx.data[J]
|
|
//寻找最短距离
|
|
minD := minDistance2Mat(d, subDataMap(ctx.SetS, ctx))
|
|
distanceIJ := minDistance2Vec(d, subDataInt(cddtI, ctx))
|
|
if minD-distanceIJ > 0 {
|
|
Cij += minD - distanceIJ
|
|
}
|
|
}
|
|
maxG[0][cluster] = Cij
|
|
}
|
|
maxGInd := maxInd(maxG[0]) //等价于maxGInd=find(maxG==max(maxG));
|
|
ctx.SetS[maxGInd] = nil
|
|
}
|
|
}
|
|
|
|
func getUSetS(ctx *Ctx) map[int]interface{} {
|
|
r := make(map[int]interface{})
|
|
for i := 0; i < dataN; i++ {
|
|
if _, has := ctx.SetS[i]; !has {
|
|
r[i] = nil
|
|
}
|
|
}
|
|
return r
|
|
}
|
|
|
|
func swap(ctx *Ctx) {
|
|
USetS := getUSetS(ctx)
|
|
var minKjih float64
|
|
|
|
var minSetS map[int]interface{}
|
|
var minUSetS map[int]interface{}
|
|
for {
|
|
minKjih = 1e20
|
|
for I, _ := range ctx.SetS {
|
|
for H, _ := range USetS {
|
|
cadSetS := make(map[int]interface{})
|
|
copyIntMap(ctx.SetS, &cadSetS) //src,dst
|
|
cadUSetS := make(map[int]interface{}) //src,dst
|
|
copyIntMap(USetS, &cadUSetS)
|
|
cadSetS[H] = nil
|
|
cadUSetS[I] = nil
|
|
delete(cadSetS, I)
|
|
delete(cadUSetS, H)
|
|
sumKjih := 0.0
|
|
for D, _ := range cadUSetS {
|
|
J := D
|
|
if J == I {
|
|
|
|
continue
|
|
}
|
|
//log.Println("for ", D, "in cadUSetS")
|
|
d := subDataInt(J, ctx)
|
|
minD, min2D := min2minDistance(d, subDataMap(ctx.SetS, ctx))
|
|
distanceIJ := minDistance2Vec(d, subDataInt(I, ctx))
|
|
distanceHJ := minDistance2Vec(d, subDataInt(H, ctx))
|
|
Kjih := 0.0
|
|
//log.Println("distanceIJ minD", distanceIJ, minD)
|
|
//log.Println("distanceHJ min2D", distanceHJ, min2D)
|
|
if distanceIJ > minD {
|
|
if distanceHJ-minD < 0 {
|
|
Kjih = distanceHJ - minD
|
|
} else {
|
|
Kjih = 0
|
|
}
|
|
}
|
|
if math.Abs(distanceIJ-minD) < 1e-5 {
|
|
Kjih = -minD
|
|
if distanceHJ < min2D {
|
|
Kjih += distanceHJ
|
|
} else {
|
|
Kjih += min2D
|
|
}
|
|
} else if distanceIJ < minD {
|
|
log.Fatalln("Input must be a string")
|
|
}
|
|
sumKjih += Kjih
|
|
|
|
}
|
|
if sumKjih < minKjih {
|
|
minKjih = sumKjih
|
|
minSetS = cadSetS
|
|
minUSetS = cadUSetS
|
|
}
|
|
|
|
}
|
|
}
|
|
if minKjih < 0 {
|
|
log.Println("minKjih", minKjih)
|
|
ctx.SetS = minSetS
|
|
USetS = minUSetS
|
|
//SetS
|
|
} else {
|
|
log.Println("minKjih", minKjih)
|
|
//SetS
|
|
log.Println("clustering is done.")
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
func main() {
|
|
sT := time.Now()
|
|
ctx := Ctx{}
|
|
ctx.data = initData()
|
|
ctx.SetS = make(map[int]interface{})
|
|
ctx.SetS[1] = nil
|
|
ctx.SetS[2] = nil
|
|
selectInitCluster(&ctx)
|
|
swap(&ctx)
|
|
fmt.Println("Elapse: ", time.Since(sT).Seconds())
|
|
fmt.Println("Hello World!")
|
|
}
|