123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148 |
- //#include "stdafx.h"
- #include<iostream>
- #include<map>
- #include<vector>
- #include<stdio.h>
- #include<cmath>
- #include<cstdlib>
- #include<algorithm>
- #include<fstream>
- #include "./../CSVparser/CSVparser.h"
- #include "KNN.h"
using namespace std;

// File-scope stream objects. Both are default-constructed, so neither is
// attached to a file at this point.
std::ifstream fin;
std::ofstream fout;
/*
 * Remove leading and trailing blanks (space, tab, CR, LF) from str.
 * The argument itself is modified; a copy of the trimmed text is returned.
 */
std::string Trim(std::string& str)
{
    const char* const blanks = " \t\r\n";

    // Drop everything in front of the first non-blank character
    // (the whole string when there is none).
    const std::string::size_type first_kept = str.find_first_not_of(blanks);
    str.erase(0, first_kept);

    // When nothing but blanks is left the search yields npos; npos + 1 wraps
    // to 0, so the erase below simply empties the string.
    const std::string::size_type last_kept = str.find_last_not_of(blanks);
    str.erase(last_kept + 1);

    return str;
}
- KNN::KNN(int magic_x, int magic_y, int magic_z)
- {
- int k = 10;
- this->k = k;
- csv::Parser file = csv::Parser("d:/magic_map/trainning_data_magic.csv");
-
- std::cout << file[0][0] << std::endl; // display : 1997
- std::cout << file[0] << std::endl; // display : 1997 | Ford | E350
- cout<<file[1]<<endl;
- int row_count = file.rowCount();
- /* input the dataSet */
- for(int i=0;i<rowLen;i++)
- {
- for(int j=0;j<colLen;j++)
- {
- if(i < row_count)
- {
- dataSet[i][j] = atof((file[i][j]).c_str());
- }
- }
-
- if(i < row_count)
- {
- labels[i].x = atof((file[i][3]).c_str());
- labels[i].y = atof((file[i][4]).c_str());
- }
-
- }
-
- cout<<"please input the test data :"<<endl;
- /* inuput the test data */
- testData[0] = magic_x;
- testData[1] = magic_y;
- testData[2] = magic_z;
- }
-
- /*
- * calculate the distance between test data and dataSet[i]
- */
- double KNN:: get_distance(tData *d1,tData *d2)
- {
- double sum = 0;
- for(int i=0;i<colLen;i++)
- {
- sum += pow( (d1[i]-d2[i]) , 2 );
- }
-
- // cout<<"the sum is = "<<sum<<endl;
- return sqrt(sum);
- }
-
- /*
- * calculate all the distance between test data and each training data
- */
- void KNN:: get_all_distance()
- {
- double distance;
- int i;
- for(i=0;i<rowLen;i++)
- {
- distance = get_distance(dataSet[i],testData);
- //<key,value> => <i,distance>
- map_index_dis[i] = distance;
- }
-
- //traverse the map to print the index and distance
- map<int,double>::const_iterator it = map_index_dis.begin();
- while(it!=map_index_dis.end())
- {
- //cout<<"index = "<<it->first<<" distance = "<<it->second<<endl;
- it++;
- }
- }
-
- /*
- * check which label the test data belongs to to classify the test data
- */
- tLabel KNN:: get_max_freq_label()
- {
- //transform the map_index_dis to vec_index_dis
- vector<PAIR> vec_index_dis( map_index_dis.begin(),map_index_dis.end() );
- //sort the vec_index_dis by distance from low to high to get the nearest data
- sort(vec_index_dis.begin(),vec_index_dis.end(),CmpByValue());
-
- for(int i=0;i<k;i++)
- {
- cout<<"the index = "<<vec_index_dis[i].first<<" the distance = "<<vec_index_dis[i].second<<" the label x= "<<labels[vec_index_dis[i].first].x<<" the label y= "<<labels[vec_index_dis[i].first].y<<" the coordinate ( "<<dataSet[ vec_index_dis[i].first ][0]<<","<<dataSet[ vec_index_dis[i].first ][1]<<" )"<<endl;
- //calculate the count of each label
- map_label_freq[ labels[ vec_index_dis[i].first ]]++;
- }
-
- map<tLabel,int>::const_iterator map_it = map_label_freq.begin();
- tLabel label;
- int max_freq = 0;
- //find the most frequent label
- while( map_it != map_label_freq.end() )
- {
- if( map_it->second > max_freq )
- {
- max_freq = map_it->second;
- label = map_it->first;
- }
- map_it++;
- }
- cout<<"The test data belongs to the x:"<<label.x<<"y:"<<label.y<<" label"<<endl;
- return label;
- }
#if 0
// Disabled stand-alone driver: asks for k on stdin, runs the distance pass
// and the vote, then waits on the console "pause" command.
// NOTE(review): KNN is built from a single int here, while the constructor
// defined in this file takes three ints -- confirm before re-enabling.
int main()
{
    std::cout << "please input the k value : " << std::endl;
    int k;
    std::cin >> k;

    KNN knn(k);
    knn.get_all_distance();
    knn.get_max_freq_label();

    std::system("pause");
    return 0;
}
#endif
|