/* The basic approach used here for processing large data sets is divide and conquer + heapsort. */
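/**
 * Input: 100 000 integers in the range 0~32768
 * (note: Createdate below actually writes 1000000 values).
 * 1. Output the maximum.
 * 2. Output the 100 numbers that occur most often.
 */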
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <time.h>
/**
 * Table of 100 (value, count) pairs kept as two parallel arrays:
 * times[k] is the occurrence count recorded for num[k].
 */
typedef struct
{
    int num[100];   /* the values */
    int times[100]; /* occurrence count of the matching num[] entry */
} Date;
/** One histogram entry: a value and the number of times it was seen. */
typedef struct
{
    int num;   /* the value */
    int times; /* how often the value occurred; the heap/sort key */
} Time;
/**
 * Sift the entry at index `cur` down a binary max-heap ordered by `times`.
 *
 * The subtrees below `cur` are expected to satisfy the max-heap property
 * already; on return the subtree rooted at `cur` does too.
 *
 * @param arr  heap storage; children of index i live at 2*i+1 and 2*i+2
 * @param len  number of entries of `arr` that belong to the heap
 * @param cur  index of the entry to sift down
 */
void myheap(Time arr[], int len, int cur)
{
    int parent = cur;

    for (;;) {
        int child = parent * 2 + 1; /* left child */
        if (child >= len) {
            break; /* parent is a leaf */
        }

        /* Select the larger child. The right child only exists when
         * child + 1 < len; reading it otherwise would step outside the heap
         * (and, during sorting, into the already-sorted tail). */
        if (child + 1 < len && arr[child].times < arr[child + 1].times) {
            child++;
        }

        /* Heap property holds here, so nothing below needs to move. */
        if (arr[child].times <= arr[parent].times) {
            break;
        }

        /* Swap whole entries so num and times always travel together. */
        Time tmp = arr[parent];
        arr[parent] = arr[child];
        arr[child] = tmp;

        parent = child;
    }
}
/**
 * Debug helper: print the `num` field of the first len/100 entries on one
 * line, each formatted with "%3d ", followed by a newline.
 *
 * The field is named explicitly because handing a whole struct to a "%d"
 * conversion is undefined behaviour.
 * NOTE(review): `num` (the struct's first member) is assumed to be the
 * intended field; switch to `times` if the heap key should be shown instead.
 *
 * @param arr  entries to print
 * @param len  number of entries in `arr`; only len/100 of them are printed
 */
void show(Time arr[], int len)
{
    int shown = len / 100;

    for (int i = 0; i < shown; i++) {
        printf("%3d ", arr[i].num);
    }
    printf("\n");
}
/**
 * Sort `arr` in place by `times` using heap sort built on myheap().
 *
 * Phase 1 sifts down every index from (len-1)/2 to 0 to build the heap.
 * Phase 2 repeatedly swaps the root with the last entry of the shrinking
 * heap and sifts the new root down again.
 *
 * @param arr  entries to sort
 * @param len  number of entries in `arr`
 */
void heapsort(Time arr[], int len)
{
    /* Phase 1: build the heap bottom-up. */
    int root = (len - 1) / 2;
    while (root >= 0) {
        myheap(arr, len, root);
        root--;
    }

    /* Phase 2: move the current root behind the heap, then restore the heap
     * on the remaining `end` entries. */
    int end = len - 1;
    while (end > 0) {
        Time top = arr[0];
        arr[0] = arr[end];
        arr[end] = top;

        myheap(arr, end, 0);
        end--;
    }
}
/**
 * Create the input data file: 1000000 pseudo-random ints written in the
 * machine's native binary int representation, replacing any existing file.
 *
 * rand() may return values up to an implementation-defined RAND_MAX, so each
 * value is reduced to 0..32767 to stay inside the range the rest of the
 * program is written for.
 *
 * The program is terminated with an error message if the file cannot be
 * opened or fully written.
 *
 * @param path  name of the file to create
 */
void Createdate(char* path)
{
    enum { VALUE_COUNT = 1000000, VALUE_LIMIT = 32768 };

    FILE *fw = fopen(path, "wb");
    if (fw == NULL) {
        perror(path);
        exit(EXIT_FAILURE);
    }

    srand((unsigned)time(NULL)); /* seed from the clock */

    for (int i = 0; i < VALUE_COUNT; i++) {
        int value = rand() % VALUE_LIMIT;
        if (fwrite(&value, sizeof value, 1, fw) != 1) {
            perror(path);
            fclose(fw);
            exit(EXIT_FAILURE);
        }
    }

    /* fclose flushes buffered data, so a failure here means lost output. */
    if (fclose(fw) != 0) {
        perror(path);
        exit(EXIT_FAILURE);
    }
}
/**
 * Count how often each value occurs in one bucket file and report the
 * most frequent ones.
 *
 * The file is read as raw ints. Values are counted in a 10000-slot histogram
 * indexed by value/10, which covers 0..99999; values outside that range have
 * no slot and are skipped. All values in the file are expected to share the
 * same last decimal digit (that is how Divdate splits the data), so a slot
 * index plus that digit reconstructs the value.
 *
 * The program is terminated with an error message if the file cannot be
 * opened.
 *
 * @param path  binary file of ints to read
 * @param d     receives the entries with the highest counts, most frequent
 *              first (d->times[0] is the largest count)
 */
void MyHasefile(char *path,Date *d)
{
    enum { HIST_SLOTS = 10000 };

    FILE *fr = fopen(path, "rb");
    if (fr == NULL) {
        perror(path);
        exit(EXIT_FAILURE);
    }

    int arr[HIST_SLOTS] = {0};
    int digit = 0; /* last decimal digit of the values; stays 0 for an empty file */
    int temp;
    while (fread(&temp, sizeof(int), 1, fr) > 0) {
        if (temp < 0 || temp / 10 >= HIST_SLOTS) {
            continue; /* would index outside the histogram */
        }
        arr[temp / 10] += 1;
        digit = temp % 10;
    }
    fclose(fr);

    Time brr[HIST_SLOTS];
    for (int i = 0; i < HIST_SLOTS; i++) {
        brr[i].times = arr[i];
        brr[i].num = i * 10 + digit;
    }

    heapsort(brr, HIST_SLOTS);

    /* After sorting, the highest counts sit at the end of brr; copy them out
     * back-to-front so d starts with the most frequent value. */
    int top = (int)(sizeof d->num / sizeof d->num[0]);
    for (int j = 0; j < top; j++) {
        d->num[j] = brr[HIST_SLOTS - 1 - j].num;
        d->times[j] = brr[HIST_SLOTS - 1 - j].times;
    }
}
/* Number of bucket files and length of each top list (matches Date's arrays). */
enum { DIV_FILES = 10, DIV_TOP = 100 };

/* Merge DIV_FILES top lists, each ordered by descending count, into the
 * overall DIV_TOP entries with the highest counts (also descending). */
static void merge_top(const Date lists[], Date *out)
{
    int next[DIV_FILES] = {0}; /* next unconsumed rank in each list */

    for (int k = 0; k < DIV_TOP; k++) {
        int best = -1;
        for (int i = 0; i < DIV_FILES; i++) {
            if (next[i] >= DIV_TOP) {
                continue; /* list exhausted */
            }
            if (best < 0 ||
                lists[i].times[next[i]] > lists[best].times[next[best]]) {
                best = i;
            }
        }
        out->num[k] = lists[best].num[next[best]];
        out->times[k] = lists[best].times[next[best]];
        next[best]++;
    }
}

/**
 * Split the data file into ten bucket files by last decimal digit, find the
 * most frequent values of each bucket with MyHasefile(), merge those lists
 * and print the overall 100 most frequent values, most frequent first.
 *
 * Bucket files "0.txt".."9.txt" are created in the current directory and
 * removed again before the result is printed. Negative input values have no
 * bucket and are skipped. The program is terminated with an error message
 * if a file cannot be opened or written.
 *
 * @param path  binary file of ints to analyse
 */
void Divdate(char* path)
{
    char *mypath[DIV_FILES] = {"0.txt","1.txt","2.txt","3.txt","4.txt","5.txt","6.txt","7.txt","8.txt","9.txt"};

    FILE *fr = fopen(path, "rb");
    if (fr == NULL) {
        perror(path);
        exit(EXIT_FAILURE);
    }

    FILE *fw[DIV_FILES];
    for (int i = 0; i < DIV_FILES; i++) {
        fw[i] = fopen(mypath[i], "wb");
        if (fw[i] == NULL) {
            perror(mypath[i]);
            exit(EXIT_FAILURE);
        }
    }

    /* Distribute every value to the bucket named after its last digit. */
    int temp;
    while (fread(&temp, sizeof(int), 1, fr) > 0) {
        if (temp < 0) {
            continue; /* temp % 10 would be a negative index */
        }
        if (fwrite(&temp, sizeof(int), 1, fw[temp % 10]) != 1) {
            perror(mypath[temp % 10]);
            exit(EXIT_FAILURE);
        }
    }
    fclose(fr);
    for (int i = 0; i < DIV_FILES; i++) {
        fclose(fw[i]);
    }

    /* Per-bucket top lists. */
    Date d[DIV_FILES];
    for (int i = 0; i < DIV_FILES; i++) {
        MyHasefile(mypath[i], &(d[i]));
    }

    /* A value lives in exactly one bucket, so the overall top list is a
     * merge of the per-bucket lists rather than a rank-by-rank maximum. */
    Date dmax;
    merge_top(d, &dmax);

    for (int i = 0; i < DIV_FILES; i++) {
        remove(mypath[i]);
    }

    for (int i = 0; i < DIV_TOP; i++) {
        printf("num:%3d,times:%3d\n", dmax.num[i], dmax.times[i]);
    }
}
/* Program entry: generate the data file, then split and analyse it. */
int main(void)
{
    char path[] = "D://date.txt";

    Createdate(path); /* write the random input data */
    Divdate(path);    /* bucket it, count, and print the result */
    return 0;
}
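/* Copyright notice: this is an original article by Teemo_king, released under the CC 4.0 BY-SA license. When reposting, please include a link to the original source and this notice. */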