大数据：随机生成10万个整数找出出现次数前一百的数

Post author:xfxia
Post published:2023年9月11日
Post category:其他
实现大数据处理的基本方法是分治法+heapsort
/**
 * Input: 100 000 integers in the range 0~32768.
 * 1. Output the maximum.
 * 2. Output the 100 values with the highest occurrence counts.
 */

#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <time.h>

/*
 * Fixed-size result table with 100 slots.
 * num[k] and times[k] belong together: slot k holds one value and the
 * number of times that value was counted.
 */
typedef struct {
	int num[100];   /* the values */
	int times[100]; /* occurrence count of the value in the same slot */
} Date;
/*
 * One (value, count) pair. The heap routines in this file order
 * elements by the times field.
 */
typedef struct {
	int num;   /* the value */
	int times; /* how many times the value was counted */
} Time;

/**
 * Sift the element at index cur down a max-heap keyed on .times.
 *
 * arr: array holding the heap
 * len: number of elements that belong to the heap (arr[0..len-1]);
 *      elements at index >= len are never read or written
 * cur: index of the element to sift down
 *
 * The subtrees below cur are expected to already satisfy the heap
 * property; after the call the subtree rooted at cur does too.
 */
void myheap(Time arr[],int len,int cur)
{
	int fnode = cur;

	for(;;)
	{
		int maxnode = fnode*2+1;	/* left child */
		if(maxnode >= len)
		{
			break;			/* fnode is a leaf */
		}
		/* Pick the larger child; the right child only exists if it is
		 * still inside the heap, so check the bound before reading it. */
		if(maxnode+1<len && arr[maxnode].times<arr[maxnode+1].times)
		{
			maxnode++;
		}
		if(arr[maxnode].times <= arr[fnode].times)
		{
			break;			/* heap property already holds here */
		}

		Time temp = arr[maxnode];
		arr[maxnode] = arr[fnode];
		arr[fnode] = temp;

		fnode = maxnode;
	}
}
/**
 * Debug helper: print the num field of the first len/100 elements of
 * arr on one line, followed by a newline.
 *
 * Passing a whole Time struct to a "%d" conversion is undefined
 * behaviour, so an int member is passed explicitly.
 */
void show(Time arr[],int len)
{
	for(int i=0; i<len/100; i++)
	{
		printf("%3d ",arr[i].num);
	}
	printf("\n");
}
/**
 * Sort arr[0..len-1] in place into ascending order of .times.
 *
 * Phase 1 builds a max-heap by sifting down every node from index
 * (len-1)/2 back to the root. Phase 2 repeatedly moves the root to the
 * end of the unsorted prefix and restores the heap on the shorter prefix.
 *
 * NOTE(review): BSD/macOS <stdlib.h> declares its own heapsort() with a
 * different signature; this name may clash there - confirm target platform.
 */
void heapsort(Time arr[],int len)
{
	int node = (len-1)/2;
	while(node >= 0)
	{
		myheap(arr,len,node);
		node--;
	}

	for(int last=len-1; last>0; last--)
	{
		/* Current maximum goes to its final position. */
		Time top = arr[0];
		arr[0] = arr[last];
		arr[last] = top;

		/* The heap now only covers arr[0..last-1]. */
		myheap(arr,last,0);
	}
}
/**
 * Create the data file: write random ints in binary form to path.
 *
 * path: file to create or overwrite (opened in "wb" mode)
 *
 * Every value is reduced to the range [0, 32768). rand() alone may
 * return values up to RAND_MAX, which is larger than 32767 on many
 * platforms, and the counting code in this file sizes its tables for
 * small non-negative values.
 *
 * On any I/O failure a message is printed with perror() and the
 * program exits with EXIT_FAILURE.
 *
 * NOTE(review): the file header talks about 100 000 integers but the
 * loop has always written 1 000 000; the count is left unchanged here -
 * confirm which one is intended.
 */
void Createdate(char* path)
{
	enum { VALUE_COUNT = 1000000, VALUE_LIMIT = 32768 };

	FILE *fw = fopen(path,"wb");
	if(fw == NULL)
	{
		perror(path);
		exit(EXIT_FAILURE);
	}

	srand((unsigned)time(NULL));	/* seed from the current time */
	for(int i=0; i<VALUE_COUNT; i++)
	{
		int temp = rand() % VALUE_LIMIT;
		if(fwrite(&temp,sizeof temp,1,fw) != 1)
		{
			perror(path);
			fclose(fw);
			exit(EXIT_FAILURE);
		}
	}

	/* fclose flushes buffered data, so a failure here means lost output. */
	if(fclose(fw) != 0)
	{
		perror(path);
		exit(EXIT_FAILURE);
	}
}
/**
 * Count the values stored in one partition file and report the 100
 * most frequent ones.
 *
 * path: binary file of ints, read with fread
 * d:    output; d->num[0] / d->times[0] receive the most frequent value
 *       and its count, followed by the rest in descending count order
 *
 * Values are counted in bucket value/10 and reconstructed as
 * bucket*10 + last digit, so this only gives exact results when every
 * value in the file ends in the same decimal digit.
 *
 * Values that do not fit the 10000-bucket table (negative, or
 * >= 100000) are skipped instead of indexing outside the table.
 * An empty file yields entries with a count of 0.
 *
 * If the file cannot be opened, a message is printed with perror() and
 * the program exits with EXIT_FAILURE.
 */
void  MyHasefile(char *path,Date *d)
{
	enum { BUCKETS = 10000 };
	const int top = (int)(sizeof d->num / sizeof d->num[0]);

	FILE *fr = fopen(path,"rb");
	if(fr == NULL)
	{
		perror(path);
		exit(EXIT_FAILURE);
	}

	int temp;
	int digit = 0;		/* last digit shared by the values; 0 if none was read */
	int arr[BUCKETS] = {0};
	Time brr[BUCKETS];

	while(fread(&temp,sizeof(int),1,fr) == 1)
	{
		if(temp < 0 || temp/10 >= BUCKETS)
		{
			continue;	/* would index outside arr */
		}
		digit = temp%10;
		arr[temp/10] += 1;
	}
	fclose(fr);

	for(int i=0; i<BUCKETS; i++)
	{
		brr[i].times = arr[i];
		brr[i].num = i*10+digit;
	}

	/* Ascending by count, so the most frequent entries end up last. */
	heapsort(brr,BUCKETS);

	for(int i=BUCKETS-1,j=0; j<top; i--,j++)
	{
		d->num[j] = brr[i].num;
		d->times[j] = brr[i].times;
	}
}
void Divdate(char* path)//拆分数据
{
	char *mypath[10] = {"0.txt","1.txt","2.txt","3.txt","4.txt","5.txt","6.txt","7.txt","8.txt","9.txt"}; 
	FILE *fr = fopen(path,"rb");
	assert(fr != NULL);
	FILE *fw[10];
	for(int i=0; i<10; i++)
	{
		fw[i] = fopen(mypath[i],"wb");
		assert(fw[i] != NULL);
	}
	int temp;
	while(fread(&temp,sizeof(int),1,fr) > 0)
	{
		fwrite(&temp,sizeof(int),1,fw[temp%10]);
	}
	for(int i=0; i<10; i++)
	{
		fclose(fw[i]);
	}
	Date d[10] ;
	for(int i=0; i<10; i++)
	{
		MyHasefile(mypath[i],&(d[i]));
	}
	
	Date dmax;

	for(int i=0; i<10; i++)
	{
		for(int j=0; j<100; j++)
		if(d[i].times[j] >dmax.times[j])
		{
			dmax.times[j] = d[i].times[j];
			dmax.num[j] = d[i].num[j];
		}
	}
	for(int i=0; i<10; i++)
	{
		remove(mypath[i]);
	}
	for(int i=0; i<100; i++)
	printf("num:%3d,times:%3d\n",dmax.num[i],dmax.times[i]);
	return ;
}

/*
 * Program entry: generate the data file, then split it and print the
 * most frequent values.
 */
int main(void)
{
	char *data_file = "D://date.txt";

	Createdate(data_file);	/* create the data */
	Divdate(data_file);	/* split the data and report */
	return 0;
}
原文链接：https://blog.csdn.net/Teemo_king/article/details/78639237
你可能也喜欢