Welcome to OStack Knowledge Sharing Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
197 views
in Technique[技术] by (71.8m points)

c - Sorting letters in an array by frequency in within a struct

I am trying to sort letters by frequency in a string. In the case that two or more letters have the same frequency, the letters with the same frequency would be sorted in alphabetical order.

This is what I managed to get so far

void get_text_statistics(const char *text, size_t len, statistics_t *data)
{
    *data = (statistics_t)
        {
            .sentences          = count_sentences(text, len),
            .words              = count_words(text, len),
            .most_freq_chars    = {/*something needs to be here*/}
        }

        get_letter_frequencies(text, len, &data -> freq[0], &data -> max_freq)
}

As you can see, my problem here is trying to sort the letters in a string by freq. I've tried looking up some tutorials but I couldn't find something similar to this specific example. Here is the struct concerned.

typedef struct statistics
{
    char_counts_t char_info;
    int sentences;
    int words;
    int freq[26];
    int max_freq;
    char most_freq_chars[27];
} statistics_t;

A bit earlier, I managed to make this function which may help.

void get_letter_frequencies(const char *text, size_t len, int freq[26], int *max_freq)
{

    for (int i = 0; i < 26; i++)
        freq[i] = 0;

    for (int i = 0; i < len; i++) {
        if ((text[i] >= 97) && (text[i] <= 122))
            freq[text[i] - 97]++;

    *max_freq = 0;
    for (int i = 0; i < 26; i++)
        if (*max_freq < freq[i])
            *max_freq = freq[i];
}

How would I go about this? TIA

p.s: count_sentences and count_words are functions which count sentences and words in the string.

question from:https://stackoverflow.com/questions/65875212/sorting-letters-in-an-array-by-frequency-in-within-a-struct

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Answer

0 votes
by (71.8m points)

One solution I thought of creates a structure that holds both the character and the frequency and did sort using qsort with a custom comparison function. This way the frequency limit is INT_MAX.

A more hacky way would be to sort an array of integers and for each poison use freq*128 + ('a' + i), do a normal integer array sort using greater and then get the characters using most_freq_chars = freq_array[i]%128

I hope it helps you =]

#include <stdio.h>      /* printf */
#include <stdlib.h>     /* qsort */
#include <string.h>     /* strlen */

typedef struct freq_pair {
  char c;
  int freq;
} freq_pair_t;

typedef struct statistics {
  char_counts_t char_info;
  int sentences;
  int words;
  int freq[26];
  int max_freq;
  char most_freq_chars[26]; // You don't need 27 spaces here
} statistics_t;

void get_letter_frequencies(const char* text, size_t len, int freq[26], int* max_freq) {
  for (int i = 0; i < 26; i++) {
    freq[i] = 0;
  }

  for (int i = 0; i < len; i++) {
    if ((text[i] >= 'a') && (text[i] <= 'z')) {
      freq[text[i] - 'a']++;
    }
  }

  *max_freq = 0;
  for (int i = 0; i < 26; i++) {
    if (*max_freq < freq[i]) {
      *max_freq = freq[i];
    }
  }
}

int compare(const void* a, const void* b) {
  freq_pair_t* pa = (freq_pair_t*)a;
  freq_pair_t* pb = (freq_pair_t*)b;

  if (pa->freq > pb->freq) {
    return -1;
  }

  if (pa->freq == pb->freq) {
    if (pa->c < pb->c) {
      return -1;
    }
    if (pa->c > pb->c) {
      return 1;
    }

    return 0;
  }

  if (pa->freq < pb->freq) {
    return 1;
  }
}

void get_text_statistics(const char* text, size_t len, statistics_t* data) {
  *data = (statistics_t){
      .sentences = count_sentences(text, len),
      .words = count_words(text, len),
      /* Do not init most_freq_chars here, let it for after you calc all the frequencies */
  };

  get_letter_frequencies(text, len, &data->freq[0], &data->max_freq);
  freq_pair_t freq_pairs[26];

  for (int i = 0; i < 26; i++) {
    freq_pairs[i].freq = data->freq[i];
    freq_pairs[i].c = 'a' + i;
  }

  qsort(freq_pairs, 26, sizeof(freq_pair_t), compare);
  for (int i = 0; i < 26; i++) {
    data->most_freq_chars[i] = freq_pairs[i].c;
  }
}

int main() {
  char* s = "quero mudar o mundo, cruzar os ceus sem nada temer";
  statistics_t data;
  get_text_statistics(s, strlen(s), &data);

  for (int i = 0; i < 26; i++) {
    printf("%c ", data.most_freq_chars[i]);
  }
  printf("
");

  for (int i = 0; i < 26; i++) {
    printf("%c-%d ", 'a' + i, data.freq[i]);
  }
  printf("
");
}

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome to OStack Knowledge Sharing Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...