Jump to content
Fórum Script Brasil
  • 0

Medir memória cache


Question

Por favor, alguém consegue me explicar o que acontece exatamente no main e na função int i386_cpuid_caches(size_t *data_caches)?

 

/* Medindo latências de cache

Prefiro usar o relógio de hardware como medida.
A instrução rdtsc informa a contagem atual de ciclos desde que a CPU foi ligada. Além disso,
é melhor usar asm para garantir que exatamente as mesmas instruções sejam usadas tanto na execução medida quanto na execução em vazio.
Usando isso e um pouco de estatística, escrevi este programa há muito tempo. */

#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <sys/mman.h>

int i386_cpuid_caches (size_t *);
int test_cache(size_t,size_t,int *, size_t);

/* Number of histogram buckets; bucket k counts samples that took k cycles. */
enum { LATENCY_BUCKETS = 0x400 };

/*
 * Return the index of the first bucket at which the running total of
 * hist[0..n) reaches 75% of the overall total.
 * With an all-zero histogram the result is 0.
 */
static size_t latency_percentile75(const int *hist, size_t n)
{
    size_t total = 0;
    for (size_t k = 0; k < n; k++) {
        total += (size_t)hist[k];
    }

    size_t running = 0;
    for (size_t k = 0; k < n; k++) {
        running += (size_t)hist[k];
        if (running >= total * .75) {
            return k;
        }
    }
    return n ? n - 1 : 0;
}

/*
 * Program entry point.
 *
 * 1. Asks i386_cpuid_caches() for the sizes of the data/unified caches.
 * 2. Times an empty rdtsc/mfence sequence `attempts` times and takes the
 *    75th-percentile bucket as the overhead of the measurement itself.
 * 3. For every reported cache, runs test_cache() on a buffer four times the
 *    size of that cache and prints the 75th-percentile latency minus the
 *    measurement overhead to stderr.
 *
 * Always returns 0.
 */
int main(void)
{
    size_t cache_sizes[32];
    int num_data_caches = i386_cpuid_caches(cache_sizes);

    int latencies[LATENCY_BUCKETS];
    memset(latencies, 0, sizeof(latencies));

    const int attempts = 1000000;

    /* Measure how many cycles the timing sequence costs with no load in it. */
    for (int i = 0; i < attempts; i++) {
        uint32_t cycles_used, cycles_hi, start_hi, start_lo;

        /*
         * volatile: without it the compiler may treat this input-less asm as
         * loop-invariant and execute it only once.
         * Early-clobber (&) outputs: every output is written in the middle of
         * the sequence, so none may share a register with another operand.
         * The two back-to-back mfence instructions mirror the sequence used
         * in test_cache(), minus the load that sits between them there.
         */
        __asm__ __volatile__(
            "mfence\n\t"            /* memory fence */
            "rdtsc\n\t"             /* read start cycle count into edx:eax */
            "mov %%edx, %2\n\t"
            "mov %%eax, %3\n\t"
            "mfence\n\t"
            "mfence\n\t"
            "rdtsc\n\t"             /* read end cycle count into edx:eax */
            "sub %3, %%eax\n\t"     /* low word first ... */
            "sbb %2, %%edx"         /* ... then high word with the borrow */
            : "=&a"(cycles_used)
            , "=&d"(cycles_hi)
            , "=&r"(start_hi)
            , "=&r"(start_lo)
            :
            : "cc", "memory");
        (void)cycles_hi;

        /* Anything at or beyond the last bucket is lumped into the last bucket. */
        if (cycles_used < LATENCY_BUCKETS) {
            latencies[cycles_used]++;
        } else {
            latencies[LATENCY_BUCKETS - 1]++;
        }
    }

    int empty_cycles = (int)latency_percentile75(latencies, LATENCY_BUCKETS);
    fprintf(stderr, "Empty counting takes %d cycles\n", empty_cycles);

    for (int i = 0; i < num_data_caches; i++) {
        /* Start from an empty histogram so earlier runs do not skew this one. */
        memset(latencies, 0, sizeof(latencies));

        test_cache((size_t)attempts, cache_sizes[i] * 4, latencies, LATENCY_BUCKETS);

        int p75 = (int)latency_percentile75(latencies, LATENCY_BUCKETS);
        fprintf(stderr, "Cache ID %i has latency %d cycles\n", i, p75 - empty_cycles);
    }

    return 0;
}

/*
 * Enumerate the CPU caches through CPUID leaf 4 and print a description of
 * each one to stdout.
 *
 * data_caches: output array; receives the total size in bytes of every cache
 *              whose type is "data" (1) or "unified" (3), in enumeration
 *              order. At most 32 sub-leaves are queried, so the array must
 *              have room for 32 elements.
 *
 * Returns the number of entries written to data_caches.
 */
int i386_cpuid_caches(size_t *data_caches){
    int num_data_caches = 0;

    for (int i = 0; i < 32; i++) {
        uint32_t eax = 4;           /* leaf 4 */
        uint32_t ebx;
        uint32_t ecx = (uint32_t)i; /* sub-leaf: cache index */
        uint32_t edx;

        __asm__ __volatile__(
            "cpuid"
            : "+a"(eax)
            , "=b"(ebx)
            , "+c"(ecx)
            , "=d"(edx)
            );

        /* EAX[4:0] is the cache type; 0 means there are no more caches. */
        int cache_type = (int)(eax & 0x1F);
        if (cache_type == 0) {
            break;
        }

        const char *cache_type_string;
        switch (cache_type) {
            case 1: cache_type_string = "Data Cache"; break;
            case 2: cache_type_string = "Instruction Cache"; break;
            case 3: cache_type_string = "Unified Cache"; break;
            default: cache_type_string = "Unkown Type Cache"; break;
        }

        /* EAX[7:5] level, EAX[8] self-initializing, EAX[9] fully associative. */
        int cache_level = (int)((eax >> 5) & 0x7);
        int cache_is_self_initializing = (int)((eax >> 8) & 0x1);
        int cache_is_fully_associativity = (int)((eax >> 9) & 0x1);

        /* Every field below is stored by the CPU as "value minus one". */
        unsigned int cache_sets = ecx + 1;
        /* EBX[11:0]: line size. The field is 12 bits wide, hence 0xFFF. */
        unsigned int cache_coherency_line_size = (ebx & 0xFFF) + 1;
        /* EBX[21:12]: physical line partitions. */
        unsigned int cache_physical_line_partitions = ((ebx >> 12) & 0x3FF) + 1;
        /* EBX[31:22]: ways of associativity. */
        unsigned int cache_ways_of_associativity = ((ebx >> 22) & 0x3FF) + 1;

        /* Widen before multiplying so the product cannot wrap in 32 bits. */
        size_t cache_total_size = (size_t)cache_ways_of_associativity
                                * cache_physical_line_partitions
                                * cache_coherency_line_size
                                * cache_sets;

        if (cache_type == 1 || cache_type == 3) {
            data_caches[num_data_caches++] = cache_total_size;
        }

        printf(
            "Cache ID %d: \n"
            "-level: %d\n"
            "-Type: %s\n"
            "-Sets: %u\n"
            "- System Coherency Line Size: %u bytes\n"
            "- Physical Line partitions: %u\n"
            "- Ways of associativity: %u\n"
            "-Total Size: %zu bytes (%zu kb) \n"
            "- Is fully associative:%s\n"
            "- Is self initializing: %s\n"
            "\n"
            , i
            , cache_level
            , cache_type_string
            , cache_sets
            , cache_coherency_line_size
            , cache_physical_line_partitions
            , cache_ways_of_associativity
            , cache_total_size, cache_total_size >> 10
            , cache_is_fully_associativity ? "true" : "false"
            , cache_is_self_initializing ? "true" : "false"
            );
    }

    return num_data_caches;
}

/*
 * Time single-byte loads from pseudo-random offsets inside a freshly mapped
 * anonymous buffer and add the results to a histogram.
 *
 * attempts:         number of timed loads to perform.
 * lower_cache_size: size in bytes of the buffer to map and sample from.
 * latencies:        histogram to update; latencies[k] is incremented for a
 *                   sample of k cycles. Samples of max_latency cycles or
 *                   more are counted in the last bucket. Existing counts are
 *                   kept, not cleared.
 * max_latency:      number of elements in latencies.
 *
 * Returns 0 on success, -1 if latencies is NULL or max_latency is 0.
 * Calls abort() if the buffer cannot be mapped.
 */
int test_cache(size_t attempts, size_t lower_cache_size, int *latencies , size_t max_latency){
    if (latencies == NULL || max_latency == 0) {
        return -1;
    }

    char *random_data = mmap(
        NULL
        , lower_cache_size
        , PROT_READ | PROT_WRITE
        , MAP_PRIVATE | MAP_ANON
        , -1
        , 0
        );

    if (random_data == MAP_FAILED) {
        perror("mmap");
        abort();
    }

    /* Write one byte per page so every page is touched before timing starts. */
    long page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0) {
        page_size = 4096; /* sysconf failed: fall back to a common page size */
    }
    for (size_t i = 0; i < lower_cache_size; i += (size_t)page_size) {
        random_data[i] = 1;
    }

    size_t random_offset = 0;
    while (attempts--) {
        random_offset += (size_t)rand();
        random_offset %= lower_cache_size;

        uint32_t cycles_used, cycles_hi, start_hi, start_lo;

        /*
         * volatile: keep the compiler from hoisting or merging the timing code.
         * Early-clobber (&) outputs: eax/edx and both scratch registers are
         * written before the memory operand %4 is read, so none of them may
         * be chosen to hold the address of that operand.
         */
        __asm__ __volatile__(
            "mfence\n\t"
            "rdtsc\n\t"             /* read start cycle count into edx:eax */
            "mov %%edx, %2\n\t"
            "mov %%eax, %3\n\t"
            "mfence\n\t"
            "mov %4, %%al\n\t"      /* the load being timed */
            "mfence\n\t"
            "rdtsc\n\t"             /* read end cycle count into edx:eax */
            "sub %3, %%eax\n\t"     /* low word first ... */
            "sbb %2, %%edx"         /* ... then high word with the borrow */
            : "=&a"(cycles_used)
            , "=&d"(cycles_hi)
            , "=&r"(start_hi)
            , "=&r"(start_lo)
            : "m"(random_data[random_offset])
            : "cc", "memory");
        (void)cycles_hi;

        if (cycles_used < max_latency) {
            latencies[cycles_used]++;
        } else {
            latencies[max_latency - 1]++;
        }
    }

    munmap(random_data, lower_cache_size);
    return 0;
}

 

 

Link to post
Share on other sites

0 answers to this question

Recommended Posts

There have been no answers to this question yet

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Answer this question...

×   Pasted as rich text.   Paste as plain text instead

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.



  • Forum Statistics

    • Total Topics
      148680
    • Total Posts
      644502
×
×
  • Create New...