Medir memória cache

Geovanidarcie · Outubro 10, 2019

Por favor, alguém consegue me explicar o que acontece exatamente no main e na função int i386_cpuid_caches(size_t *data_caches)?

/* Medindo latências de cache

Prefiro usar o relógio de hardware como medida.
A instrução rdtsc informa a contagem atual de ciclos desde que a CPU foi ligada. Além disso,
é melhor usar asm para garantir que sempre as mesmas instruções sejam usadas tanto nas execuções medidas quanto nas execuções vazias (de calibração).
Usando isso e um pouco de estatística, fiz este programa há muito tempo. */

#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <sys/mman.h>

/* Enumerates the CPU caches via CPUID, stores the total size in bytes of each
 * data/unified cache into the given array and returns how many were stored.
 * The definition probes at most 32 cache indices, so the array needs room for
 * 32 entries. */
int i386_cpuid_caches (size_t *);
/* Arguments, in order: number of timed accesses, size in bytes of the buffer
 * to access at random offsets, latency histogram to update, number of buckets
 * in that histogram. */
int test_cache(size_t,size_t,int *, size_t);

/*
 * Returns the index of the first bucket at which the running sample count
 * reaches 75% of all samples in the histogram (i.e. the 75th percentile
 * latency, in cycles). `buckets` must be non-zero.
 */
static size_t latency_percentile75(const int *histogram, size_t buckets)
{
    size_t total = 0;
    size_t running = 0;
    size_t j;

    for (j = 0; j < buckets; j++)
        total += histogram[j];

    for (j = 0; j < buckets; j++) {
        running += histogram[j];
        if (running >= total * .75)
            return j;
    }

    /* Not reachable for non-negative counts; kept as a defined fallback. */
    return buckets - 1;
}

/*
 * Program entry point.
 *
 * 1. Asks i386_cpuid_caches() for the sizes of the data/unified caches.
 * 2. Calibrates: times an empty rdtsc..rdtsc sequence `attempts` times and
 *    takes the 75th percentile as the fixed overhead of the measurement.
 * 3. For every cache, lets test_cache() time random accesses into a buffer
 *    four times the cache's size and reports the 75th percentile latency
 *    minus the calibration overhead on stderr.
 */
int main(void)
{
    enum { LATENCY_BUCKETS = 0x400 };   /* 1024 one-cycle histogram buckets */

    size_t cache_sizes[32];
    int latencies[LATENCY_BUCKETS];
    const size_t bucket_count = sizeof(latencies) / sizeof(*latencies);
    const int attempts = 1000000;
    int num_data_caches = i386_cpuid_caches(cache_sizes);
    int empty_cycles;
    int i;

    /* Start from an all-zero histogram. */
    memset(latencies, 0, sizeof(latencies));

    /* Measure how many cycles the timing sequence itself costs. */
    for (i = 0; i < attempts; i++) {
        int32_t cycles_used, cycles_high, start_high, start_low;

        /*
         * volatile: the statement has no inputs, so without it the compiler
         * is free to treat it as loop-invariant and execute it only once.
         */
        __asm__ __volatile__ (
            "mfence\n\t"            /* serialize memory accesses       */
            "rdtsc\n\t"             /* start timestamp in edx:eax      */
            "mov %%edx, %2\n\t"     /* save high half                  */
            "mov %%eax, %3\n\t"     /* save low half                   */
            "mfence\n\t"
            "mfence\n\t"
            "rdtsc\n\t"             /* end timestamp in edx:eax        */
            "sub %3, %%eax\n\t"     /* low half first ...              */
            "sbb %2, %%edx"         /* ... then high half with borrow  */
            : "=a" (cycles_used)
            , "=d" (cycles_high)
            , "=r" (start_high)
            , "=r" (start_low)
            :
            : "cc", "memory"
            );
        (void)cycles_high;

        /* Out-of-range (or negative) samples are collected in the last bucket. */
        if (cycles_used >= 0 && (size_t)cycles_used < bucket_count)
            latencies[cycles_used]++;
        else
            latencies[bucket_count - 1]++;
    }

    empty_cycles = (int)latency_percentile75(latencies, bucket_count);
    fprintf(stderr, "Empty counting takes %d cycles\n", empty_cycles);

    for (i = 0; i < num_data_caches; i++) {
        int latency;

        /* Each cache gets its own histogram; do not mix in earlier samples. */
        memset(latencies, 0, sizeof(latencies));

        test_cache((size_t)attempts, cache_sizes[i] * 4, latencies, bucket_count);

        latency = (int)latency_percentile75(latencies, bucket_count);
        fprintf(stderr, "Cache ID %i has latency %d cycles\n", i, latency - empty_cycles);
    }

    return 0;
}

/*
 * Enumerates the processor's caches with CPUID leaf 4 (deterministic cache
 * parameters), prints a description of every cache found on stdout and
 * records the total size in bytes of each data or unified cache.
 *
 * data_caches: output array; must have room for 32 entries, because up to
 *              32 sub-leaves are probed.
 * Returns the number of entries written to data_caches (0 when the CPU does
 * not report leaf 4 or reports no caches through it).
 */
int i386_cpuid_caches( size_t *data_caches){
    int i;
    int num_data_caches = 0;

    /* Leaf 0 returns the highest supported basic leaf in EAX. Querying an
     * unsupported leaf would yield unrelated data, so bail out early. */
    uint32_t max_leaf = 0, vendor_b, vendor_c = 0, vendor_d;
    __asm__ __volatile__ (
        "cpuid"
        : "+a"(max_leaf)
        , "=b"(vendor_b)
        , "+c"(vendor_c)
        , "=d"(vendor_d)
        );
    (void)vendor_b;
    (void)vendor_d;
    if (max_leaf < 4)
        return 0;

    for (i = 0; i < 32; i++){
        uint32_t eax, ebx, ecx, edx;

        eax = 4;    /* leaf: deterministic cache parameters */
        ecx = i;    /* sub-leaf: cache index                */

        __asm__ __volatile__ (
            "cpuid"
            : "+a"(eax)
            , "=b"(ebx)
            , "+c"(ecx)
            , "=d"(edx)
            );
        (void)edx;

        /* EAX[4:0] is the cache type; 0 means "no more caches". */
        int cache_type = eax & 0x1F;

        if (cache_type == 0)
            break;

        const char *cache_type_string;
        switch (cache_type){
            case 1: cache_type_string = "Data Cache"; break;
            case 2: cache_type_string = "Instruction Cache"; break;
            case 3: cache_type_string = "Unified Cache"; break;
            default: cache_type_string = "Unkown Type Cache"; break;
        }

        /* EAX[7:5] level, EAX[8] self-initializing, EAX[9] fully associative. */
        int cache_level = (eax >> 5) & 0x7;
        int cache_is_self_initializing = (eax >> 8) & 0x1;
        int cache_is_fully_associativity = (eax >> 9) & 0x1;

        /* Every field is stored minus one.
         * ECX: sets; EBX[11:0] line size; EBX[21:12] partitions; EBX[31:22] ways. */
        unsigned int cache_sets = ecx + 1;
        unsigned int cache_coherency_line_size = (ebx & 0xFFF) + 1;
        unsigned int cache_physical_line_partitions = ((ebx >> 12) & 0x3FF) + 1;
        unsigned int cache_ways_of_associativity = ((ebx >> 22) & 0x3FF) + 1;

        /* Multiply in size_t so the product cannot wrap in unsigned int. */
        size_t cache_total_size = (size_t)cache_ways_of_associativity
                                * cache_physical_line_partitions
                                * cache_coherency_line_size
                                * cache_sets;

        /* Only caches that can hold data are of interest to the caller. */
        if (cache_type == 1 || cache_type == 3){
            data_caches[num_data_caches++] = cache_total_size;
        }

        printf(
            "Cache ID %d: \n"
            "-level: %d\n"
            "-Type: %s\n"
            "-Sets: %u\n"
            "- System Coherency Line Size: %u bytes\n"
            "- Physical Line partitions: %u\n"
            "- Ways of associativity: %u\n"
            "-Total Size: %zu bytes (%zu kb) \n"
            "- Is fully associative:%s\n"
            "- Is self initializing: %s\n"
            "\n"
            ,i
            ,cache_level
            ,cache_type_string
            ,cache_sets
            ,cache_coherency_line_size
            ,cache_physical_line_partitions
            ,cache_ways_of_associativity
            ,cache_total_size, cache_total_size >> 10
            ,cache_is_fully_associativity ? "true" : "false"
            ,cache_is_self_initializing ? "true" : "false"
            );
    }

    return(num_data_caches);
}

/*
 * Times `attempts` single-byte loads at pseudo-random offsets inside a
 * freshly mapped anonymous buffer of `lower_cache_size` bytes and adds one
 * sample per load to the histogram `latencies`.
 *
 * attempts:         number of timed loads.
 * lower_cache_size: size in bytes of the buffer; must be non-zero.
 * latencies:        histogram indexed by elapsed cycles; it is incremented,
 *                   not cleared, so the caller decides whether to reset it.
 * max_latency:      number of buckets in `latencies`; samples that do not fit
 *                   are counted in the last bucket.
 *
 * Returns 0 on success, -1 if `latencies` is NULL or `max_latency` is 0.
 * Aborts the process if the buffer cannot be mapped.
 */
int test_cache(size_t attempts, size_t lower_cache_size, int *latencies , size_t max_latency){
    char *random_data;
    size_t offset = 0;
    size_t page_size;
    size_t i;
    long reported_page_size;

    /* Without at least one bucket there is nowhere to store a sample. */
    if (latencies == NULL || max_latency == 0)
        return -1;

    random_data = mmap(
        NULL
        ,lower_cache_size
        ,PROT_READ | PROT_WRITE
        ,MAP_PRIVATE | MAP_ANON
        ,-1
        ,0
        );

    if (random_data == MAP_FAILED){
        perror("mmap");
        abort();
    }

    /* Query the page size once; fall back to 4 KiB if it is unavailable. */
    reported_page_size = sysconf(_SC_PAGESIZE);
    page_size = reported_page_size > 0 ? (size_t)reported_page_size : 4096;

    /* Write one byte per page so every page is actually backed by memory
     * before timing starts. */
    for (i = 0; i < lower_cache_size; i += page_size){
        random_data[i] = 1;
    }

    while (attempts--){
        int32_t cycles_used, cycles_high, start_high, start_low;

        offset = (offset + (size_t)rand()) % lower_cache_size;

        /*
         * volatile keeps the compiler from moving or merging the timed
         * sequence. All outputs are early-clobber ("&") because they are
         * written before the memory operand %4 is read, so they must not
         * share a register with its address.
         */
        __asm__ __volatile__ (
            "mfence\n\t"
            "rdtsc\n\t"             /* start timestamp in edx:eax      */
            "mov %%edx, %2\n\t"     /* save high half                  */
            "mov %%eax, %3\n\t"     /* save low half                   */
            "mfence\n\t"
            "mov %4, %%al\n\t"      /* the load being timed            */
            "mfence\n\t"
            "rdtsc\n\t"             /* end timestamp in edx:eax        */
            "sub %3, %%eax\n\t"     /* low half first ...              */
            "sbb %2, %%edx"         /* ... then high half with borrow  */
            : "=&a" (cycles_used)
            , "=&d" (cycles_high)
            , "=&r" (start_high)
            , "=&r" (start_low)
            : "m" (random_data[offset])
            : "cc", "memory"
            );
        (void)cycles_high;

        /* Out-of-range (or negative) samples are collected in the last bucket. */
        if (cycles_used >= 0 && (size_t)cycles_used < max_latency)
            latencies[cycles_used]++;
        else
            latencies[max_latency - 1]++;
    }

    munmap(random_data, lower_cache_size);
    return(0);
}

Entrar

Medir memória cache

Pergunta

Geovanidarcie

Link para o comentário

Compartilhar em outros sites

0 respostas a esta questão

Posts Recomendados

Participe da discussão

Estatísticas dos Fóruns

Navegação

Atividades