社区首页 >问答首页 >增加程序执行时间与线程数的关系

问增加程序执行时间与线程数的关系
EN

Stack Overflow用户

提问于 2019-11-11 02:48:25

回答 1查看 228关注 0票数 0

我正在尝试使用pthreads，C构建一个高效的并发散列映射。

以下是我的实现

#include <stdlib.h>
#include <stddef.h>
#include <pthread.h>
#include <stdint.h>
#include <limits.h>
#include <stdio.h>
#include <linux/limits.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdlib.h>
#include <time.h>

#define ENTRIES_PER_BUCKET 3

struct Bucket
{
    pthread_mutex_t mutex;
    void **keys;
    int *vals;
    struct Bucket *next;
};

struct Concurrent_Map
{
    struct Bucket *buckets;
    map_keys_equality *keys_eq;
    map_key_hash *khash;
    int capacity;
};

int concurrent_map_allocate /*@ <t> @*/ (map_keys_equality *keq, map_key_hash *khash,
                                         unsigned capacity,
                                         struct Concurrent_Map **map_out)

{

    struct Concurrent_Map *old_map_val = *map_out;
    struct Concurrent_Map *map_alloc = malloc(sizeof(struct Concurrent_Map));
    if (map_alloc == NULL)
    {
        return 0;
    }
    *map_out = (struct Concurrent_Map *)map_alloc;

    struct Bucket *buckets_alloc = (struct Bucket *)malloc(sizeof(struct Bucket) * (int)capacity);

    if (buckets_alloc == NULL)
    {
        free(map_alloc);
        *map_out = old_map_val;
        return 0;
    }
    (*map_out)->buckets = buckets_alloc;
    (*map_out)->capacity = capacity;
    (*map_out)->keys_eq = keq;
    (*map_out)->khash = khash;

    unsigned i;

    for (i = 0; i < capacity; i++)
    {
        if (pthread_mutex_init(&((*map_out)->buckets[i].mutex), NULL) == 0)
        {
            void **key_alloc = malloc(sizeof(void *) * (ENTRIES_PER_BUCKET));

            if (key_alloc != NULL)
            {
                (*map_out)->buckets[i].keys = key_alloc;

                int k;
                for (k = 0; k < ENTRIES_PER_BUCKET; k++)
                {

                    (*map_out)->buckets[i].keys[k] = NULL;
                }
            }

            int *vals_alloc = malloc(sizeof(int) * (ENTRIES_PER_BUCKET));

            if (vals_alloc != NULL)
            {
                (*map_out)->buckets[i].vals = vals_alloc;

                int k;
                for (k = 0; k < ENTRIES_PER_BUCKET; k++)
                {
                    (*map_out)->buckets[i].vals[k] = -1;
                }
            }

            (*map_out)->buckets[i].next = NULL;
        }
    }

    // todo exceptions in allocation

    return 1;
}

static unsigned loop(unsigned k, unsigned capacity)
{
    unsigned g = k % capacity;

    unsigned res = (g + capacity) % capacity;

    return res;
}

int concurrent_map_get(struct Concurrent_Map *map, void *key, int *value_out)

{
    map_key_hash *khash = map->khash;
    unsigned hash = khash(key);

    unsigned start = loop(hash, map->capacity);
    unsigned bucket_index = loop(start + 0, map->capacity);

    if (bucket_index < map->capacity)
    {

        struct Bucket *bucket = &(map->buckets[bucket_index]);

        pthread_mutex_t mutex = bucket->mutex;

        pthread_mutex_lock(&mutex);

        int j;
        do
        {
            for (j = 0; j < ENTRIES_PER_BUCKET; j++)
            {
                int val = bucket->vals[j];
                if (map->keys_eq(bucket->keys[j], key))
                {
                    if (bucket->vals[j] == val)
                    {
                        *value_out = val;
                        return 1;
                    }
                    else
                    {
                        *value_out = -1;
                        return 0;
                    }
                }
            }
            if (bucket->next != NULL)
            {
                bucket = (bucket->next);
            }
            else
            {
                break;
                pthread_mutex_unlock(&mutex);
            }

            pthread_mutex_unlock(&mutex);

        } while (1);
    }
    *value_out = -1;
    return 0;
}

int concurrent_map_put(struct Concurrent_Map *map, void *key, int value)

{
    map_key_hash *khash = map->khash;
    unsigned hash = khash(key);

    unsigned start = loop(hash, map->capacity);
    unsigned bucket_index = loop(start + 0, map->capacity);

    struct Bucket *bucket = &(map->buckets[bucket_index]);

    int j;

    do
    {

        pthread_mutex_t mutex = bucket->mutex;

        int j;

        pthread_mutex_lock(&mutex);

        for (j = 0; j < ENTRIES_PER_BUCKET; j++)
        {
            if (map->keys_eq(bucket->keys[j], key))
            {
                pthread_mutex_unlock(&mutex);
                return 0;
            }
            else if (bucket->keys[j] == NULL)
            {
                bucket->vals[j] = value;
                bucket->keys[j] = key;
                pthread_mutex_unlock(&mutex);
                return 1;
            }
        }
        if (bucket->next == NULL)

        {
            // allocate a new bucket

            struct Bucket *new_bucket = malloc(sizeof(struct Bucket));

            if (pthread_mutex_init(&(new_bucket->mutex), NULL) == 0)
            {
                void **key_alloc = malloc(sizeof(void *) * (ENTRIES_PER_BUCKET));

                if (key_alloc != NULL)
                {
                    new_bucket->keys = key_alloc;

                    int k;
                    for (k = 0; k < ENTRIES_PER_BUCKET; k++)
                    {
                        new_bucket->keys[k] = NULL;
                    }
                }

                int *vals_alloc = malloc(sizeof(int) * (ENTRIES_PER_BUCKET));

                if (vals_alloc != NULL)
                {
                    new_bucket->vals = vals_alloc;

                    int k;
                    for (k = 0; k < ENTRIES_PER_BUCKET; k++)
                    {
                        new_bucket->vals[k] = -1;
                    }
                }

                bucket->next = new_bucket;
            }
        }

        pthread_mutex_unlock(&mutex);
        bucket = bucket->next;

    } while (1);

    return 0;
}

int concurrent_map_erase(struct Concurrent_Map *map, void *key, void **trash)

{

    map_key_hash *khash = map->khash;
    unsigned hash = khash(key);

    unsigned start = loop(hash, map->capacity);
    unsigned bucket_index = loop(start + 0, map->capacity);

    struct Bucket *bucket = &(map->buckets[bucket_index]);

    int j;

    do
    {

        pthread_mutex_t mutex = bucket->mutex;

        int j;

        pthread_mutex_lock(&mutex);

        for (j = 0; j < ENTRIES_PER_BUCKET; j++)
        {
            if (map->keys_eq(bucket->keys[j], key))
            {
                bucket->vals[j] = -1;
                bucket->keys[j] = NULL;
                pthread_mutex_unlock(&mutex);
                return 1;
            }
        }

        pthread_mutex_unlock(&mutex);
        if (bucket->next != NULL)
        {
            bucket = (bucket->next);
        }
        else
        {
            break;
        }

    } while (1);
    return 0;
}

int concurrent_map_size(struct Concurrent_Map *map)

{
    int num_buckets = 0;

    struct Bucket *buckets = map->buckets;
    unsigned i;

    for (i = 0; i < map->capacity; i++)
    {
        struct Bucket bucket = buckets[i];
        do
        {
            num_buckets++;
            if (bucket.next != NULL)
            {
                bucket = *(bucket.next);
            }
            else
            {
                break;
            }

        } while (1);
    }
    return num_buckets * ENTRIES_PER_BUCKET;
}
struct FlowId
{
    int src_port;
    int dst_port;
    int src_ip;
    int dst_ip;
    int internal_device;
    int protocol;
};

bool FlowId_eq(void *a, void *b)

{
    if (a == NULL || b == NULL)
    {
        return false;
    }
    struct FlowId *id1 = a;
    struct FlowId *id2 = b;

    return (id1->src_port == id2->src_port) && (id1->dst_port == id2->dst_port) && (id1->src_ip == id2->src_ip) && (id1->dst_ip == id2->dst_ip) && (id1->internal_device == id2->internal_device) && (id1->protocol == id2->protocol);
}

unsigned FlowId_hash(void *obj)

{
    struct FlowId *id = obj;
    unsigned hash = 0;
    hash = __builtin_ia32_crc32si(hash, id->src_port);
    hash = __builtin_ia32_crc32si(hash, id->dst_port);
    hash = __builtin_ia32_crc32si(hash, id->src_ip);
    hash = __builtin_ia32_crc32si(hash, id->dst_ip);
    hash = __builtin_ia32_crc32si(hash, id->internal_device);
    hash = __builtin_ia32_crc32si(hash, id->protocol);
    return hash;
}

struct Concurrent_Map *concurrent_map;

#define NUM_THREADS 2
#define NUM_PACKETS 10000000

void *expirator(void *arg)
{
    // printf("Thread started executing\n");
    unsigned i = 0;
    int error = 0;
    unsigned packet_count = NUM_PACKETS / NUM_THREADS;
    while (i < packet_count)
    {
        i++;
        struct FlowId *id = malloc(sizeof(struct FlowId));
        struct FlowId *id1 = malloc(sizeof(struct FlowId));
        id->dst_ip = 1;
        id->src_ip = 1;
        id->internal_device = 1;
        id->protocol = 1;
        id->src_port = 1;
        id->dst_port = rand() % 65536;

        id1->dst_ip = 1;
        id1->src_ip = 1;
        id1->internal_device = 1;
        id1->protocol = 1;
        id1->src_port = 1;
        id1->dst_port = rand() % 65536;

        int external_port = rand() % 65536;
        int external;

        concurrent_map_erase(concurrent_map, id, NULL);

        concurrent_map_put(concurrent_map, id, external_port);
        concurrent_map_get(concurrent_map, id, &external);

        if (external_port != external)
        {
            error++;
        }
        else
        {
        }
    }
    return NULL;
}

int main()
{

    clock_t begin = clock();

    concurrent_map_allocate(FlowId_eq, FlowId_hash, 65536, &(concurrent_map));

    pthread_t *threads = malloc(sizeof(pthread_t) * NUM_THREADS);
    int i;
    for (i = 0; i < NUM_THREADS; i++)
    {
        if (pthread_create(&threads[i], NULL, expirator, NULL) != 0)
        {
            printf("Error creating threads");
            exit(0);
        }
    }
    for (i = 0; i < NUM_THREADS; i++)
    {
        if (pthread_join(threads[i], NULL) != 0)
        {
            printf("Error joining threads");
            exit(0);
        }
    }
    clock_t end = clock();
    double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
    printf("%lf\n", time_spent);
    return 0;
}

下面是如何运行这个程序。

gcc  concurrent_map.c  -o test-concurrent-new -lpthread -msse4.2 -O3

然后，我测量固定工作负载的执行时间，下面是我观察到的时间值。

1: 3.29

2: 6.687811

3: 5.88

4: 6.23

5: 6.38

6: 6.52

7: 6.74

8: 6.82

看起来，当线程数量增加时，执行时间就会增加，并且几乎保持不变。

我使用Mutrace分析了这段代码，它查找互斥争用。结果证明

没有根据过滤参数竞争互斥。

我检查了缓存丢失的数量，结果发现当修改线程数时，缓存丢失的数量大致相同。

为什么当线程数增加时，执行时间不减少？

我正在一台32核心机器上运行这个

performance

pthreads

multithreading

回答 1

Stack Overflow用户

发布于 2019-11-11 07:47:32

rand()通常不适合多线程执行。相反，可以使用rand_r()。

还可以使用linux时间工具对应用程序进行计时。

您的工作负载生成带来了巨大的开销，我认为这是这里的瓶颈，而不是并发的散列图。

票数 0

页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://stackoverflow.com/questions/58799910

复制

并发线程数、QPS与平均耗时的关系

apache

并发线程数：指的是施压机施加的同时请求的线程数量。比如，我启动并发线程数100，即我会在施压机器上面启动100个线程，不断地向服务器发请求。

Nanako

2021/02/03

9.5K3

CPU 核数与线程数有什么关系？

编程算法 java

实际上CPU和厨师一样，都是按照菜谱(机器指令)去执行某个动作，从操作系统的角度讲当CPU切换回用户态后，CPU执行的一段指令就是线程，或者说属于某个线程。

Java技术栈

2021/09/29

7K0

CPU 核数与线程数有什么关系？

编程算法

你没猜错，做菜之前先去下一份菜谱，照着菜谱一步步来：起锅烧油、葱姜蒜末下锅爆香、倒入切好的食材、大火翻炒、加入适量酱油、加入适量盐、继续翻炒、出锅喽！

JavaFish

2021/08/25

2.3K0

程序、进程、线程的关系

java

创建一个Java线程常见的有两种方式: 1.继承Thread类两种表示方法：（1）.外部类 import java.util.Date; public class Test1 { public static void main(String[] args) { //启动新线程来完成输出时间的操作 Mytime mt=new Mytime(); //启动新线程 Thread的start() mt.

汤高

2018/01/11

6260

executorservice 线程池_并发数与线程数

https 网络安全 java html

输出内容的最后是：线程池中线程数目：0，队列中等待执行的任务数目：0，已执行完的任务数目：15

全栈程序员站长

2022/09/30

8530

Java线程与Linux内核线程的映射关系

linux jvm java

Java线程与Linux内核线程的映射关系Linux从内核2.6开始使用NPTL （Native POSIX Thread Library）支持，但这时线程本质上还轻量级进程。

chengcheng222e

2021/11/04

2.2K0

synchronized 与多线程的哪些关系

编程算法数据分析 jvm jdk

JVM 实现的 synchronized JDK 实现的 ReentrantLock

BUG弄潮儿

2022/06/30

2640

Java中线程与堆栈的关系

java boolean byte char int

栈是线程私有的，每个线程都是自己的栈，每个线程中的每个方法在执行的同时会创建一个栈帧用于存局部变量表、操作数栈、动态链接、方法返回地址等信息。每一个方法从调用到执行完毕的过程，就对应着一个栈帧在虚拟机栈中从入栈到出栈的过程。其中局部变量表，存放基本类型（boolean、byte、char、short、int、float）、对象的引用等等，对象的引用不是对象实例本身，而是指向对象实例的一个指针。

万猫学社

2022/04/22

7130

进程、线程、应用程序之间的关系

编程算法

进程是指在系统中正在运行的一个应用程序；线程是系统分配处理器时间资源的基本单元，或者说进程之内独立执行的一个单元。对于操作系统而言，其调度单元是线程。一个进程至少包括一个线程，通常将该线程称为主线程。一个进程从主线程的执行开始进而创建一个或多个附加线程，就是所谓基于多线程的多任务。那进程与线程的区别到底是什么？进程是执行程序的实例。例如，当你运行记事本程序（Nodepad）时，你就创建了一个用来容纳组成 Notepad.exe的代码及其所需调用动态链接库的进程。每个进程均运行在其专用且受保护

猿人谷

2018/01/17

1.5K0

CPU核数和线程（池）数量的关系（概念理解）

编程算法

目前手机配置：支持HUAWEI Mate 8非凡表现的，是拥有强大性能的华为麒麟950芯片。此芯片为八核4*Cortex A72 2.3GHz + 4*Cortex A5

程序员小王

2018/04/12

5.4K0

python程序执行时间_用于在Python中查找程序执行时间的程序

linux python 编程算法

The execution time of a program is defined as the time spent by the system to execute the task. As we all know any program takes some execution time but we don't know how much. So, don't worry, in this tutorial we will learn it by using the datetime module and also we will see the execution time for finding the factorial of a large number. A large number will be provided by the user and we have to calculate the factorial of a number, also we have to find the execution time of the factorial program. Before going to write the Python program, we will try to understand the algorithm.

用户7886150

2021/01/28

2K0

共生与共享：线程与进程的关系

并发并发编程进程通信线程

在计算机科学和操作系统领域，线程（Thread）和进程（Process）是两个关键概念。它们之间存在密切的关系，但又有着明显的区别。本文将深入探讨线程和进程之间的关系，以及它们在并发编程和资源管理中的作用。

关忆北.

2023/10/19

2090

linux服务器CPU物理颗数.内核数.线程数查看及关系详解

linux grep

公司服务器是分几批购买的，所以造成配置方面也不大相同特别是cpu配置方面，一直想弄清楚这些cpu都是什么型号，有几颗物理cpu，每颗cpu有几个核心，没个核心有几个线程。看起来很繁琐，下面一起彻底分分析下。大致的看了下公司服务器的型号，这个很容易获取使用命令more /proc/cpuinfo |grep “model name” 或者dmidecode -s processor-version都可以得到这里我主要有两种类型的cpu 一种是Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz，另一种是Intel(R) Xeon(R) CPU E5620 @ 2.40GHz 下面一起来看下两种类型cpu都有什么不同。使用命令分别获取cpu的物理颗数内核数线程数这里要说明一下 CPU的核心数是指物理上，也就是硬件上存在着几颗物理cpu,指的是真实存在是cpu处理器的个数，1个代表一颗2个代表2颗cpu处理器。核心数：一个核心就是一个物理线程,英特尔有个超线程技术可以把一个物理线程模拟出两个线程来用,充分发挥CPU性能，意思是一个核心可以有多个线程。线程数：线程数是一种逻辑的概念，简单地说，就是模拟出的CPU核心数。比如，可以通过一个CPU核心数模拟出2线程的CPU，也就是说，这个单核心的CPU被模拟成了一个类似双核心CPU的功能。

zhangdd

2018/08/01

4.7K0

信息学与数学、奥数的关系

编程算法数据结构

反过来说，信息学对数学帮助也很大，信息学和算法是相辅相成的。因为算法就是计算方法。实现算法的过程，就是用某种编程语言来实现计算方法并求出结果的过程。算法训练必然会促进数学的进步。

海天一树

2019/05/14

1.3K0

【森林结点数，边数与树个数的关系】

看一个例子：若森林F有15条边、25个结点，则F包含树的个数是：____(2分)。答案是10。举完例子了，下面开始分析：

_DIY

2019/10/16

2.4K0

完成端口与线程池的关系_端口触发

编程算法

关于IOCP网上到处都是资料，说的也很详细。我在这里就不再多说了，这只是本人在学习IOCP时的笔记，和配合AcceptEx写的一个极小的服务端程序。由于刚刚接触ICOP加上本人刚毕业不到一年，所以里面的理解或观点可能有误，还请大家多多批评！

全栈程序员站长

2022/11/11

9420

详解tomcat的连接数与线程池

socket编程自动驾驶 http tomcat

在使用tomcat时，经常会遇到连接数、线程数之类的配置问题，要真正理解这些概念，必须先了解Tomcat的连接器（Connector）。

小柒2012

2019/12/09

1.2K0

详解 Tomcat 的连接数与线程池

tomcat xml servlet 容器

前言在使用tomcat时，经常会遇到连接数、线程数之类的配置问题，要真正理解这些概念，必须先了解Tomcat的连接器（Connector）。在前面的文章详解Tomcat配置文件server.xml 中写到过：Connector的主要功能，是接收连接请求，创建Request和Response对象用于和请求端交换数据；然后分配线程让Engine（也就是Servlet容器）来处理这个请求，并把产生的Request和Response对象传给Engine。当Engine处理完请求后，也会通过Connector将

Java高级架构

2018/04/19

3.8K0

Kafka的分区数与多线程消费探讨

kafka 编程算法

这是典型的kafka消费端消费数据的代码，但可以看出这是十分典型的单线程消费。不能直接用在生产实践中。

王知无-import_bigdata

2019/12/25

8540

探讨kafka的分区数与多线程消费

编程算法 kafka

kafka算是很麻烦的一件事儿，起因是最近需要采集大量的数据，原先是只用了典型的high-level Consumer的API，最经典的不过如下：

九州暮云

2019/08/21

2.8K0

相似问题

增加线程数会增加执行时间。

基于线程数与执行时间关系的OpenMP并行编程

CPU使用与线程数的关系

增加xcode上的线程数会增加程序时间。

增加线程数

添加站长进交流群

领取专属 10元无门槛券

AI混元助手 在线答疑

关注 腾讯云开发者公众号

洞察 腾讯核心技术

剖析业界实践案例

问增加程序执行时间与线程数的关系
EN