加入收藏 | 设为首页 | 会员中心 | 我要投稿 李大同 (https://www.lidatong.com.cn/)- 科技、建站、经验、云计算、5G、大数据,站长网!
当前位置: 首页 > 百科 > 正文

c# – 在这种情况下,为什么使用AsParallel()比foreach慢?

发布时间:2020-12-16 00:00:09 所属栏目:百科 来源:网络整理
导读:我正在从这种格式的excel中提取数据 product1 | unnamedcol2 | product2 | unnamedcol4 | product3 | unnamedcol6 |------------------------------------------------------------------------------- @1foo | 1.10 | @1foo | 0.3 | @1foo | 0.3 @2foo | 1.
我正在从这种格式的excel中提取数据

 product1   | unnamedcol2 | product2  | unnamedcol4 | product3  | unnamedcol6 |
-------------------------------------------------------------------------------
 @1foo      |        1.10 | @1foo     |         0.3 | @1foo     |         0.3
 @2foo      |        1.00 | @2foo     |           2 | @2foo     |
 @3foo      |        1.52 | @3foo     |        2.53 | @3foo     |
 @4foo      |        1.47 |           |             | @4foo     |        1.31
 @5foo      |        1.49 |           |             | @5foo     |        1.31

该文件使用所有255个字段.使用dapper-dot-net我通过此代码获取数据

// Load every row of the sheet through Dapper's Query and cast each row to a
// column-name -> cell-value dictionary.
// NOTE(review): the table name is formatted directly into the SQL text — confirm it
// never comes from untrusted input.
IEnumerable<IDictionary<string,object>> excelDataRaw =
                conn.Query(string.Format("select * from {0}",table)).Cast<IDictionary<string,object>>();

我将这些数据传递给下面的测试方法.返回的数据是由 IDictionary 组成的 IEnumerable,其中每个键是一个产品,每个值是一个 IDictionary;该 IDictionary 的每个键是 product 列中的值,对应的值是该 product 列右侧 unnamedcol 列中的值.

// Example of the refined shape: product column name -> (product-column cell value -> price).
var excelDataRefined = new List<IDictionary<string,IDictionary<string,decimal>>>();
excelDataRefined.Add(new Dictionary<string,IDictionary<string,decimal>>());
excelDataRefined[0].Add( "product",new Dictionary<string,decimal>());
excelDataRefined[0]["product"].Add("@1foo",1.1m);

方法:

/// <summary>
/// Benchmark 1: walks the headers with a plain <c>foreach</c>, building one deferred
/// PLINQ query per column, then zips each product column with the column to its right.
/// Prints the elapsed time of both phases to the console.
/// </summary>
/// <param name="excelDataRaw">Rows of the sheet; each row maps a column header to its cell value.</param>
/// <returns>
/// For each product column header, a dictionary from the product-column cell value to the
/// decimal parsed from the neighbouring column.
/// </returns>
private static Dictionary<string,Dictionary<string,decimal>> Benchmark_foreach(IEnumerable<IDictionary<string,object>> excelDataRaw)
{
    Console.WriteLine("1. Using foreach");
    var watch = new Stopwatch();
    watch.Start();

    // Column order is taken from the key order of the first row.
    List<string> headers = excelDataRaw.Select(dictionary => dictionary.Keys).First().ToList();
    // The toggling predicate keeps the headers at even positions (0, 2, 4, ...): the product columns.
    bool isEven = false;
    List<string> products = headers.Where(h => isEven = !isEven).ToList();
    var dates = new List<IEnumerable<object>>();
    var prices = new List<IEnumerable<object>>();

    foreach (string field in headers)
    {
        string product1 = field;
        if (headers.IndexOf(field) % 2 == 0)
        {
            // Stored as a deferred query (no ToList): it runs again on every enumeration.
            dates.Add(
                excelDataRaw.AsParallel().AsOrdered().Select(col => col[product1]).Where(row => row != null));
        }

        if (headers.IndexOf(field) % 2 == 1)
        {
            // dates.Last().Count() executes the preceding product-column query right here
            // to learn how many price cells to take.
            prices.Add(
                excelDataRaw.AsParallel().AsOrdered().Select(col => col[product1] ?? 0m).Take(dates.Last().Count()));
        }
    }

    watch.Stop();
    Console.WriteLine("Rearange the data in: {0}s",watch.Elapsed.TotalSeconds);
    watch.Restart();

    var excelDataRefined = new Dictionary<string,Dictionary<string,decimal>>();
    foreach (IEnumerable<object> datelist in dates)
    {
        decimal num;
        IEnumerable<object> datelist1 = datelist;
        // Keep only the price cells whose text parses as a decimal.
        IEnumerable<object> pricelist =
            prices[dates.IndexOf(datelist1)].Select(value => value ?? 0m).Where(
                content => decimal.TryParse(content.ToString(),out num));
        // Enumerating the deferred queries happens here, inside Zip/ToDictionary.
        Dictionary<string,decimal> dict =
            datelist1.Zip(pricelist,(k,v) => new { k,v }).ToDictionary(
                x => (string)x.k,x => decimal.Parse(x.v.ToString()));

        // Only the first occurrence of a product header is stored.
        if (!excelDataRefined.ContainsKey(products[dates.IndexOf(datelist1)]))
        {
            excelDataRefined.Add(products[dates.IndexOf(datelist1)],dict);
        }
    }

    watch.Stop();
    Console.WriteLine("Zipped the data in: {0}s",watch.Elapsed.TotalSeconds);

    return excelDataRefined;
}

/// <summary>
/// Benchmark 2: same rearrangement as <c>Benchmark_foreach</c>, but the headers are walked
/// with <c>AsParallel().AsOrdered().ForAll</c> and every column is materialised with
/// <c>ToList()</c>. Prints the elapsed time of both phases to the console.
/// </summary>
/// <param name="excelDataRaw">Rows of the sheet; each row maps a column header to its cell value.</param>
/// <returns>
/// For each product column header, a dictionary from the product-column cell value to the
/// decimal parsed from the neighbouring column.
/// </returns>
private static Dictionary<string,Dictionary<string,decimal>> Benchmark_AsParallel(IEnumerable<IDictionary<string,object>> excelDataRaw)
{
    Console.WriteLine("2. Using AsParallel().AsOrdered().ForAll");
    var watch = new Stopwatch();
    watch.Start();

    // Column order is taken from the key order of the first row.
    List<string> headers = excelDataRaw.Select(dictionary => dictionary.Keys).First().ToList();
    // The toggling predicate keeps the headers at even positions: the product columns.
    bool isEven = false;
    List<string> products = headers.Where(h => isEven = !isEven).ToList();
    var dates = new List<IEnumerable<object>>();
    var prices = new List<IEnumerable<object>>();

    // NOTE(review): List<T>.Add is invoked concurrently from ForAll's worker threads, and
    // ForAll does not preserve the AsOrdered ordering — confirm before relying on
    // dates/prices lining up with products.
    // The TakeWhile predicate ignores the row: it is all-or-nothing per header, so headers
    // of the wrong parity produce an empty list that RemoveAll drops below.
    headers.AsParallel().AsOrdered().ForAll(
        field =>
        dates.Add(
            excelDataRaw.AsParallel().AsOrdered().TakeWhile(x => headers.IndexOf(field) % 2 == 0).Select(
                col => col[field]).Where(row => row != null).ToList()));
    headers.AsParallel().AsOrdered().ForAll(
        field =>
        prices.Add(
            excelDataRaw.AsParallel().AsOrdered().TakeWhile(x => headers.IndexOf(field) % 2 == 1).Select(
                col => col[field] ?? 0m).Take(256).ToList()));
    dates.RemoveAll(x => x.Count() == 0);
    prices.RemoveAll(x => x.Count() == 0);

    watch.Stop();
    Console.WriteLine("Rearange the data in: {0}s",watch.Elapsed.TotalSeconds);
    watch.Restart();

    // Zip phase: same steps as Benchmark_foreach, here over already-materialised lists.
    var excelDataRefined = new Dictionary<string,Dictionary<string,decimal>>();
    foreach (IEnumerable<object> datelist in dates)
    {
        decimal num;
        IEnumerable<object> datelist1 = datelist;
        // Keep only the price cells whose text parses as a decimal.
        IEnumerable<object> pricelist =
            prices[dates.IndexOf(datelist1)].Select(value => value ?? 0m).Where(
                content => decimal.TryParse(content.ToString(),out num));
        Dictionary<string,decimal> dict =
            datelist1.Zip(pricelist,(k,v) => new { k,v }).ToDictionary(
                x => (string)x.k,x => decimal.Parse(x.v.ToString()));

        // Only the first occurrence of a product header is stored.
        if (!excelDataRefined.ContainsKey(products[dates.IndexOf(datelist1)]))
        {
            excelDataRefined.Add(products[dates.IndexOf(datelist1)],dict);
        }
    }

    watch.Stop();
    Console.WriteLine("Zipped the data in: {0}s",watch.Elapsed.TotalSeconds);

    return excelDataRefined;
}

/// <summary>
/// Benchmark 3: same rearrangement as <c>Benchmark_AsParallel</c>, but sequential — the
/// headers are walked with <c>List&lt;T&gt;.ForEach</c> and no PLINQ is involved.
/// Prints the elapsed time of both phases to the console.
/// </summary>
/// <param name="excelDataRaw">Rows of the sheet; each row maps a column header to its cell value.</param>
/// <returns>
/// For each product column header, a dictionary from the product-column cell value to the
/// decimal parsed from the neighbouring column.
/// </returns>
private static Dictionary<string,Dictionary<string,decimal>> Benchmark_ForEach(IEnumerable<IDictionary<string,object>> excelDataRaw)
{
    Console.WriteLine("3. Using ForEach");
    var watch = new Stopwatch();
    watch.Start();

    // Column order is taken from the key order of the first row.
    List<string> headers = excelDataRaw.Select(dictionary => dictionary.Keys).First().ToList();
    // The toggling predicate keeps the headers at even positions: the product columns.
    bool isEven = false;
    List<string> products = headers.Where(h => isEven = !isEven).ToList();
    var dates = new List<IEnumerable<object>>();
    var prices = new List<IEnumerable<object>>();

    // The TakeWhile predicate ignores the row: it is all-or-nothing per header, so headers
    // of the wrong parity produce an empty list that RemoveAll drops below.
    headers.ForEach(
        field =>
        dates.Add(
            excelDataRaw.TakeWhile(x => headers.IndexOf(field) % 2 == 0).Select(col => col[field]).Where(
                row => row != null).ToList()));
    headers.ForEach(
        field =>
        prices.Add(
            excelDataRaw.TakeWhile(x => headers.IndexOf(field) % 2 == 1).Select(col => col[field] ?? 0m).
            Take(256).ToList()));
    dates.RemoveAll(x => x.Count() == 0);
    prices.RemoveAll(x => x.Count() == 0);

    watch.Stop();
    Console.WriteLine("Rearange the data in: {0}s",watch.Elapsed.TotalSeconds);
    watch.Restart();

    // Zip phase: same steps as Benchmark_foreach, here over already-materialised lists.
    var excelDataRefined = new Dictionary<string,Dictionary<string,decimal>>();
    foreach (IEnumerable<object> datelist in dates)
    {
        decimal num;
        IEnumerable<object> datelist1 = datelist;
        // Keep only the price cells whose text parses as a decimal.
        IEnumerable<object> pricelist =
            prices[dates.IndexOf(datelist1)].Select(value => value ?? 0m).Where(
                content => decimal.TryParse(content.ToString(),out num));
        Dictionary<string,decimal> dict =
            datelist1.Zip(pricelist,(k,v) => new { k,v }).ToDictionary(
                x => (string)x.k,x => decimal.Parse(x.v.ToString()));

        // Only the first occurrence of a product header is stored.
        if (!excelDataRefined.ContainsKey(products[dates.IndexOf(datelist1)]))
        {
            excelDataRefined.Add(products[dates.IndexOf(datelist1)],dict);
        }
    }

    watch.Stop();
    Console.WriteLine("Zipped the data in: {0}s",watch.Elapsed.TotalSeconds);

    return excelDataRefined;
}

> Benchmark_foreach 大约需要 3.5 秒重新排列数据,3 秒合并(zip)数据.
> Benchmark_AsParallel 大约需要 12 秒重新排列数据,0.005 秒合并(zip)数据.
> Benchmark_ForEach 大约需要 16 秒重新排列数据,0.005 秒合并(zip)数据.

为什么会这样?我原本期望AsParallel最快,因为它是并行执行而不是顺序执行.我该如何优化?

解决方法

为了实现并行计算,您必须拥有多个处理器或内核,否则您只是在线程池中排队等待CPU的任务.也就是说,在单核机器上,AsParallel实际上仍是顺序执行,还要额外承担线程池和线程上下文切换的开销.即使在双核机器上,也可能无法同时用上两个内核,因为同一台机器上还运行着许多其他程序.

实际上,.AsParallel()只有在处理包含阻塞操作(I/O)的长时间运行任务时才会变得有用,此时操作系统可以挂起被阻塞的线程并让另一个线程运行.

(编辑:李大同)

【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容!

    推荐文章
      热点阅读