Merge branches 'pm-core', 'pm-qos', 'pm-domains' and 'pm-opp'

[linux.git] / kernel / sched / fair.c
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index b3bfe3fb4e1361474d6852896fefeb0e84236221..274c747a01ce4862307f4a97286db68e6a753824 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2657,6 +2657,18 @@ static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
         if (tg_weight)
                 shares /= tg_weight;
  
+       /*
+        * MIN_SHARES has to be unscaled here to support per-CPU partitioning
+        * of a group with small tg->shares value. It is a floor value which is
+        * assigned as a minimum load.weight to the sched_entity representing
+        * the group on a CPU.
+        *
+        * E.g. on 64-bit, for a group with tg->shares of scale_load(15)=15*1024
+        * on an 8-core system with 8 tasks, each runnable on one CPU, shares has
+        * to be 15*1024*1/8=1920 instead of scale_load(MIN_SHARES)=2*1024. In
+        * case no task is runnable on a CPU, MIN_SHARES=2 should be returned
+        * instead of 0.
+        */
         if (shares < MIN_SHARES)
                 shares = MIN_SHARES;
         if (shares > tg->shares)
@@ -2689,16 +2701,20 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
  
  static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
  
-static void update_cfs_shares(struct cfs_rq *cfs_rq)
+static void update_cfs_shares(struct sched_entity *se)
  {
+       struct cfs_rq *cfs_rq = group_cfs_rq(se);
         struct task_group *tg;
-       struct sched_entity *se;
         long shares;
  
-       tg = cfs_rq->tg;
-       se = tg->se[cpu_of(rq_of(cfs_rq))];
-       if (!se || throttled_hierarchy(cfs_rq))
+       if (!cfs_rq)
+               return;
+
+       if (throttled_hierarchy(cfs_rq))
                 return;
+
+       tg = cfs_rq->tg;
+
  #ifndef CONFIG_SMP
         if (likely(se->load.weight == tg->shares))
                 return;
@@ -2707,8 +2723,9 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq)
  
         reweight_entity(cfs_rq_of(se), se, shares);
  }
+
  #else /* CONFIG_FAIR_GROUP_SCHED */
-static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
+static inline void update_cfs_shares(struct sched_entity *se)
  {
  }
  #endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -3582,10 +3599,18 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         if (renorm && !curr)
                 se->vruntime += cfs_rq->min_vruntime;
  
+       /*
+        * When enqueuing a sched_entity, we must:
+        *   - Update loads to have both entity and cfs_rq synced with now.
+        *   - Add its load to cfs_rq->runnable_avg
+        *   - For a group entity, update its weight to reflect the new share
+        *     of its group cfs_rq
+        *   - Add its new weight to cfs_rq->load.weight
+        */
         update_load_avg(se, UPDATE_TG);
         enqueue_entity_load_avg(cfs_rq, se);
+       update_cfs_shares(se);
         account_entity_enqueue(cfs_rq, se);
-       update_cfs_shares(cfs_rq);
  
         if (flags & ENQUEUE_WAKEUP)
                 place_entity(cfs_rq, se, 0);
@@ -3657,6 +3682,15 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
          * Update run-time statistics of the 'current'.
          */
         update_curr(cfs_rq);
+
+       /*
+        * When dequeuing a sched_entity, we must:
+        *   - Update loads to have both entity and cfs_rq synced with now.
+        *   - Subtract its load from the cfs_rq->runnable_avg.
+        *   - Subtract its previous weight from cfs_rq->load.weight.
+        *   - For group entity, update its weight to reflect the new share
+        *     of its group cfs_rq.
+        */
         update_load_avg(se, UPDATE_TG);
         dequeue_entity_load_avg(cfs_rq, se);
  
@@ -3681,7 +3715,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         /* return excess runtime on last dequeue */
         return_cfs_rq_runtime(cfs_rq);
  
-       update_cfs_shares(cfs_rq);
+       update_cfs_shares(se);
  
         /*
          * Now advance min_vruntime if @se was the entity holding it back,
@@ -3864,7 +3898,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
          * Ensure that runnable average is periodically updated.
          */
         update_load_avg(curr, UPDATE_TG);
-       update_cfs_shares(cfs_rq);
+       update_cfs_shares(curr);
  
  #ifdef CONFIG_SCHED_HRTICK
         /*
@@ -4761,7 +4795,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                         break;
  
                 update_load_avg(se, UPDATE_TG);
-               update_cfs_shares(cfs_rq);
+               update_cfs_shares(se);
         }
  
         if (!se)
@@ -4820,7 +4854,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                         break;
  
                 update_load_avg(se, UPDATE_TG);
-               update_cfs_shares(cfs_rq);
+               update_cfs_shares(se);
         }
  
         if (!se)
@@ -9362,8 +9396,10 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
  
                 /* Possible calls to update_curr() need rq clock */
                 update_rq_clock(rq);
-               for_each_sched_entity(se)
-                       update_cfs_shares(group_cfs_rq(se));
+               for_each_sched_entity(se) {
+                       update_load_avg(se, UPDATE_TG);
+                       update_cfs_shares(se);
+               }
                 raw_spin_unlock_irqrestore(&rq->lock, flags);
         }