Merge branches 'pm-core', 'pm-qos', 'pm-domains' and 'pm-opp'

[linux.git] / net / netfilter / nf_conntrack_core.c
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c

index 0f87e5d21be7161f6d885fd3ab40f1e752e68a66..4e8083c5e01d1ec631258af169c18aceed101e3a 100644 (file)
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -85,11 +85,11 @@ static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
  static __read_mostly bool nf_conntrack_locks_all;
  
  /* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
-#define GC_MAX_BUCKETS_DIV     64u
-/* upper bound of scan intervals */
-#define GC_INTERVAL_MAX                (2 * HZ)
-/* maximum conntracks to evict per gc run */
-#define GC_MAX_EVICTS          256u
+#define GC_MAX_BUCKETS_DIV     128u
+/* upper bound of full table scan, in jiffies */
+#define GC_MAX_SCAN_JIFFIES    (16u * HZ)
+/* desired ratio of entries found to be expired */
+#define GC_EVICT_RATIO 50u
  
  static struct conntrack_gc_work conntrack_gc_work;
  
@@ -783,7 +783,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
         /* set conntrack timestamp, if enabled. */
         tstamp = nf_conn_tstamp_find(ct);
         if (tstamp) {
-               if (skb->tstamp.tv64 == 0)
+               if (skb->tstamp == 0)
                         __net_timestamp(skb);
  
                 tstamp->start = ktime_to_ns(skb->tstamp);
@@ -938,6 +938,7 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
  
  static void gc_worker(struct work_struct *work)
  {
+       unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
         unsigned int i, goal, buckets = 0, expired_count = 0;
         struct conntrack_gc_work *gc_work;
         unsigned int ratio, scanned = 0;
@@ -979,8 +980,7 @@ static void gc_worker(struct work_struct *work)
                  */
                 rcu_read_unlock();
                 cond_resched_rcu_qs();
-       } while (++buckets < goal &&
-                expired_count < GC_MAX_EVICTS);
+       } while (++buckets < goal);
  
         if (gc_work->exiting)
                 return;
@@ -997,27 +997,25 @@ static void gc_worker(struct work_struct *work)
          * 1. Minimize time until we notice a stale entry
          * 2. Maximize scan intervals to not waste cycles
          *
-        * Normally, expired_count will be 0, this increases the next_run time
-        * to priorize 2) above.
+        * Normally, expire ratio will be close to 0.
          *
-        * As soon as a timed-out entry is found, move towards 1) and increase
-        * the scan frequency.
-        * In case we have lots of evictions next scan is done immediately.
+        * As soon as a sizeable fraction of the scanned entries have expired
+        * (more than GC_EVICT_RATIO percent), increase the scan frequency.
          */
         ratio = scanned ? expired_count * 100 / scanned : 0;
-       if (ratio >= 90 || expired_count == GC_MAX_EVICTS) {
-               gc_work->next_gc_run = 0;
-               next_run = 0;
-       } else if (expired_count) {
-               gc_work->next_gc_run /= 2U;
-               next_run = msecs_to_jiffies(1);
+       if (ratio > GC_EVICT_RATIO) {
+               gc_work->next_gc_run = min_interval;
         } else {
-               if (gc_work->next_gc_run < GC_INTERVAL_MAX)
-                       gc_work->next_gc_run += msecs_to_jiffies(1);
+               unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV;
  
-               next_run = gc_work->next_gc_run;
+               BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0);
+
+               gc_work->next_gc_run += min_interval;
+               if (gc_work->next_gc_run > max)
+                       gc_work->next_gc_run = max;
         }
  
+       next_run = gc_work->next_gc_run;
         gc_work->last_bucket = i;
         queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
  }
@@ -1025,7 +1023,7 @@ static void gc_worker(struct work_struct *work)
  static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
  {
         INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
-       gc_work->next_gc_run = GC_INTERVAL_MAX;
+       gc_work->next_gc_run = HZ;
         gc_work->exiting = false;
  }
  
@@ -1338,7 +1336,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
                 if (skb->nfct)
                         goto out;
         }
-
+repeat:
         ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
                                l3proto, l4proto, &set_reply, &ctinfo);
         if (!ct) {
@@ -1370,6 +1368,12 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
                 NF_CT_STAT_INC_ATOMIC(net, invalid);
                 if (ret == -NF_DROP)
                         NF_CT_STAT_INC_ATOMIC(net, drop);
+               /* Special case: TCP tracker reports an attempt to reopen a
+                * closed/aborted connection. We have to go back and create a
+                * fresh conntrack.
+                */
+               if (ret == -NF_REPEAT)
+                       goto repeat;
                 ret = -ret;
                 goto out;
         }
@@ -1377,15 +1381,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
         if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
                 nf_conntrack_event_cache(IPCT_REPLY, ct);
  out:
-       if (tmpl) {
-               /* Special case: we have to repeat this hook, assign the
-                * template again to this packet. We assume that this packet
-                * has no conntrack assigned. This is used by nf_ct_tcp. */
-               if (ret == NF_REPEAT)
-                       skb->nfct = (struct nf_conntrack *)tmpl;
-               else
-                       nf_ct_put(tmpl);
-       }
+       if (tmpl)
+               nf_ct_put(tmpl);
  
         return ret;
  }
@@ -1918,7 +1915,7 @@ int nf_conntrack_init_start(void)
         nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
  
         conntrack_gc_work_init(&conntrack_gc_work);
-       queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX);
+       queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ);
  
         return 0;