VM traffic is already limited by a throughput limit, but there is no control over the maximum number of packets per second (PPS). In a DoS attack the major issue is PPS rather than throughput. With providers offering more bandwidth to VMs, it becomes easy to coordinate a massive attack using VMs. Example: 100Mbits ~ 200kpps using 64B packets. This patch provides a new option to limit a VM's maximum packets-per-second emission rate. It follows the same credit logic used for throughput shaping. For the moment we have considered each "txreq" as a packet. The PPS limit is passed to the VIF at connection time via xenstore. The PPS credit uses the same usecond period used by the rate shaping check. Known limitations: - by using the same usecond period, PPS shaping depends on throughput shaping. - it is not always true that a "txreq" corresponds to a packet (fragmentation cases) but as this shaping is meant to avoid DoS (small packets) such an approximation should not impact the results. - Some help on burst handling will be appreciated. Signed-off-by: Ahmed Amamou <ahmed@gandi.net> Signed-off-by: William Dauchy <william@gandi.net> Signed-off-by: Kamel Haddadou <kamel@gandi.net> --- drivers/net/xen-netback/common.h | 2 ++ drivers/net/xen-netback/interface.c | 1 + drivers/net/xen-netback/netback.c | 46 +++++++++++++++++++++++++++++++++++ drivers/net/xen-netback/xenbus.c | 23 +++++++++++++++--- 4 files changed, 68 insertions(+), 4 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 9d7f172..fefa79a 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -85,8 +85,10 @@ struct xenvif { /* Transmit shaping: allow ''credit_bytes'' every ''credit_usec''. 
*/ unsigned long credit_bytes; + unsigned long credit_packets; unsigned long credit_usec; unsigned long remaining_credit; + unsigned long remaining_packets; struct timer_list credit_timeout; /* Statistics */ diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index d984141..06257dd 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -273,6 +273,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, INIT_LIST_HEAD(&vif->notify_list); vif->credit_bytes = vif->remaining_credit = ~0UL; + vif->credit_packets = vif->remaining_packets = ~0UL; vif->credit_usec = 0UL; init_timer(&vif->credit_timeout); /* Initialize ''expires'' now: it''s used to track the credit window. */ diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 8c20935..097a390 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -905,10 +905,16 @@ static void tx_add_credit(struct xenvif *vif) vif->remaining_credit = min(max_credit, max_burst); } +static void tx_add_packets(struct xenvif *vif) +{ + vif->remaining_packets = vif->credit_packets; +} + static void tx_credit_callback(unsigned long data) { struct xenvif *vif = (struct xenvif *)data; tx_add_credit(vif); + tx_add_packets(vif); xen_netbk_check_rx_xenvif(vif); } @@ -1419,6 +1425,38 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) return false; } +static bool tx_packets_exceeded(struct xenvif *vif) +{ + unsigned long now = jiffies; + unsigned long next_credit + vif->credit_timeout.expires + + msecs_to_jiffies(vif->credit_usec / 1000); + + /* Timer could already be pending in rare cases. */ + if (timer_pending(&vif->credit_timeout)) + return true; + + /* Passed the point where we can replenish credit? */ + if (time_after_eq(now, next_credit)) { + vif->credit_timeout.expires = now; + tx_add_packets(vif); + } + + /* Not enough slot to send right now? Set a callback. 
*/ + if (vif->remaining_packets < 1) { + vif->credit_timeout.data + (unsigned long)vif; + vif->credit_timeout.function + tx_credit_callback; + mod_timer(&vif->credit_timeout, + next_credit); + + return true; + } + + return false; +} + static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) { struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop; @@ -1470,6 +1508,13 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) rmb(); /* Ensure that we see the request before we copy it. */ memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq)); + /* pps-based scheduling. */ + if(vif->remaining_packets < 1 && + tx_packets_exceeded(vif)) { + xenvif_put(vif); + continue; + } + /* Credit-based scheduling. */ if (txreq.size > vif->remaining_credit && tx_credit_exceeded(vif, txreq.size)) { @@ -1478,6 +1523,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) } vif->remaining_credit -= txreq.size; + vif->remaining_packets--; work_to_do--; vif->tx.req_cons = ++idx; diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 410018c..91cff4f 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -267,15 +267,16 @@ static void frontend_changed(struct xenbus_device *dev, static void xen_net_read_rate(struct xenbus_device *dev, - unsigned long *bytes, unsigned long *usec) + unsigned long *bytes, unsigned long *packet, unsigned long *usec) { char *s, *e; - unsigned long b, u; - char *ratestr; + unsigned long b, u, pps; + char *ratestr, *ppsstr; /* Default to unlimited bandwidth. 
*/ *bytes = ~0UL; *usec = 0; + *packet = ~0UL; ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL); if (IS_ERR(ratestr)) @@ -295,11 +296,24 @@ static void xen_net_read_rate(struct xenbus_device *dev, *usec = u; kfree(ratestr); + ppsstr = xenbus_read(XBT_NIL, dev->nodename, "pps", NULL); + if (IS_ERR(ppsstr)) + return; + s = ppsstr; + pps = simple_strtoul(s, &e, 10); + if ((s == e) || (*e != ''\0'')) + goto fail2; + *packet = pps; + kfree(ppsstr); return; fail: pr_warn("Failed to parse network rate limit. Traffic unlimited.\n"); kfree(ratestr); + return; +fail2: + pr_warn("Failed to parse network pps limit. pps unlimited.\n"); + kfree(ppsstr); } static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) @@ -370,8 +384,9 @@ static void connect(struct backend_info *be) } xen_net_read_rate(dev, &be->vif->credit_bytes, - &be->vif->credit_usec); + &be->vif->credit_packets,&be->vif->credit_usec); be->vif->remaining_credit = be->vif->credit_bytes; + be->vif->remaining_packets = be->vif->credit_packets; unregister_hotplug_status_watch(be); err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, -- 1.7.9.5
Hi, First question: can we achieve the same effect by using an existing facility in the backend domain, say, tcng in Linux?
On Fri, Jun 21, 2013 at 03:43:53PM +0200, William Dauchy wrote:
> VM traffic is already limited by a throughput limit, but there is no > control over the maximum packet per second (PPS). > In DOS attack the major issue is rather PPS than throughput. > With provider offering more bandwidth to VMs, it becames easy to > coordinate a massive attack using VMs. Example: 100Mbits ~ 200kpps using > 64B packets. > This patch provides a new option to limit VMs maximum packets per second > emission rate. > It follows the same credits logic used for throughput shaping. For the > moment we have considered each "txreq" as a packet. > PPS limits is passed to VIF at connection time via xenstore. > PPS credit uses the same usecond period used by rate shaping check. > > known limitations: > - by using the same usecond period, PPS shaping depends on throughput > shaping. > - it is not always true that a "txreq" correspond to a paquet > (fragmentation cases) but as this shaping is meant to avoid DOS > (small paquets) such an pproximation should not impact the results.
IMHO the txreq in xen_netbk_tx_build_gops marks the start of a packet, so it is OK to do the accounting like that.
> - Some help on burst handling will be appreciated. > > Signed-off-by: Ahmed Amamou <ahmed@gandi.net> > Signed-off-by: William Dauchy <william@gandi.net> > Signed-off-by: Kamel Haddadou <kamel@gandi.net>
It would be better if you could come up with a patch for the toolstack as well -- the rate parameter is parsed in libxl (see $XEN/tools/libxl/libxlu_vif.c) -- so that users of this parameter can specify it in their VM config file. 
But that''s another topic and deserve another patch.> --- > drivers/net/xen-netback/common.h | 2 ++ > drivers/net/xen-netback/interface.c | 1 + > drivers/net/xen-netback/netback.c | 46 +++++++++++++++++++++++++++++++++++ > drivers/net/xen-netback/xenbus.c | 23 +++++++++++++++--- > 4 files changed, 68 insertions(+), 4 deletions(-) > > diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h > index 9d7f172..fefa79a 100644 > --- a/drivers/net/xen-netback/common.h > +++ b/drivers/net/xen-netback/common.h > @@ -85,8 +85,10 @@ struct xenvif { > > /* Transmit shaping: allow ''credit_bytes'' every ''credit_usec''. */ > unsigned long credit_bytes; > + unsigned long credit_packets; > unsigned long credit_usec; > unsigned long remaining_credit; > + unsigned long remaining_packets; > struct timer_list credit_timeout; > > /* Statistics */ > diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c > index d984141..06257dd 100644 > --- a/drivers/net/xen-netback/interface.c > +++ b/drivers/net/xen-netback/interface.c > @@ -273,6 +273,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, > INIT_LIST_HEAD(&vif->notify_list); > > vif->credit_bytes = vif->remaining_credit = ~0UL; > + vif->credit_packets = vif->remaining_packets = ~0UL; > vif->credit_usec = 0UL; > init_timer(&vif->credit_timeout); > /* Initialize ''expires'' now: it''s used to track the credit window. */ > diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c > index 8c20935..097a390 100644 > --- a/drivers/net/xen-netback/netback.c > +++ b/drivers/net/xen-netback/netback.c > @@ -905,10 +905,16 @@ static void tx_add_credit(struct xenvif *vif) > vif->remaining_credit = min(max_credit, max_burst); > } > > +static void tx_add_packets(struct xenvif *vif) > +{ > + vif->remaining_packets = vif->credit_packets;Hmm, the replenishing looks simple. I''m not sure if it''s over simplified. 
Could you look at the ring and determine the possible burst value? (note that in the ring txreq != packet). Just my two cents.> +} > + > static void tx_credit_callback(unsigned long data) > { > struct xenvif *vif = (struct xenvif *)data; > tx_add_credit(vif); > + tx_add_packets(vif); > xen_netbk_check_rx_xenvif(vif); > } > > @@ -1419,6 +1425,38 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) > return false; > } > > +static bool tx_packets_exceeded(struct xenvif *vif) > +{ > + unsigned long now = jiffies; > + unsigned long next_credit > + vif->credit_timeout.expires + > + msecs_to_jiffies(vif->credit_usec / 1000); > + > + /* Timer could already be pending in rare cases. */ > + if (timer_pending(&vif->credit_timeout)) > + return true; > + > + /* Passed the point where we can replenish credit? */ > + if (time_after_eq(now, next_credit)) { > + vif->credit_timeout.expires = now; > + tx_add_packets(vif); > + } > + > + /* Not enough slot to send right now? Set a callback. */ > + if (vif->remaining_packets < 1) { > + vif->credit_timeout.data > + (unsigned long)vif; > + vif->credit_timeout.function > + tx_credit_callback; > + mod_timer(&vif->credit_timeout, > + next_credit); > + > + return true; > + } > + > + return false; > +} > + > static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) > { > struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop; > @@ -1470,6 +1508,13 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) > rmb(); /* Ensure that we see the request before we copy it. */ > memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq)); > > + /* pps-based scheduling. */ > + if(vif->remaining_packets < 1 && > + tx_packets_exceeded(vif)) { > + xenvif_put(vif); > + continue; > + } > + > /* Credit-based scheduling. 
*/ > if (txreq.size > vif->remaining_credit && > tx_credit_exceeded(vif, txreq.size)) { > @@ -1478,6 +1523,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) > } > > vif->remaining_credit -= txreq.size; > + vif->remaining_packets--; > > work_to_do--; > vif->tx.req_cons = ++idx; > diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c > index 410018c..91cff4f 100644 > --- a/drivers/net/xen-netback/xenbus.c > +++ b/drivers/net/xen-netback/xenbus.c > @@ -267,15 +267,16 @@ static void frontend_changed(struct xenbus_device *dev, > > > static void xen_net_read_rate(struct xenbus_device *dev, > - unsigned long *bytes, unsigned long *usec) > + unsigned long *bytes, unsigned long *packet, unsigned long *usec)Line longer than 80 characters.> { > char *s, *e; > - unsigned long b, u; > - char *ratestr; > + unsigned long b, u, pps; > + char *ratestr, *ppsstr; > > /* Default to unlimited bandwidth. */ > *bytes = ~0UL; > *usec = 0; > + *packet = ~0UL; > > ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL); > if (IS_ERR(ratestr)) > @@ -295,11 +296,24 @@ static void xen_net_read_rate(struct xenbus_device *dev, > *usec = u; > > kfree(ratestr); > + ppsstr = xenbus_read(XBT_NIL, dev->nodename, "pps", NULL); > + if (IS_ERR(ppsstr)) > + return; > + s = ppsstr; > + pps = simple_strtoul(s, &e, 10); > + if ((s == e) || (*e != ''\0'')) > + goto fail2; > + *packet = pps; > + kfree(ppsstr); > return; >If you fail to parse "rate" you skip parsing "pps", is this intentional?> fail: > pr_warn("Failed to parse network rate limit. Traffic unlimited.\n"); > kfree(ratestr); > + return; > +fail2: > + pr_warn("Failed to parse network pps limit. 
pps unlimited.\n");
Using "PPS" (upper case) would be better.
> + kfree(ppsstr); > } > > static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) > @@ -370,8 +384,9 @@ static void connect(struct backend_info *be) > } > > xen_net_read_rate(dev, &be->vif->credit_bytes, > - &be->vif->credit_usec); > + &be->vif->credit_packets,&be->vif->credit_usec);
A space is needed after the comma.
> be->vif->remaining_credit = be->vif->credit_bytes; > + be->vif->remaining_packets = be->vif->credit_packets; > > unregister_hotplug_status_watch(be); > err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, > -- > 1.7.9.5 > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
Hi Wei, Thanks for your reply.
On Jun24 11:14, Wei Liu wrote:
> First question, can we achieve the same effect by using existing > facility in backend domain, say, tcng in Linux?
Yes indeed, but it looks way more complicated to configure; we thought it was a good option to manage it together with the bandwidth. Also, by using tcng, PPS shaping is done at the backend level, once the packet has left the VM, which means after using an additional memory transaction to copy the packet from the frontend. IMHO, at scale, shaping this way should save some memory transactions compared to tcng.
> It would be better if you can come up with patch for toolstack as well > -- the rate parameter is parsed in libxl (see > $XEN/tools/libxl/libxlu_vif.c) -- so that users of this parameter can > specify it in their VM config file. But that's another topic and deserve > another patch.
Indeed; for now we do have a userland patch, but it uses xend, which is out of date; that's why we didn't send it along. Are you interested in it anyway?
> Hmm, the replenishing looks simple. I'm not sure if it's over > simplified. Could you look at the ring and determine the possible burst > value? (note that in the ring txreq != packet). Just my two cents.
We haven't looked at it yet, since we were hoping to get some help (and time) on this subject.
> If you fail to parse "rate" you skip parsing "pps", is this intentional?
Yes; PPS is linked to rate since they are checked within the same period. "by using the same usecond period, PPS shaping depends on throughput shaping."
Will resend with the small typos you pointed out fixed.
-- William _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
On Mon, Jun 24, 2013 at 03:42:35PM +0200, William Dauchy wrote:
> Hi Wei, > > Thanks for your reply. > > On Jun24 11:14, Wei Liu wrote: > > First question, can we achieve the same effect by using existing > > facility in backend domain, say, tcng in Linux? > > yes indeed, but it looks like way more complicated to configure; we > thought it was a good option to manage it with the bandwitdh. > Also, by using tcng, PPS shaping is done on backend level, once packet has > left the VM; which means after using an additional memory transaction to copy > packet from frontend. IMHO, at scale, shaping in this way should save some > memory transactions comparing to tcng. >
OK, fair enough. :-)
> > It would be better if you can come up with patch for toolstack as well > > -- the rate parameter is parsed in libxl (see > > $XEN/tools/libxl/libxlu_vif.c) -- so that users of this parameter can > > specify it in their VM config file. But that's another topic and deserve > > another patch. > > indeed; for now we do have a userland patch but using xend which is out > of date; that's why we didn't send it along. Are you interested by it > anyway? >
A patch for Xend, no. We're not supposed to add new functionality to it anyway.
> > Hmm, the replenishing looks simple. I'm not sure if it's over > > simplified. Could you look at the ring and determine the possible burst > > value? (note that in the ring txreq != packet). Just my two cents. > > we didn't had a look for now since we were eventually looking for some > help (and time) on this subject. >
Better to wait for Ian Campbell or other interested people to weigh in.
> > If you fail to parse "rate" you skip parsing "pps", is this intentional? > > yes; PPS is linked to rate since they are checked within the same > perdiod. > "by using the same usecond period, PPS shaping depends on throughput shaping." >
It is just that now we have two limits, and the backend should stop processing when one of the limits is hit. 
And, if we're to have more limits in the future (not sure if it is a good idea but let's just be open to this), they should probably not be dependent on each other.
> Will resend with the small typo you picked. > -- > William
On Jun24 15:09, Wei Liu wrote:
> Patch for Xend, no. We're not supposed to add new functionality to it > anyway.
Yes, I know. We will see if we have some time to add a toolstack patch, but we are still using xend for our own use since we have some stuff depending on it.
> Better to wait for Ian Campbell or other interested people to weight in.
ack
> It is just now we have two limits and backend should stop processing > when one of the limits is hit. And, if we're to have more limits in the > future (not sure if it is a good idea but let's just be open to this), > they should probably not be dependent on each other.
Our point is just that a bandwidth limit should come with a PPS limit.
-- William _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel