skbuff.h

  1. /* SPDX-License-Identifier: GPL-2.0-or-later */
  2. /*
  3. * Definitions for the 'struct sk_buff' memory handlers.
  4. *
  5. * Authors:
  6. * Alan Cox, <[email protected]>
  7. * Florian La Roche, <[email protected]>
  8. */
  9. #ifndef _LINUX_SKBUFF_H
  10. #define _LINUX_SKBUFF_H
  11. #include <linux/kernel.h>
  12. #include <linux/compiler.h>
  13. #include <linux/time.h>
  14. #include <linux/bug.h>
  15. #include <linux/bvec.h>
  16. #include <linux/cache.h>
  17. #include <linux/rbtree.h>
  18. #include <linux/socket.h>
  19. #include <linux/refcount.h>
  20. #include <linux/atomic.h>
  21. #include <asm/types.h>
  22. #include <linux/spinlock.h>
  23. #include <linux/net.h>
  24. #include <linux/textsearch.h>
  25. #include <net/checksum.h>
  26. #include <linux/rcupdate.h>
  27. #include <linux/hrtimer.h>
  28. #include <linux/dma-mapping.h>
  29. #include <linux/netdev_features.h>
  30. #include <linux/sched.h>
  31. #include <linux/sched/clock.h>
  32. #include <net/flow_dissector.h>
  33. #include <linux/splice.h>
  34. #include <linux/in6.h>
  35. #include <linux/if_packet.h>
  36. #include <linux/llist.h>
  37. #include <net/flow.h>
  38. #include <net/page_pool.h>
  39. #if IS_ENABLED(CONFIG_NF_CONNTRACK)
  40. #include <linux/netfilter/nf_conntrack_common.h>
  41. #endif
  42. #include <net/net_debug.h>
  43. #include <net/dropreason.h>
  44. #include <linux/android_kabi.h>
  45. #include <linux/android_vendor.h>
  46. /**
  47. * DOC: skb checksums
  48. *
  49. * The interface for checksum offload between the stack and networking drivers
  50. * is as follows...
  51. *
  52. * IP checksum related features
  53. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  54. *
  55. * Drivers advertise checksum offload capabilities in the features of a device.
  56. * From the stack's point of view these are capabilities offered by the driver.
  57. * A driver typically only advertises features that it is capable of offloading
  58. * to its device.
  59. *
  60. * .. flat-table:: Checksum related device features
  61. * :widths: 1 10
  62. *
  63. * * - %NETIF_F_HW_CSUM
  64. * - The driver (or its device) is able to compute one
  65. * IP (one's complement) checksum for any combination
  66. * of protocols or protocol layering. The checksum is
  67. * computed and set in a packet per the CHECKSUM_PARTIAL
  68. * interface (see below).
  69. *
  70. * * - %NETIF_F_IP_CSUM
  71. * - Driver (device) is only able to checksum plain
  72. * TCP or UDP packets over IPv4. These are specifically
  73. * unencapsulated packets of the form IPv4|TCP or
  74. * IPv4|UDP where the Protocol field in the IPv4 header
  75. * is TCP or UDP. The IPv4 header may contain IP options.
  76. * This feature cannot be set in features for a device
  77. * with NETIF_F_HW_CSUM also set. This feature is being
  78. * DEPRECATED (see below).
  79. *
  80. * * - %NETIF_F_IPV6_CSUM
  81. * - Driver (device) is only able to checksum plain
  82. * TCP or UDP packets over IPv6. These are specifically
  83. * unencapsulated packets of the form IPv6|TCP or
  84. * IPv6|UDP where the Next Header field in the IPv6
  85. * header is either TCP or UDP. IPv6 extension headers
  86. * are not supported with this feature. This feature
  87. * cannot be set in features for a device with
  88. * NETIF_F_HW_CSUM also set. This feature is being
  89. * DEPRECATED (see below).
  90. *
  91. * * - %NETIF_F_RXCSUM
  92. * - Driver (device) performs receive checksum offload.
  93. * This flag is only used to disable the RX checksum
  94. * feature for a device. The stack will accept receive
  95. * checksum indication in packets received on a device
  96. * regardless of whether NETIF_F_RXCSUM is set.
  97. *
  98. * Checksumming of received packets by device
  99. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  100. *
  101. * Indication of checksum verification is set in &sk_buff.ip_summed.
  102. * Possible values are:
  103. *
  104. * - %CHECKSUM_NONE
  105. *
  106. * Device did not checksum this packet e.g. due to lack of capabilities.
  107. * The packet contains full (though not verified) checksum in packet but
  108. * not in skb->csum. Thus, skb->csum is undefined in this case.
  109. *
  110. * - %CHECKSUM_UNNECESSARY
  111. *
  112. * The hardware you're dealing with doesn't calculate the full checksum
  113. * (as in %CHECKSUM_COMPLETE), but it does parse headers and verify checksums
  114. * for specific protocols. For such packets it will set %CHECKSUM_UNNECESSARY
  115. * if their checksums are okay. &sk_buff.csum is still undefined in this case
  116. * though. A driver or device must never modify the checksum field in the
  117. * packet even if checksum is verified.
  118. *
  119. * %CHECKSUM_UNNECESSARY is applicable to following protocols:
  120. *
  121. * - TCP: IPv6 and IPv4.
  122. * - UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a
  123. * zero UDP checksum for either IPv4 or IPv6; the networking stack
  124. * may perform further validation in this case.
  125. * - GRE: only if the checksum is present in the header.
  126. * - SCTP: indicates the CRC in SCTP header has been validated.
  127. * - FCOE: indicates the CRC in FC frame has been validated.
  128. *
  129. * &sk_buff.csum_level indicates the number of consecutive checksums found in
  130. * the packet minus one that have been verified as %CHECKSUM_UNNECESSARY.
  131. * For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet
  132. * and a device is able to verify the checksums for UDP (possibly zero),
  133. * GRE (checksum flag is set) and TCP, &sk_buff.csum_level would be set to
  134. * two. If the device were only able to verify the UDP checksum and not
  135. * GRE, either because it doesn't support GRE checksum or because GRE
  136. * checksum is bad, skb->csum_level would be set to zero (TCP checksum is
  137. * not considered in this case).
  138. *
  139. * - %CHECKSUM_COMPLETE
  140. *
  141. * This is the most generic way. The device supplied checksum of the _whole_
  142. * packet as seen by netif_rx() and fills in &sk_buff.csum. This means the
  143. * hardware doesn't need to parse L3/L4 headers to implement this.
  144. *
  145. * Notes:
  146. *
  147. * - Even if device supports only some protocols, but is able to produce
  148. * skb->csum, it MUST use CHECKSUM_COMPLETE, not CHECKSUM_UNNECESSARY.
  149. * - CHECKSUM_COMPLETE is not applicable to SCTP and FCoE protocols.
  150. *
  151. * - %CHECKSUM_PARTIAL
  152. *
  153. * A checksum is set up to be offloaded to a device as described in the
  154. * output description for CHECKSUM_PARTIAL. This may occur on a packet
  155. * received directly from another Linux OS, e.g., a virtualized Linux kernel
  156. * on the same host, or it may be set in the input path in GRO or remote
  157. * checksum offload. For the purposes of checksum verification, the checksum
  158. * referred to by skb->csum_start + skb->csum_offset and any preceding
  159. * checksums in the packet are considered verified. Any checksums in the
  160. * packet that are after the checksum being offloaded are not considered to
  161. * be verified.
  162. *
  163. * Checksumming on transmit for non-GSO
  164. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  165. *
  166. * The stack requests checksum offload in the &sk_buff.ip_summed for a packet.
  167. * Values are:
  168. *
  169. * - %CHECKSUM_PARTIAL
  170. *
  171. * The driver is required to checksum the packet as seen by hard_start_xmit()
  172. * from &sk_buff.csum_start up to the end, and to record/write the checksum at
  173. * offset &sk_buff.csum_start + &sk_buff.csum_offset.
  174. * A driver may verify that the
  175. * csum_start and csum_offset values are valid values given the length and
  176. * offset of the packet, but it should not attempt to validate that the
  177. * checksum refers to a legitimate transport layer checksum -- it is the
  178. * purview of the stack to validate that csum_start and csum_offset are set
  179. * correctly.
  180. *
  181. * When the stack requests checksum offload for a packet, the driver MUST
  182. * ensure that the checksum is set correctly. A driver can either offload the
  183. * checksum calculation to the device, or call skb_checksum_help (in the case
  184. * that the device does not support offload for a particular checksum).
  185. *
  186. * %NETIF_F_IP_CSUM and %NETIF_F_IPV6_CSUM are being deprecated in favor of
  187. * %NETIF_F_HW_CSUM. New devices should use %NETIF_F_HW_CSUM to indicate
  188. * checksum offload capability.
  189. * skb_csum_hwoffload_help() can be called to resolve %CHECKSUM_PARTIAL based
  190. * on network device checksumming capabilities: if a packet does not match
  191. * them, skb_checksum_help() or skb_crc32c_help() (depending on the value of
  192. * &sk_buff.csum_not_inet, see :ref:`crc`)
  193. * is called to resolve the checksum.
  194. *
  195. * - %CHECKSUM_NONE
  196. *
  197. * The skb was already checksummed by the protocol, or a checksum is not
  198. * required.
  199. *
  200. * - %CHECKSUM_UNNECESSARY
  201. *
  202. * This has the same meaning as CHECKSUM_NONE for checksum offload on
  203. * output.
  204. *
  205. * - %CHECKSUM_COMPLETE
  206. *
  207. * Not used in checksum output. If a driver observes a packet with this value
  208. * set in skbuff, it should treat the packet as if %CHECKSUM_NONE were set.
  209. *
  210. * .. _crc:
  211. *
  212. * Non-IP checksum (CRC) offloads
  213. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  214. *
  215. * .. flat-table::
  216. * :widths: 1 10
  217. *
  218. * * - %NETIF_F_SCTP_CRC
  219. * - This feature indicates that a device is capable of
  220. * offloading the SCTP CRC in a packet. To perform this offload the stack
  221. * will set csum_start and csum_offset accordingly, set ip_summed to
  222. * %CHECKSUM_PARTIAL and set csum_not_inet to 1, to provide an indication
  223. * in the skbuff that the %CHECKSUM_PARTIAL refers to CRC32c.
  224. * A driver that supports both IP checksum offload and SCTP CRC32c offload
  225. * must verify which offload is configured for a packet by testing the
  226. * value of &sk_buff.csum_not_inet; skb_crc32c_csum_help() is provided to
  227. * resolve %CHECKSUM_PARTIAL on skbs where csum_not_inet is set to 1.
  228. *
  229. * * - %NETIF_F_FCOE_CRC
  230. * - This feature indicates that a device is capable of offloading the FCOE
  231. * CRC in a packet. To perform this offload the stack will set ip_summed
  232. * to %CHECKSUM_PARTIAL and set csum_start and csum_offset
  233. * accordingly. Note that there is no indication in the skbuff that the
  234. * %CHECKSUM_PARTIAL refers to an FCOE checksum, so a driver that supports
  235. * both IP checksum offload and FCOE CRC offload must verify which offload
  236. * is configured for a packet, presumably by inspecting packet headers.
  237. *
  238. * Checksumming on output with GSO
  239. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  240. *
  241. * In the case of a GSO packet (skb_is_gso() is true), checksum offload
  242. * is implied by the SKB_GSO_* flags in gso_type. Most obviously, if the
  243. * gso_type is %SKB_GSO_TCPV4 or %SKB_GSO_TCPV6, TCP checksum offload as
  244. * part of the GSO operation is implied. If a checksum is being offloaded
  245. * with GSO then ip_summed is %CHECKSUM_PARTIAL, and both csum_start and
  246. * csum_offset are set to refer to the outermost checksum being offloaded
  247. * (two offloaded checksums are possible with UDP encapsulation).
  248. */
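/* A minimal sketch of how a driver transmit path might honour
 * CHECKSUM_PARTIAL as described above: program the device with the
 * csum_start/csum_offset pair the stack prepared, or fall back to
 * skb_checksum_help() (declared in linux/netdevice.h). The
 * example_hw_program_csum() helper and the hw_can_offload flag are
 * hypothetical placeholders for driver-specific capability checks.
 */
#if 0	/* illustrative sketch, not compiled */
static int example_tx_csum(struct sk_buff *skb, bool hw_can_offload)
{
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;			/* nothing requested */

	if (hw_can_offload) {
		/* Offset where summing starts, and where the result goes. */
		int start = skb_checksum_start_offset(skb);
		int offset = skb->csum_offset;

		return example_hw_program_csum(skb, start, offset);
	}

	/* Device cannot offload this packet: compute it in software. */
	return skb_checksum_help(skb);
}
#endif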
  249. /* Don't change this without changing skb_csum_unnecessary! */
  250. #define CHECKSUM_NONE 0
  251. #define CHECKSUM_UNNECESSARY 1
  252. #define CHECKSUM_COMPLETE 2
  253. #define CHECKSUM_PARTIAL 3
  254. /* Maximum value in skb->csum_level */
  255. #define SKB_MAX_CSUM_LEVEL 3
  256. #define SKB_DATA_ALIGN(X) ALIGN(X, SMP_CACHE_BYTES)
  257. #define SKB_WITH_OVERHEAD(X) \
  258. ((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
  259. /* For X bytes available in skb->head, what is the minimal
  260. * allocation needed, knowing struct skb_shared_info needs
  261. * to be aligned.
  262. */
  263. #define SKB_HEAD_ALIGN(X) (SKB_DATA_ALIGN(X) + \
  264. SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
  265. #define SKB_MAX_ORDER(X, ORDER) \
  266. SKB_WITH_OVERHEAD((PAGE_SIZE << (ORDER)) - (X))
  267. #define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0))
  268. #define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2))
  269. /* return minimum truesize of one skb containing X bytes of data */
  270. #define SKB_TRUESIZE(X) ((X) + \
  271. SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \
  272. SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
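/* A small sketch of the arithmetic the sizing macros above encode.
 * The payload size of 1500 bytes is an arbitrary example value.
 */
#if 0	/* illustrative sketch, not compiled */
static inline void example_skb_sizing(void)
{
	/* Bytes to allocate so skb->head can hold 1500 bytes of data
	 * plus the cache-aligned struct skb_shared_info footer.
	 */
	unsigned int alloc = SKB_HEAD_ALIGN(1500);

	/* The reverse direction: usable data bytes in such a buffer. */
	unsigned int usable = SKB_WITH_OVERHEAD(alloc);

	/* Rough memory accounting for one skb carrying 1500 bytes. */
	unsigned int truesize = SKB_TRUESIZE(1500);

	(void)alloc; (void)usable; (void)truesize;
}
#endif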
  273. struct ahash_request;
  274. struct net_device;
  275. struct scatterlist;
  276. struct pipe_inode_info;
  277. struct iov_iter;
  278. struct napi_struct;
  279. struct bpf_prog;
  280. union bpf_attr;
  281. struct skb_ext;
  282. #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
  283. struct nf_bridge_info {
  284. enum {
  285. BRNF_PROTO_UNCHANGED,
  286. BRNF_PROTO_8021Q,
  287. BRNF_PROTO_PPPOE
  288. } orig_proto:8;
  289. u8 pkt_otherhost:1;
  290. u8 in_prerouting:1;
  291. u8 bridged_dnat:1;
  292. u8 sabotage_in_done:1;
  293. __u16 frag_max_size;
  294. struct net_device *physindev;
  295. /* always valid & non-NULL from FORWARD on, for physdev match */
  296. struct net_device *physoutdev;
  297. union {
  298. /* prerouting: detect dnat in orig/reply direction */
  299. __be32 ipv4_daddr;
  300. struct in6_addr ipv6_daddr;
  301. /* after prerouting + nat detected: store original source
  302. * mac since neigh resolution overwrites it, only used while
  303. * skb is out in neigh layer.
  304. */
  305. char neigh_header[8];
  306. };
  307. };
  308. #endif
  309. #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
  310. /* Chain in tc_skb_ext will be used to share the tc chain with
  311. * ovs recirc_id. It will be set to the current chain by tc
  312. * and read by ovs to recirc_id.
  313. */
  314. struct tc_skb_ext {
  315. __u32 chain;
  316. __u16 mru;
  317. __u16 zone;
  318. u8 post_ct:1;
  319. u8 post_ct_snat:1;
  320. u8 post_ct_dnat:1;
  321. };
  322. #endif
  323. struct sk_buff_head {
  324. /* These two members must be first to match sk_buff. */
  325. struct_group_tagged(sk_buff_list, list,
  326. struct sk_buff *next;
  327. struct sk_buff *prev;
  328. );
  329. __u32 qlen;
  330. spinlock_t lock;
  331. };
  332. struct sk_buff;
  333. /* To allow 64K frame to be packed as single skb without frag_list we
  334. * require 64K/PAGE_SIZE pages plus 1 additional page to allow for
  335. * buffers which do not start on a page boundary.
  336. *
  337. * Since GRO uses frags we allocate at least 16 regardless of page
  338. * size.
  339. */
  340. #if (65536/PAGE_SIZE + 1) < 16
  341. #define MAX_SKB_FRAGS 16UL
  342. #else
  343. #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1)
  344. #endif
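/* Worked example of the arithmetic above: with 4 KiB pages,
 * 65536/4096 + 1 = 17, which is not below 16, so MAX_SKB_FRAGS is 17;
 * with 64 KiB pages, 65536/65536 + 1 = 2 < 16, so the GRO-driven
 * minimum of 16 is used instead.
 */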
  345. extern int sysctl_max_skb_frags;
  346. /* Set skb_shinfo(skb)->gso_size to this in case you want skb_segment to
  347. * segment using its current segmentation instead.
  348. */
  349. #define GSO_BY_FRAGS 0xFFFF
  350. typedef struct bio_vec skb_frag_t;
  351. /**
  352. * skb_frag_size() - Returns the size of a skb fragment
  353. * @frag: skb fragment
  354. */
  355. static inline unsigned int skb_frag_size(const skb_frag_t *frag)
  356. {
  357. return frag->bv_len;
  358. }
  359. /**
  360. * skb_frag_size_set() - Sets the size of a skb fragment
  361. * @frag: skb fragment
  362. * @size: size of fragment
  363. */
  364. static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
  365. {
  366. frag->bv_len = size;
  367. }
  368. /**
  369. * skb_frag_size_add() - Increments the size of a skb fragment by @delta
  370. * @frag: skb fragment
  371. * @delta: value to add
  372. */
  373. static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
  374. {
  375. frag->bv_len += delta;
  376. }
  377. /**
  378. * skb_frag_size_sub() - Decrements the size of a skb fragment by @delta
  379. * @frag: skb fragment
  380. * @delta: value to subtract
  381. */
  382. static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
  383. {
  384. frag->bv_len -= delta;
  385. }
  386. /**
  387. * skb_frag_must_loop - Test if %p is a high memory page
  388. * @p: fragment's page
  389. */
  390. static inline bool skb_frag_must_loop(struct page *p)
  391. {
  392. #if defined(CONFIG_HIGHMEM)
  393. if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) || PageHighMem(p))
  394. return true;
  395. #endif
  396. return false;
  397. }
  398. /**
  399. * skb_frag_foreach_page - loop over pages in a fragment
  400. *
  401. * @f: skb frag to operate on
  402. * @f_off: offset from start of f->bv_page
  403. * @f_len: length from f_off to loop over
  404. * @p: (temp var) current page
  405. * @p_off: (temp var) offset from start of current page,
  406. * non-zero only on first page.
  407. * @p_len: (temp var) length in current page,
  408. * < PAGE_SIZE only on first and last page.
  409. * @copied: (temp var) length so far, excluding current p_len.
  410. *
  411. * A fragment can hold a compound page, in which case per-page
  412. * operations, notably kmap_atomic, must be called for each
  413. * regular page.
  414. */
  415. #define skb_frag_foreach_page(f, f_off, f_len, p, p_off, p_len, copied) \
  416. for (p = skb_frag_page(f) + ((f_off) >> PAGE_SHIFT), \
  417. p_off = (f_off) & (PAGE_SIZE - 1), \
  418. p_len = skb_frag_must_loop(p) ? \
  419. min_t(u32, f_len, PAGE_SIZE - p_off) : f_len, \
  420. copied = 0; \
  421. copied < f_len; \
  422. copied += p_len, p++, p_off = 0, \
  423. p_len = min_t(u32, f_len - copied, PAGE_SIZE)) \
  424. #define HAVE_HW_TIME_STAMP
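/* A typical use of the skb_frag_foreach_page() macro defined above:
 * copy f_len bytes of a fragment into a linear buffer, mapping one
 * constituent page at a time. kmap_local_page() is used here instead
 * of the kmap_atomic() mentioned in the kernel-doc; either fits this
 * pattern.
 */
#if 0	/* illustrative sketch, not compiled */
static void example_copy_frag(const skb_frag_t *frag, u32 f_off, u32 f_len,
			      char *to)
{
	struct page *p;
	u32 p_off, p_len, copied;

	skb_frag_foreach_page(frag, f_off, f_len, p, p_off, p_len, copied) {
		void *vaddr = kmap_local_page(p);

		memcpy(to + copied, vaddr + p_off, p_len);
		kunmap_local(vaddr);
	}
}
#endif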
  425. /**
  426. * struct skb_shared_hwtstamps - hardware time stamps
  427. * @hwtstamp: hardware time stamp transformed into duration
  428. * since arbitrary point in time
  429. * @netdev_data: address/cookie of network device driver used as
  430. * reference to actual hardware time stamp
  431. *
  432. * Software time stamps generated by ktime_get_real() are stored in
  433. * skb->tstamp.
  434. *
  435. * hwtstamps can only be compared against other hwtstamps from
  436. * the same device.
  437. *
  438. * This structure is attached to packets as part of the
  439. * &skb_shared_info. Use skb_hwtstamps() to get a pointer.
  440. */
  441. struct skb_shared_hwtstamps {
  442. union {
  443. ktime_t hwtstamp;
  444. void *netdev_data;
  445. };
  446. };
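/* A minimal sketch of reading the hardware timestamp a driver stored,
 * via skb_hwtstamps() (defined further down in this header). A zero
 * hwtstamp means no hardware timestamp was taken.
 */
#if 0	/* illustrative sketch, not compiled */
static ktime_t example_rx_hwtstamp(const struct sk_buff *skb)
{
	const struct skb_shared_hwtstamps *hwts = skb_hwtstamps(skb);

	return hwts->hwtstamp;
}
#endif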
  447. /* Definitions for tx_flags in struct skb_shared_info */
  448. enum {
  449. /* generate hardware time stamp */
  450. SKBTX_HW_TSTAMP = 1 << 0,
  451. /* generate software time stamp when queueing packet to NIC */
  452. SKBTX_SW_TSTAMP = 1 << 1,
  453. /* device driver is going to provide hardware time stamp */
  454. SKBTX_IN_PROGRESS = 1 << 2,
  455. /* generate hardware time stamp based on cycles if supported */
  456. SKBTX_HW_TSTAMP_USE_CYCLES = 1 << 3,
  457. /* generate wifi status information (where possible) */
  458. SKBTX_WIFI_STATUS = 1 << 4,
  459. /* determine hardware time stamp based on time or cycles */
  460. SKBTX_HW_TSTAMP_NETDEV = 1 << 5,
  461. /* generate software time stamp when entering packet scheduling */
  462. SKBTX_SCHED_TSTAMP = 1 << 6,
  463. };
  464. #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \
  465. SKBTX_SCHED_TSTAMP)
  466. #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | \
  467. SKBTX_HW_TSTAMP_USE_CYCLES | \
  468. SKBTX_ANY_SW_TSTAMP)
  469. /* Definitions for flags in struct skb_shared_info */
  470. enum {
  471. /* use zcopy routines */
  472. SKBFL_ZEROCOPY_ENABLE = BIT(0),
  473. /* This indicates at least one fragment might be overwritten
  474. * (as in vmsplice(), sendfile() ...)
  475. * If we need to compute a TX checksum, we'll need to copy
  476. * all frags to avoid possible bad checksum
  477. */
  478. SKBFL_SHARED_FRAG = BIT(1),
  479. /* segment contains only zerocopy data and should not be
  480. * charged to the kernel memory.
  481. */
  482. SKBFL_PURE_ZEROCOPY = BIT(2),
  483. SKBFL_DONT_ORPHAN = BIT(3),
  484. /* page references are managed by the ubuf_info, so it's safe to
  485. * use frags only up until ubuf_info is released
  486. */
  487. SKBFL_MANAGED_FRAG_REFS = BIT(4),
  488. };
  489. #define SKBFL_ZEROCOPY_FRAG (SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG)
  490. #define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY | \
  491. SKBFL_DONT_ORPHAN | SKBFL_MANAGED_FRAG_REFS)
  492. /*
  493. * The callback notifies userspace to release buffers when skb DMA is done in
  494. * the lower device; the skb's last reference should be 0 when calling this.
  495. * The zerocopy_success argument is true if zero copy transmit occurred,
  496. * false on data copy or out of memory error caused by data copy attempt.
  497. * The ctx field is used to track device context.
  498. * The desc field is used to track userspace buffer index.
  499. */
  500. struct ubuf_info {
  501. void (*callback)(struct sk_buff *, struct ubuf_info *,
  502. bool zerocopy_success);
  503. refcount_t refcnt;
  504. u8 flags;
  505. };
  506. struct ubuf_info_msgzc {
  507. struct ubuf_info ubuf;
  508. union {
  509. struct {
  510. unsigned long desc;
  511. void *ctx;
  512. };
  513. struct {
  514. u32 id;
  515. u16 len;
  516. u16 zerocopy:1;
  517. u32 bytelen;
  518. };
  519. };
  520. struct mmpin {
  521. struct user_struct *user;
  522. unsigned int num_pg;
  523. } mmp;
  524. };
  525. #define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
  526. #define uarg_to_msgzc(ubuf_ptr) container_of((ubuf_ptr), struct ubuf_info_msgzc, \
  527. ubuf)
  528. int mm_account_pinned_pages(struct mmpin *mmp, size_t size);
  529. void mm_unaccount_pinned_pages(struct mmpin *mmp);
  530. /* This data is invariant across clones and lives at
  531. * the end of the header data, ie. at skb->end.
  532. */
  533. struct skb_shared_info {
  534. __u8 flags;
  535. __u8 meta_len;
  536. __u8 nr_frags;
  537. __u8 tx_flags;
  538. unsigned short gso_size;
  539. /* Warning: this field is not always filled in (UFO)! */
  540. unsigned short gso_segs;
  541. struct sk_buff *frag_list;
  542. struct skb_shared_hwtstamps hwtstamps;
  543. unsigned int gso_type;
  544. u32 tskey;
  545. /*
  546. * Warning : all fields before dataref are cleared in __alloc_skb()
  547. */
  548. atomic_t dataref;
  549. unsigned int xdp_frags_size;
  550. /* Intermediate layers must ensure that destructor_arg
  551. * remains valid until skb destructor */
  552. void * destructor_arg;
  553. ANDROID_OEM_DATA_ARRAY(1, 3);
  554. /* must be last field, see pskb_expand_head() */
  555. skb_frag_t frags[MAX_SKB_FRAGS];
  556. };
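/* A minimal sketch of walking the paged part of an skb through
 * skb_shinfo() (defined later in this header), using the frag size
 * accessor from earlier in this file.
 */
#if 0	/* illustrative sketch, not compiled */
static unsigned int example_count_frag_bytes(const struct sk_buff *skb)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	unsigned int i, bytes = 0;

	for (i = 0; i < shinfo->nr_frags; i++)
		bytes += skb_frag_size(&shinfo->frags[i]);

	/* Matches skb->data_len when there is no frag_list. */
	return bytes;
}
#endif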
  557. /**
  558. * DOC: dataref and headerless skbs
  559. *
  560. * Transport layers send out clones of payload skbs they hold for
  561. * retransmissions. To allow lower layers of the stack to prepend their headers
  562. * we split &skb_shared_info.dataref into two halves.
  563. * The lower 16 bits count the overall number of references.
  564. * The higher 16 bits indicate how many of the references are payload-only.
  565. * skb_header_cloned() checks if skb is allowed to add / write the headers.
  566. *
  567. * The creator of the skb (e.g. TCP) marks its skb as &sk_buff.nohdr
  568. * (via __skb_header_release()). Any clone created from marked skb will get
  569. * &sk_buff.hdr_len populated with the available headroom.
  570. * If only one clone is in existence, it can modify the headroom
  571. * at will. The sequence of calls inside the transport layer is::
  572. *
  573. * <alloc skb>
  574. * skb_reserve()
  575. * __skb_header_release()
  576. * skb_clone()
  577. * // send the clone down the stack
  578. *
  579. * This is not a very generic construct and it depends on the transport layers
  580. * doing the right thing. In practice there's usually only one payload-only skb.
  581. * Having multiple payload-only skbs with different lengths of hdr_len is not
  582. * possible. The payload-only skbs should never leave their owner.
  583. */
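/* The transport-layer call sequence from the DOC block above, written
 * out as a sketch. MAX_TCP_HEADER comes from net/tcp.h and error
 * handling is omitted; this mirrors, not reproduces, what TCP does.
 */
#if 0	/* illustrative sketch, not compiled */
static struct sk_buff *example_send_headerless(unsigned int payload_len)
{
	struct sk_buff *skb, *clone;

	skb = alloc_skb(MAX_TCP_HEADER + payload_len, GFP_KERNEL);
	skb_reserve(skb, MAX_TCP_HEADER);	/* room for lower headers */
	skb_put(skb, payload_len);		/* ... fill in payload ... */

	__skb_header_release(skb);		/* mark skb as headerless */
	clone = skb_clone(skb, GFP_KERNEL);	/* clone gets hdr_len set */

	/* The clone goes down the stack; lower layers push their
	 * headers into its reserved headroom.
	 */
	return clone;
}
#endif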
  584. #define SKB_DATAREF_SHIFT 16
  585. #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1)
  586. enum {
  587. SKB_FCLONE_UNAVAILABLE, /* skb has no fclone (from head_cache) */
  588. SKB_FCLONE_ORIG, /* orig skb (from fclone_cache) */
  589. SKB_FCLONE_CLONE, /* companion fclone skb (from fclone_cache) */
  590. };
  591. enum {
  592. SKB_GSO_TCPV4 = 1 << 0,
  593. /* This indicates the skb is from an untrusted source. */
  594. SKB_GSO_DODGY = 1 << 1,
  595. /* This indicates the tcp segment has CWR set. */
  596. SKB_GSO_TCP_ECN = 1 << 2,
  597. SKB_GSO_TCP_FIXEDID = 1 << 3,
  598. SKB_GSO_TCPV6 = 1 << 4,
  599. SKB_GSO_FCOE = 1 << 5,
  600. SKB_GSO_GRE = 1 << 6,
  601. SKB_GSO_GRE_CSUM = 1 << 7,
  602. SKB_GSO_IPXIP4 = 1 << 8,
  603. SKB_GSO_IPXIP6 = 1 << 9,
  604. SKB_GSO_UDP_TUNNEL = 1 << 10,
  605. SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
  606. SKB_GSO_PARTIAL = 1 << 12,
  607. SKB_GSO_TUNNEL_REMCSUM = 1 << 13,
  608. SKB_GSO_SCTP = 1 << 14,
  609. SKB_GSO_ESP = 1 << 15,
  610. SKB_GSO_UDP = 1 << 16,
  611. SKB_GSO_UDP_L4 = 1 << 17,
  612. SKB_GSO_FRAGLIST = 1 << 18,
  613. };
  614. #if BITS_PER_LONG > 32
  615. #define NET_SKBUFF_DATA_USES_OFFSET 1
  616. #endif
  617. #ifdef NET_SKBUFF_DATA_USES_OFFSET
  618. typedef unsigned int sk_buff_data_t;
  619. #else
  620. typedef unsigned char *sk_buff_data_t;
  621. #endif
  622. /**
  623. * DOC: Basic sk_buff geometry
  624. *
  625. * struct sk_buff itself is a metadata structure and does not hold any packet
  626. * data. All the data is held in associated buffers.
  627. *
  628. * &sk_buff.head points to the main "head" buffer. The head buffer is divided
  629. * into two parts:
  630. *
  631. * - data buffer, containing headers and sometimes payload;
  632. * this is the part of the skb operated on by the common helpers
  633. * such as skb_put() or skb_pull();
  634. * - shared info (struct skb_shared_info) which holds an array of pointers
  635. * to read-only data in the (page, offset, length) format.
  636. *
  637. * Optionally &skb_shared_info.frag_list may point to another skb.
  638. *
  639. * Basic diagram may look like this::
  640. *
  641. *                                        ---------------
  642. *                                       | sk_buff       |
  643. *                                        ---------------
  644. *     ,---------------------------  + head
  645. *    /          ,-----------------  + data
  646. *   /          /      ,-----------  + tail
  647. *  |          |      |            , + end
  648. *  |          |      |           |
  649. *  v          v      v           v
  650. *   -----------------------------------------------
  651. *  | headroom | data |  tailroom | skb_shared_info |
  652. *   -----------------------------------------------
  653. *                                 + [page frag]
  654. *                                 + [page frag]
  655. *                                 + [page frag]
  656. *                                 + [page frag]       ---------
  657. *                                 + frag_list    --> | sk_buff |
  658. *                                                     ---------
  659. *
  660. */
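/* The head/data/tail pointers from the diagram above in action: a
 * sketch that reserves headroom and appends payload to the linear
 * data area. The 128-byte headroom is an arbitrary example value.
 */
#if 0	/* illustrative sketch, not compiled */
static struct sk_buff *example_build_frame(const void *payload,
					   unsigned int len)
{
	struct sk_buff *skb = alloc_skb(128 + len, GFP_ATOMIC);

	if (!skb)
		return NULL;

	skb_reserve(skb, 128);			 /* headroom: data == tail */
	memcpy(skb_put(skb, len), payload, len); /* tail advances by len */

	/* A header can later be prepended with skb_push(), which moves
	 * data back toward head, consuming the reserved headroom.
	 */
	return skb;
}
#endif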
  661. /**
  662. * struct sk_buff - socket buffer
  663. * @next: Next buffer in list
  664. * @prev: Previous buffer in list
  665. * @tstamp: Time we arrived/left
  666. * @skb_mstamp_ns: (aka @tstamp) earliest departure time; start point
  667. * for retransmit timer
  668. * @rbnode: RB tree node, alternative to next/prev for netem/tcp
  669. * @list: queue head
  670. * @ll_node: anchor in an llist (eg socket defer_list)
  671. * @sk: Socket we are owned by
  672. * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in
  673. * fragmentation management
  674. * @dev: Device we arrived on/are leaving by
  675. * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL
  676. * @cb: Control buffer. Free for use by every layer. Put private vars here
  677. * @_skb_refdst: destination entry (with norefcount bit)
  678. * @sp: the security path, used for xfrm
  679. * @len: Length of actual data
  680. * @data_len: Data length
  681. * @mac_len: Length of link layer header
  682. * @hdr_len: writable header length of cloned skb
  683. * @csum: Checksum (must include start/offset pair)
  684. * @csum_start: Offset from skb->head where checksumming should start
  685. * @csum_offset: Offset from csum_start where checksum should be stored
  686. * @priority: Packet queueing priority
  687. * @ignore_df: allow local fragmentation
  688. * @cloned: Head may be cloned (check refcnt to be sure)
  689. * @ip_summed: Driver fed us an IP checksum
  690. * @nohdr: Payload reference only, must not modify header
  691. * @pkt_type: Packet class
  692. * @fclone: skbuff clone status
  693. * @ipvs_property: skbuff is owned by ipvs
  694. * @inner_protocol_type: whether the inner protocol is
  695. * ENCAP_TYPE_ETHER or ENCAP_TYPE_IPPROTO
  696. * @remcsum_offload: remote checksum offload is enabled
  697. * @offload_fwd_mark: Packet was L2-forwarded in hardware
  698. * @offload_l3_fwd_mark: Packet was L3-forwarded in hardware
  699. * @tc_skip_classify: do not classify packet. set by IFB device
  700. * @tc_at_ingress: used within tc_classify to distinguish in/egress
  701. * @redirected: packet was redirected by packet classifier
  702. * @from_ingress: packet was redirected from the ingress path
  703. * @nf_skip_egress: packet shall skip nf egress - see netfilter_netdev.h
  704. * @peeked: this packet has been seen already, so stats have been
  705. * done for it, don't do them again
  706. * @nf_trace: netfilter packet trace flag
  707. * @protocol: Packet protocol from driver
  708. * @destructor: Destruct function
  709. * @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue)
  710. * @_sk_redir: socket redirection information for skmsg
  711. * @_nfct: Associated connection, if any (with nfctinfo bits)
  712. * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
  713. * @skb_iif: ifindex of device we arrived on
  714. * @tc_index: Traffic control index
  715. * @hash: the packet hash
  716. * @queue_mapping: Queue mapping for multiqueue devices
  717. * @head_frag: skb was allocated from page fragments,
  718. * not allocated by kmalloc() or vmalloc().
  719. * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
  720. * @pp_recycle: mark the packet for recycling instead of freeing (implies
  721. * page_pool support on driver)
  722. * @active_extensions: active extensions (skb_ext_id types)
  723. * @ndisc_nodetype: router type (from link layer)
  724. * @ooo_okay: allow the mapping of a socket to a queue to be changed
  725. * @l4_hash: indicate hash is a canonical 4-tuple hash over transport
  726. * ports.
  727. * @sw_hash: indicates hash was computed in software stack
  728. * @wifi_acked_valid: wifi_acked was set
  729. * @wifi_acked: whether frame was acked on wifi or not
  730. * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
  731. * @encapsulation: indicates the inner headers in the skbuff are valid
  732. * @encap_hdr_csum: software checksum is needed
  733. * @csum_valid: checksum is already valid
  734. * @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL
  735. * @csum_complete_sw: checksum was completed by software
  736. * @csum_level: indicates the number of consecutive checksums found in
  737. * the packet minus one that have been verified as
  738. * CHECKSUM_UNNECESSARY (max 3)
  739. * @scm_io_uring: SKB holds io_uring registered files
  740. * @dst_pending_confirm: need to confirm neighbour
  741. * @decrypted: Decrypted SKB
  742. * @slow_gro: state present at GRO time, slower prepare step required
  743. * @mono_delivery_time: When set, skb->tstamp has the
  744. * delivery_time in mono clock base (i.e. EDT). Otherwise, the
  745. * skb->tstamp has the (rcv) timestamp at ingress and
  746. * delivery_time at egress.
  747. * @napi_id: id of the NAPI struct this skb came from
  748. * @sender_cpu: (aka @napi_id) source CPU in XPS
  749. * @alloc_cpu: CPU which did the skb allocation.
  750. * @secmark: security marking
  751. * @mark: Generic packet mark
  752. * @reserved_tailroom: (aka @mark) number of bytes of free space available
  753. * at the tail of an sk_buff
  754. * @vlan_present: VLAN tag is present
  755. * @vlan_proto: vlan encapsulation protocol
  756. * @vlan_tci: vlan tag control information
  757. * @inner_protocol: Protocol (encapsulation)
  758. * @inner_ipproto: (aka @inner_protocol) stores ipproto when
  759. * skb->inner_protocol_type == ENCAP_TYPE_IPPROTO;
  760. * @inner_transport_header: Inner transport layer header (encapsulation)
  761. * @inner_network_header: Network layer header (encapsulation)
  762. * @inner_mac_header: Link layer header (encapsulation)
  763. * @transport_header: Transport layer header
  764. * @network_header: Network layer header
  765. * @mac_header: Link layer header
  766. * @kcov_handle: KCOV remote handle for remote coverage collection
  767. * @tail: Tail pointer
  768. * @end: End pointer
  769. * @head: Head of buffer
  770. * @data: Data head pointer
  771. * @truesize: Buffer size
  772. * @users: User count - see {datagram,tcp}.c
  773. * @extensions: allocated extensions, valid if active_extensions is nonzero
  774. */
  775. struct sk_buff {
  776. union {
  777. struct {
  778. /* These two members must be first to match sk_buff_head. */
  779. struct sk_buff *next;
  780. struct sk_buff *prev;
  781. union {
  782. struct net_device *dev;
  783. /* Some protocols might use this space to store information,
  784. * while device pointer would be NULL.
  785. * UDP receive path is one user.
  786. */
  787. unsigned long dev_scratch;
  788. };
  789. };
  790. struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */
  791. struct list_head list;
  792. struct llist_node ll_node;
  793. };
  794. union {
  795. struct sock *sk;
  796. int ip_defrag_offset;
  797. };
  798. union {
  799. ktime_t tstamp;
  800. u64 skb_mstamp_ns; /* earliest departure time */
  801. };
  802. /*
  803. * This is the control buffer. It is free to use for every
  804. * layer. Please put your private variables there. If you
  805. * want to keep them across layers you have to do a skb_clone()
  806. * first. This is owned by whoever has the skb queued ATM.
  807. */
  808. char cb[48] __aligned(8);
  809. union {
  810. struct {
  811. unsigned long _skb_refdst;
  812. void (*destructor)(struct sk_buff *skb);
  813. };
  814. struct list_head tcp_tsorted_anchor;
  815. #ifdef CONFIG_NET_SOCK_MSG
  816. unsigned long _sk_redir;
  817. #endif
  818. };
  819. #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
  820. unsigned long _nfct;
  821. #endif
  822. unsigned int len,
  823. data_len;
  824. __u16 mac_len,
  825. hdr_len;
  826. /* Following fields are _not_ copied in __copy_skb_header()
  827. * Note that queue_mapping is here mostly to fill a hole.
  828. */
  829. __u16 queue_mapping;
  830. /* if you move cloned around you also must adapt those constants */
  831. #ifdef __BIG_ENDIAN_BITFIELD
  832. #define CLONED_MASK (1 << 7)
  833. #else
  834. #define CLONED_MASK 1
  835. #endif
  836. #define CLONED_OFFSET offsetof(struct sk_buff, __cloned_offset)
  837. /* private: */
  838. __u8 __cloned_offset[0];
  839. /* public: */
  840. __u8 cloned:1,
  841. nohdr:1,
  842. fclone:2,
  843. peeked:1,
  844. head_frag:1,
  845. pfmemalloc:1,
  846. pp_recycle:1; /* page_pool recycle indicator */
  847. #ifdef CONFIG_SKB_EXTENSIONS
  848. __u8 active_extensions;
  849. #endif
  850. /* Fields enclosed in headers group are copied
  851. * using a single memcpy() in __copy_skb_header()
  852. */
  853. struct_group(headers,
  854. /* private: */
  855. __u8 __pkt_type_offset[0];
  856. /* public: */
  857. __u8 pkt_type:3; /* see PKT_TYPE_MAX */
  858. __u8 ignore_df:1;
  859. __u8 nf_trace:1;
  860. __u8 ip_summed:2;
  861. __u8 ooo_okay:1;
  862. __u8 l4_hash:1;
  863. __u8 sw_hash:1;
  864. __u8 wifi_acked_valid:1;
  865. __u8 wifi_acked:1;
  866. __u8 no_fcs:1;
  867. /* Indicates the inner headers are valid in the skbuff. */
  868. __u8 encapsulation:1;
  869. __u8 encap_hdr_csum:1;
  870. __u8 csum_valid:1;
  871. /* private: */
  872. __u8 __pkt_vlan_present_offset[0];
  873. /* public: */
  874. __u8 vlan_present:1; /* See PKT_VLAN_PRESENT_BIT */
  875. __u8 csum_complete_sw:1;
  876. __u8 csum_level:2;
  877. __u8 dst_pending_confirm:1;
  878. __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */
  879. #ifdef CONFIG_NET_CLS_ACT
  880. __u8 tc_skip_classify:1;
  881. __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
  882. #endif
  883. #ifdef CONFIG_IPV6_NDISC_NODETYPE
  884. __u8 ndisc_nodetype:2;
  885. #endif
  886. __u8 ipvs_property:1;
  887. __u8 inner_protocol_type:1;
  888. __u8 remcsum_offload:1;
  889. #ifdef CONFIG_NET_SWITCHDEV
  890. __u8 offload_fwd_mark:1;
  891. __u8 offload_l3_fwd_mark:1;
  892. #endif
  893. __u8 redirected:1;
  894. #ifdef CONFIG_NET_REDIRECT
  895. __u8 from_ingress:1;
  896. #endif
  897. #ifdef CONFIG_NETFILTER_SKIP_EGRESS
  898. __u8 nf_skip_egress:1;
  899. #endif
  900. #ifdef CONFIG_TLS_DEVICE
  901. __u8 decrypted:1;
  902. #endif
  903. __u8 slow_gro:1;
  904. __u8 csum_not_inet:1;
  905. __u8 scm_io_uring:1;
  906. #ifdef CONFIG_NET_SCHED
  907. __u16 tc_index; /* traffic control index */
  908. #endif
  909. union {
  910. __wsum csum;
  911. struct {
  912. __u16 csum_start;
  913. __u16 csum_offset;
  914. };
  915. };
  916. __u32 priority;
  917. int skb_iif;
  918. __u32 hash;
  919. __be16 vlan_proto;
  920. __u16 vlan_tci;
  921. #if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS)
  922. union {
  923. unsigned int napi_id;
  924. unsigned int sender_cpu;
  925. };
  926. #endif
  927. u16 alloc_cpu;
  928. #ifdef CONFIG_NETWORK_SECMARK
  929. __u32 secmark;
  930. #endif
  931. union {
  932. __u32 mark;
  933. __u32 reserved_tailroom;
  934. };
  935. union {
  936. __be16 inner_protocol;
  937. __u8 inner_ipproto;
  938. };
  939. __u16 inner_transport_header;
  940. __u16 inner_network_header;
  941. __u16 inner_mac_header;
  942. __be16 protocol;
  943. __u16 transport_header;
  944. __u16 network_header;
  945. __u16 mac_header;
  946. #ifdef CONFIG_KCOV
  947. u64 kcov_handle;
  948. #endif
  949. ANDROID_KABI_RESERVE(1);
  950. ANDROID_KABI_RESERVE(2);
  951. ); /* end headers group */
  952. /* These elements must be at the end, see alloc_skb() for details. */
  953. sk_buff_data_t tail;
  954. sk_buff_data_t end;
  955. unsigned char *head,
  956. *data;
  957. unsigned int truesize;
  958. refcount_t users;
  959. #ifdef CONFIG_SKB_EXTENSIONS
960. /* only usable after checking ->active_extensions != 0 */
  961. struct skb_ext *extensions;
  962. #endif
  963. };
  964. /* if you move pkt_type around you also must adapt those constants */
  965. #ifdef __BIG_ENDIAN_BITFIELD
  966. #define PKT_TYPE_MAX (7 << 5)
  967. #else
  968. #define PKT_TYPE_MAX 7
  969. #endif
  970. #define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset)
  971. /* if you move pkt_vlan_present, tc_at_ingress, or mono_delivery_time
  972. * around, you also must adapt these constants.
  973. */
  974. #ifdef __BIG_ENDIAN_BITFIELD
  975. #define PKT_VLAN_PRESENT_BIT 7
  976. #define TC_AT_INGRESS_MASK (1 << 0)
  977. #define SKB_MONO_DELIVERY_TIME_MASK (1 << 2)
  978. #else
  979. #define PKT_VLAN_PRESENT_BIT 0
  980. #define TC_AT_INGRESS_MASK (1 << 7)
  981. #define SKB_MONO_DELIVERY_TIME_MASK (1 << 5)
  982. #endif
  983. #define PKT_VLAN_PRESENT_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset)
  984. #ifdef __KERNEL__
  985. /*
  986. * Handling routines are only of interest to the kernel
  987. */
  988. #define SKB_ALLOC_FCLONE 0x01
  989. #define SKB_ALLOC_RX 0x02
  990. #define SKB_ALLOC_NAPI 0x04
  991. /**
  992. * skb_pfmemalloc - Test if the skb was allocated from PFMEMALLOC reserves
  993. * @skb: buffer
  994. */
  995. static inline bool skb_pfmemalloc(const struct sk_buff *skb)
  996. {
  997. return unlikely(skb->pfmemalloc);
  998. }
  999. /*
  1000. * skb might have a dst pointer attached, refcounted or not.
  1001. * _skb_refdst low order bit is set if refcount was _not_ taken
  1002. */
  1003. #define SKB_DST_NOREF 1UL
  1004. #define SKB_DST_PTRMASK ~(SKB_DST_NOREF)
  1005. /**
  1006. * skb_dst - returns skb dst_entry
  1007. * @skb: buffer
  1008. *
  1009. * Returns skb dst_entry, regardless of reference taken or not.
  1010. */
  1011. static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
  1012. {
  1013. /* If refdst was not refcounted, check we still are in a
  1014. * rcu_read_lock section
  1015. */
  1016. WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) &&
  1017. !rcu_read_lock_held() &&
  1018. !rcu_read_lock_bh_held());
  1019. return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK);
  1020. }
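/*
 * Illustrative sketch (not taken from this header): when the dst may have
 * been attached without a reference (SKB_DST_NOREF), keep the access inside
 * an RCU read-side section. dst_mtu() here is assumed from <net/dst.h>.
 *
 *	struct dst_entry *dst;
 *
 *	rcu_read_lock();
 *	dst = skb_dst(skb);
 *	if (dst)
 *		mtu = dst_mtu(dst);
 *	rcu_read_unlock();
 */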
  1021. /**
  1022. * skb_dst_set - sets skb dst
  1023. * @skb: buffer
  1024. * @dst: dst entry
  1025. *
  1026. * Sets skb dst, assuming a reference was taken on dst and should
  1027. * be released by skb_dst_drop()
  1028. */
  1029. static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
  1030. {
  1031. skb->slow_gro |= !!dst;
  1032. skb->_skb_refdst = (unsigned long)dst;
  1033. }
  1034. /**
  1035. * skb_dst_set_noref - sets skb dst, hopefully, without taking reference
  1036. * @skb: buffer
  1037. * @dst: dst entry
  1038. *
  1039. * Sets skb dst, assuming a reference was not taken on dst.
  1040. * If dst entry is cached, we do not take reference and dst_release
  1041. * will be avoided by refdst_drop. If dst entry is not cached, we take
  1042. * reference, so that last dst_release can destroy the dst immediately.
  1043. */
  1044. static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
  1045. {
  1046. WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
  1047. skb->slow_gro |= !!dst;
  1048. skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
  1049. }
  1050. /**
  1051. * skb_dst_is_noref - Test if skb dst isn't refcounted
  1052. * @skb: buffer
  1053. */
  1054. static inline bool skb_dst_is_noref(const struct sk_buff *skb)
  1055. {
  1056. return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb);
  1057. }
  1058. /**
  1059. * skb_rtable - Returns the skb &rtable
  1060. * @skb: buffer
  1061. */
  1062. static inline struct rtable *skb_rtable(const struct sk_buff *skb)
  1063. {
  1064. return (struct rtable *)skb_dst(skb);
  1065. }
  1066. /* For mangling skb->pkt_type from user space side from applications
  1067. * such as nft, tc, etc, we only allow a conservative subset of
  1068. * possible pkt_types to be set.
  1069. */
  1070. static inline bool skb_pkt_type_ok(u32 ptype)
  1071. {
  1072. return ptype <= PACKET_OTHERHOST;
  1073. }
  1074. /**
  1075. * skb_napi_id - Returns the skb's NAPI id
  1076. * @skb: buffer
  1077. */
  1078. static inline unsigned int skb_napi_id(const struct sk_buff *skb)
  1079. {
  1080. #ifdef CONFIG_NET_RX_BUSY_POLL
  1081. return skb->napi_id;
  1082. #else
  1083. return 0;
  1084. #endif
  1085. }
  1086. /**
  1087. * skb_unref - decrement the skb's reference count
  1088. * @skb: buffer
  1089. *
  1090. * Returns true if we can free the skb.
  1091. */
  1092. static inline bool skb_unref(struct sk_buff *skb)
  1093. {
  1094. if (unlikely(!skb))
  1095. return false;
  1096. if (likely(refcount_read(&skb->users) == 1))
  1097. smp_rmb();
  1098. else if (likely(!refcount_dec_and_test(&skb->users)))
  1099. return false;
  1100. return true;
  1101. }
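/*
 * A minimal sketch of the usual caller pattern (kfree_skb_reason() and
 * friends do essentially this, plus tracing and drop-reason handling):
 *
 *	if (!skb_unref(skb))
 *		return;
 *	__kfree_skb(skb);
 */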
  1102. void __fix_address
  1103. kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
  1104. /**
  1105. * kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason
  1106. * @skb: buffer to free
  1107. */
  1108. static inline void kfree_skb(struct sk_buff *skb)
  1109. {
  1110. kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED);
  1111. }
  1112. void skb_release_head_state(struct sk_buff *skb);
  1113. void kfree_skb_list_reason(struct sk_buff *segs,
  1114. enum skb_drop_reason reason);
  1115. void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt);
  1116. void skb_tx_error(struct sk_buff *skb);
  1117. static inline void kfree_skb_list(struct sk_buff *segs)
  1118. {
  1119. kfree_skb_list_reason(segs, SKB_DROP_REASON_NOT_SPECIFIED);
  1120. }
  1121. #ifdef CONFIG_TRACEPOINTS
  1122. void consume_skb(struct sk_buff *skb);
  1123. #else
  1124. static inline void consume_skb(struct sk_buff *skb)
  1125. {
  1126. return kfree_skb(skb);
  1127. }
  1128. #endif
  1129. void __consume_stateless_skb(struct sk_buff *skb);
  1130. void __kfree_skb(struct sk_buff *skb);
  1131. extern struct kmem_cache *skbuff_head_cache;
  1132. void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
  1133. bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
  1134. bool *fragstolen, int *delta_truesize);
  1135. struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags,
  1136. int node);
  1137. struct sk_buff *__build_skb(void *data, unsigned int frag_size);
  1138. struct sk_buff *build_skb(void *data, unsigned int frag_size);
  1139. struct sk_buff *build_skb_around(struct sk_buff *skb,
  1140. void *data, unsigned int frag_size);
  1141. void skb_attempt_defer_free(struct sk_buff *skb);
  1142. struct sk_buff *napi_build_skb(void *data, unsigned int frag_size);
  1143. /**
  1144. * alloc_skb - allocate a network buffer
  1145. * @size: size to allocate
  1146. * @priority: allocation mask
  1147. *
  1148. * This function is a convenient wrapper around __alloc_skb().
  1149. */
  1150. static inline struct sk_buff *alloc_skb(unsigned int size,
  1151. gfp_t priority)
  1152. {
  1153. return __alloc_skb(size, priority, 0, NUMA_NO_NODE);
  1154. }
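/*
 * Minimal allocation sketch (illustrative only): reserve room for headers
 * that will be pushed later, then append the payload. HDR_ROOM, payload and
 * len are made-up names for this example.
 *
 *	skb = alloc_skb(HDR_ROOM + len, GFP_KERNEL);
 *	if (!skb)
 *		return -ENOMEM;
 *	skb_reserve(skb, HDR_ROOM);
 *	skb_put_data(skb, payload, len);
 */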
  1155. struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
  1156. unsigned long data_len,
  1157. int max_page_order,
  1158. int *errcode,
  1159. gfp_t gfp_mask);
  1160. struct sk_buff *alloc_skb_for_msg(struct sk_buff *first);
  1161. /* Layout of fast clones : [skb1][skb2][fclone_ref] */
  1162. struct sk_buff_fclones {
  1163. struct sk_buff skb1;
  1164. struct sk_buff skb2;
  1165. refcount_t fclone_ref;
  1166. };
  1167. /**
  1168. * skb_fclone_busy - check if fclone is busy
  1169. * @sk: socket
  1170. * @skb: buffer
  1171. *
  1172. * Returns true if skb is a fast clone, and its clone is not freed.
  1173. * Some drivers call skb_orphan() in their ndo_start_xmit(),
1174. * so we also check that this didn't happen.
  1175. */
  1176. static inline bool skb_fclone_busy(const struct sock *sk,
  1177. const struct sk_buff *skb)
  1178. {
  1179. const struct sk_buff_fclones *fclones;
  1180. fclones = container_of(skb, struct sk_buff_fclones, skb1);
  1181. return skb->fclone == SKB_FCLONE_ORIG &&
  1182. refcount_read(&fclones->fclone_ref) > 1 &&
  1183. READ_ONCE(fclones->skb2.sk) == sk;
  1184. }
  1185. /**
  1186. * alloc_skb_fclone - allocate a network buffer from fclone cache
  1187. * @size: size to allocate
  1188. * @priority: allocation mask
  1189. *
  1190. * This function is a convenient wrapper around __alloc_skb().
  1191. */
  1192. static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
  1193. gfp_t priority)
  1194. {
  1195. return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE);
  1196. }
  1197. struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
  1198. void skb_headers_offset_update(struct sk_buff *skb, int off);
  1199. int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
  1200. struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
  1201. void skb_copy_header(struct sk_buff *new, const struct sk_buff *old);
  1202. struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority);
  1203. struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
  1204. gfp_t gfp_mask, bool fclone);
  1205. static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom,
  1206. gfp_t gfp_mask)
  1207. {
  1208. return __pskb_copy_fclone(skb, headroom, gfp_mask, false);
  1209. }
  1210. int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask);
  1211. struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
  1212. unsigned int headroom);
  1213. struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom);
  1214. struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom,
  1215. int newtailroom, gfp_t priority);
  1216. int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
  1217. int offset, int len);
  1218. int __must_check skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg,
  1219. int offset, int len);
  1220. int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
  1221. int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error);
  1222. /**
  1223. * skb_pad - zero pad the tail of an skb
  1224. * @skb: buffer to pad
  1225. * @pad: space to pad
  1226. *
  1227. * Ensure that a buffer is followed by a padding area that is zero
  1228. * filled. Used by network drivers which may DMA or transfer data
  1229. * beyond the buffer end onto the wire.
  1230. *
1231. * May return an error in out-of-memory cases. The skb is freed on error.
  1232. */
  1233. static inline int skb_pad(struct sk_buff *skb, int pad)
  1234. {
  1235. return __skb_pad(skb, pad, true);
  1236. }
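/*
 * Illustrative sketch: a driver that must put at least ETH_ZLEN bytes on the
 * wire can zero-pad the tail before DMA. Note that skb_pad() only guarantees
 * zeroed bytes past the data; it does not update skb->len, and the skb is
 * already freed when an error is returned.
 *
 *	if (skb->len < ETH_ZLEN &&
 *	    skb_pad(skb, ETH_ZLEN - skb->len))
 *		return NETDEV_TX_OK;
 */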
  1237. #define dev_kfree_skb(a) consume_skb(a)
  1238. int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
  1239. int offset, size_t size);
  1240. struct skb_seq_state {
  1241. __u32 lower_offset;
  1242. __u32 upper_offset;
  1243. __u32 frag_idx;
  1244. __u32 stepped_offset;
  1245. struct sk_buff *root_skb;
  1246. struct sk_buff *cur_skb;
  1247. __u8 *frag_data;
  1248. __u32 frag_off;
  1249. };
  1250. void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
  1251. unsigned int to, struct skb_seq_state *st);
  1252. unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
  1253. struct skb_seq_state *st);
  1254. void skb_abort_seq_read(struct skb_seq_state *st);
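/*
 * Usage sketch for the sequential reader above (illustrative only): walk the
 * data of a possibly non-linear skb without copying it. want_more() is a
 * made-up predicate; skb_abort_seq_read() is only needed when stopping
 * before skb_seq_read() has returned 0.
 *
 *	struct skb_seq_state st;
 *	const u8 *data;
 *	unsigned int consumed = 0, avail;
 *
 *	skb_prepare_seq_read(skb, 0, skb->len, &st);
 *	while ((avail = skb_seq_read(consumed, &data, &st)) != 0) {
 *		if (!want_more(data, avail)) {
 *			skb_abort_seq_read(&st);
 *			break;
 *		}
 *		consumed += avail;
 *	}
 */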
  1255. unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
  1256. unsigned int to, struct ts_config *config);
  1257. /*
  1258. * Packet hash types specify the type of hash in skb_set_hash.
  1259. *
  1260. * Hash types refer to the protocol layer addresses which are used to
  1261. * construct a packet's hash. The hashes are used to differentiate or identify
  1262. * flows of the protocol layer for the hash type. Hash types are either
  1263. * layer-2 (L2), layer-3 (L3), or layer-4 (L4).
  1264. *
  1265. * Properties of hashes:
  1266. *
  1267. * 1) Two packets in different flows have different hash values
  1268. * 2) Two packets in the same flow should have the same hash value
  1269. *
  1270. * A hash at a higher layer is considered to be more specific. A driver should
  1271. * set the most specific hash possible.
  1272. *
  1273. * A driver cannot indicate a more specific hash than the layer at which a hash
  1274. * was computed. For instance an L3 hash cannot be set as an L4 hash.
  1275. *
  1276. * A driver may indicate a hash level which is less specific than the
  1277. * actual layer the hash was computed on. For instance, a hash computed
  1278. * at L4 may be considered an L3 hash. This should only be done if the
  1279. * driver can't unambiguously determine that the HW computed the hash at
  1280. * the higher layer. Note that the "should" in the second property above
  1281. * permits this.
  1282. */
  1283. enum pkt_hash_types {
  1284. PKT_HASH_TYPE_NONE, /* Undefined type */
  1285. PKT_HASH_TYPE_L2, /* Input: src_MAC, dest_MAC */
  1286. PKT_HASH_TYPE_L3, /* Input: src_IP, dst_IP */
  1287. PKT_HASH_TYPE_L4, /* Input: src_IP, dst_IP, src_port, dst_port */
  1288. };
  1289. static inline void skb_clear_hash(struct sk_buff *skb)
  1290. {
  1291. skb->hash = 0;
  1292. skb->sw_hash = 0;
  1293. skb->l4_hash = 0;
  1294. }
  1295. static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb)
  1296. {
  1297. if (!skb->l4_hash)
  1298. skb_clear_hash(skb);
  1299. }
  1300. static inline void
  1301. __skb_set_hash(struct sk_buff *skb, __u32 hash, bool is_sw, bool is_l4)
  1302. {
  1303. skb->l4_hash = is_l4;
  1304. skb->sw_hash = is_sw;
  1305. skb->hash = hash;
  1306. }
  1307. static inline void
  1308. skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type)
  1309. {
  1310. /* Used by drivers to set hash from HW */
  1311. __skb_set_hash(skb, hash, false, type == PKT_HASH_TYPE_L4);
  1312. }
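/*
 * Illustrative driver-side sketch: report a hardware RSS hash to the stack.
 * rx_desc->hash and rx_desc->l4_valid are hypothetical fields of a
 * device-specific descriptor.
 *
 *	skb_set_hash(skb, le32_to_cpu(rx_desc->hash),
 *		     rx_desc->l4_valid ? PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
 */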
  1313. static inline void
  1314. __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4)
  1315. {
  1316. __skb_set_hash(skb, hash, true, is_l4);
  1317. }
  1318. void __skb_get_hash(struct sk_buff *skb);
  1319. u32 __skb_get_hash_symmetric(const struct sk_buff *skb);
  1320. u32 skb_get_poff(const struct sk_buff *skb);
  1321. u32 __skb_get_poff(const struct sk_buff *skb, const void *data,
  1322. const struct flow_keys_basic *keys, int hlen);
  1323. __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
  1324. const void *data, int hlen_proto);
  1325. static inline __be32 skb_flow_get_ports(const struct sk_buff *skb,
  1326. int thoff, u8 ip_proto)
  1327. {
  1328. return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0);
  1329. }
  1330. void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
  1331. const struct flow_dissector_key *key,
  1332. unsigned int key_count);
  1333. struct bpf_flow_dissector;
  1334. u32 bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
  1335. __be16 proto, int nhoff, int hlen, unsigned int flags);
  1336. bool __skb_flow_dissect(const struct net *net,
  1337. const struct sk_buff *skb,
  1338. struct flow_dissector *flow_dissector,
  1339. void *target_container, const void *data,
  1340. __be16 proto, int nhoff, int hlen, unsigned int flags);
  1341. static inline bool skb_flow_dissect(const struct sk_buff *skb,
  1342. struct flow_dissector *flow_dissector,
  1343. void *target_container, unsigned int flags)
  1344. {
  1345. return __skb_flow_dissect(NULL, skb, flow_dissector,
  1346. target_container, NULL, 0, 0, 0, flags);
  1347. }
  1348. static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
  1349. struct flow_keys *flow,
  1350. unsigned int flags)
  1351. {
  1352. memset(flow, 0, sizeof(*flow));
  1353. return __skb_flow_dissect(NULL, skb, &flow_keys_dissector,
  1354. flow, NULL, 0, 0, 0, flags);
  1355. }
  1356. static inline bool
  1357. skb_flow_dissect_flow_keys_basic(const struct net *net,
  1358. const struct sk_buff *skb,
  1359. struct flow_keys_basic *flow,
  1360. const void *data, __be16 proto,
  1361. int nhoff, int hlen, unsigned int flags)
  1362. {
  1363. memset(flow, 0, sizeof(*flow));
  1364. return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow,
  1365. data, proto, nhoff, hlen, flags);
  1366. }
  1367. void skb_flow_dissect_meta(const struct sk_buff *skb,
  1368. struct flow_dissector *flow_dissector,
  1369. void *target_container);
1370. /* Gets the skb's connection tracking info. ctinfo_map should be an
1371. * array of mapsize entries that translate enum ip_conntrack_info states
1372. * to user states.
  1373. */
  1374. void
  1375. skb_flow_dissect_ct(const struct sk_buff *skb,
  1376. struct flow_dissector *flow_dissector,
  1377. void *target_container,
  1378. u16 *ctinfo_map, size_t mapsize,
  1379. bool post_ct, u16 zone);
  1380. void
  1381. skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
  1382. struct flow_dissector *flow_dissector,
  1383. void *target_container);
  1384. void skb_flow_dissect_hash(const struct sk_buff *skb,
  1385. struct flow_dissector *flow_dissector,
  1386. void *target_container);
  1387. static inline __u32 skb_get_hash(struct sk_buff *skb)
  1388. {
  1389. if (!skb->l4_hash && !skb->sw_hash)
  1390. __skb_get_hash(skb);
  1391. return skb->hash;
  1392. }
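/*
 * Illustrative sketch: a naive use of the flow hash for queue selection.
 * num_tx_queues is a made-up variable; real transmit paths normally go
 * through netdev_pick_tx()/skb_tx_hash() instead.
 *
 *	queue = skb_get_hash(skb) % num_tx_queues;
 */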
  1393. static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6)
  1394. {
  1395. if (!skb->l4_hash && !skb->sw_hash) {
  1396. struct flow_keys keys;
  1397. __u32 hash = __get_hash_from_flowi6(fl6, &keys);
  1398. __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
  1399. }
  1400. return skb->hash;
  1401. }
  1402. __u32 skb_get_hash_perturb(const struct sk_buff *skb,
  1403. const siphash_key_t *perturb);
  1404. static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
  1405. {
  1406. return skb->hash;
  1407. }
  1408. static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
  1409. {
  1410. to->hash = from->hash;
  1411. to->sw_hash = from->sw_hash;
  1412. to->l4_hash = from->l4_hash;
  1413. };
  1414. static inline int skb_cmp_decrypted(const struct sk_buff *skb1,
  1415. const struct sk_buff *skb2)
  1416. {
  1417. #ifdef CONFIG_TLS_DEVICE
  1418. return skb2->decrypted - skb1->decrypted;
  1419. #else
  1420. return 0;
  1421. #endif
  1422. }
  1423. static inline void skb_copy_decrypted(struct sk_buff *to,
  1424. const struct sk_buff *from)
  1425. {
  1426. #ifdef CONFIG_TLS_DEVICE
  1427. to->decrypted = from->decrypted;
  1428. #endif
  1429. }
  1430. #ifdef NET_SKBUFF_DATA_USES_OFFSET
  1431. static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
  1432. {
  1433. return skb->head + skb->end;
  1434. }
  1435. static inline unsigned int skb_end_offset(const struct sk_buff *skb)
  1436. {
  1437. return skb->end;
  1438. }
  1439. static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset)
  1440. {
  1441. skb->end = offset;
  1442. }
  1443. #else
  1444. static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
  1445. {
  1446. return skb->end;
  1447. }
  1448. static inline unsigned int skb_end_offset(const struct sk_buff *skb)
  1449. {
  1450. return skb->end - skb->head;
  1451. }
  1452. static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset)
  1453. {
  1454. skb->end = skb->head + offset;
  1455. }
  1456. #endif
  1457. struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
  1458. struct ubuf_info *uarg);
  1459. void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref);
  1460. void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
  1461. bool success);
  1462. int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
  1463. struct sk_buff *skb, struct iov_iter *from,
  1464. size_t length);
  1465. static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb,
  1466. struct msghdr *msg, int len)
  1467. {
  1468. return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len);
  1469. }
  1470. int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
  1471. struct msghdr *msg, int len,
  1472. struct ubuf_info *uarg);
  1473. /* Internal */
  1474. #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB)))
  1475. static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb)
  1476. {
  1477. return &skb_shinfo(skb)->hwtstamps;
  1478. }
  1479. static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb)
  1480. {
  1481. bool is_zcopy = skb && skb_shinfo(skb)->flags & SKBFL_ZEROCOPY_ENABLE;
  1482. return is_zcopy ? skb_uarg(skb) : NULL;
  1483. }
  1484. static inline bool skb_zcopy_pure(const struct sk_buff *skb)
  1485. {
  1486. return skb_shinfo(skb)->flags & SKBFL_PURE_ZEROCOPY;
  1487. }
  1488. static inline bool skb_zcopy_managed(const struct sk_buff *skb)
  1489. {
  1490. return skb_shinfo(skb)->flags & SKBFL_MANAGED_FRAG_REFS;
  1491. }
  1492. static inline bool skb_pure_zcopy_same(const struct sk_buff *skb1,
  1493. const struct sk_buff *skb2)
  1494. {
  1495. return skb_zcopy_pure(skb1) == skb_zcopy_pure(skb2);
  1496. }
  1497. static inline void net_zcopy_get(struct ubuf_info *uarg)
  1498. {
  1499. refcount_inc(&uarg->refcnt);
  1500. }
  1501. static inline void skb_zcopy_init(struct sk_buff *skb, struct ubuf_info *uarg)
  1502. {
  1503. skb_shinfo(skb)->destructor_arg = uarg;
  1504. skb_shinfo(skb)->flags |= uarg->flags;
  1505. }
  1506. static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg,
  1507. bool *have_ref)
  1508. {
  1509. if (skb && uarg && !skb_zcopy(skb)) {
  1510. if (unlikely(have_ref && *have_ref))
  1511. *have_ref = false;
  1512. else
  1513. net_zcopy_get(uarg);
  1514. skb_zcopy_init(skb, uarg);
  1515. }
  1516. }
  1517. static inline void skb_zcopy_set_nouarg(struct sk_buff *skb, void *val)
  1518. {
  1519. skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t) val | 0x1UL);
  1520. skb_shinfo(skb)->flags |= SKBFL_ZEROCOPY_FRAG;
  1521. }
  1522. static inline bool skb_zcopy_is_nouarg(struct sk_buff *skb)
  1523. {
  1524. return (uintptr_t) skb_shinfo(skb)->destructor_arg & 0x1UL;
  1525. }
  1526. static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb)
  1527. {
  1528. return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL);
  1529. }
  1530. static inline void net_zcopy_put(struct ubuf_info *uarg)
  1531. {
  1532. if (uarg)
  1533. uarg->callback(NULL, uarg, true);
  1534. }
  1535. static inline void net_zcopy_put_abort(struct ubuf_info *uarg, bool have_uref)
  1536. {
  1537. if (uarg) {
  1538. if (uarg->callback == msg_zerocopy_callback)
  1539. msg_zerocopy_put_abort(uarg, have_uref);
  1540. else if (have_uref)
  1541. net_zcopy_put(uarg);
  1542. }
  1543. }
  1544. /* Release a reference on a zerocopy structure */
  1545. static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success)
  1546. {
  1547. struct ubuf_info *uarg = skb_zcopy(skb);
  1548. if (uarg) {
  1549. if (!skb_zcopy_is_nouarg(skb))
  1550. uarg->callback(skb, uarg, zerocopy_success);
  1551. skb_shinfo(skb)->flags &= ~SKBFL_ALL_ZEROCOPY;
  1552. }
  1553. }
  1554. void __skb_zcopy_downgrade_managed(struct sk_buff *skb);
  1555. static inline void skb_zcopy_downgrade_managed(struct sk_buff *skb)
  1556. {
  1557. if (unlikely(skb_zcopy_managed(skb)))
  1558. __skb_zcopy_downgrade_managed(skb);
  1559. }
  1560. static inline void skb_mark_not_on_list(struct sk_buff *skb)
  1561. {
  1562. skb->next = NULL;
  1563. }
  1564. /* Iterate through singly-linked GSO fragments of an skb. */
  1565. #define skb_list_walk_safe(first, skb, next_skb) \
  1566. for ((skb) = (first), (next_skb) = (skb) ? (skb)->next : NULL; (skb); \
  1567. (skb) = (next_skb), (next_skb) = (skb) ? (skb)->next : NULL)
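/*
 * Usage sketch (illustrative only): walk and consume a segment list, e.g.
 * the one produced by GSO segmentation. The "safe" form allows unlinking or
 * freeing the current skb inside the loop. xmit_one() is a made-up helper.
 *
 *	struct sk_buff *seg, *next;
 *
 *	skb_list_walk_safe(segs, seg, next) {
 *		skb_mark_not_on_list(seg);
 *		xmit_one(seg);
 *	}
 */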
  1568. static inline void skb_list_del_init(struct sk_buff *skb)
  1569. {
  1570. __list_del_entry(&skb->list);
  1571. skb_mark_not_on_list(skb);
  1572. }
  1573. /**
  1574. * skb_queue_empty - check if a queue is empty
  1575. * @list: queue head
  1576. *
  1577. * Returns true if the queue is empty, false otherwise.
  1578. */
  1579. static inline int skb_queue_empty(const struct sk_buff_head *list)
  1580. {
  1581. return list->next == (const struct sk_buff *) list;
  1582. }
  1583. /**
  1584. * skb_queue_empty_lockless - check if a queue is empty
  1585. * @list: queue head
  1586. *
  1587. * Returns true if the queue is empty, false otherwise.
  1588. * This variant can be used in lockless contexts.
  1589. */
  1590. static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list)
  1591. {
  1592. return READ_ONCE(list->next) == (const struct sk_buff *) list;
  1593. }
  1594. /**
  1595. * skb_queue_is_last - check if skb is the last entry in the queue
  1596. * @list: queue head
  1597. * @skb: buffer
  1598. *
  1599. * Returns true if @skb is the last buffer on the list.
  1600. */
  1601. static inline bool skb_queue_is_last(const struct sk_buff_head *list,
  1602. const struct sk_buff *skb)
  1603. {
  1604. return skb->next == (const struct sk_buff *) list;
  1605. }
  1606. /**
  1607. * skb_queue_is_first - check if skb is the first entry in the queue
  1608. * @list: queue head
  1609. * @skb: buffer
  1610. *
  1611. * Returns true if @skb is the first buffer on the list.
  1612. */
  1613. static inline bool skb_queue_is_first(const struct sk_buff_head *list,
  1614. const struct sk_buff *skb)
  1615. {
  1616. return skb->prev == (const struct sk_buff *) list;
  1617. }
  1618. /**
  1619. * skb_queue_next - return the next packet in the queue
  1620. * @list: queue head
  1621. * @skb: current buffer
  1622. *
  1623. * Return the next packet in @list after @skb. It is only valid to
  1624. * call this if skb_queue_is_last() evaluates to false.
  1625. */
  1626. static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list,
  1627. const struct sk_buff *skb)
  1628. {
  1629. /* This BUG_ON may seem severe, but if we just return then we
  1630. * are going to dereference garbage.
  1631. */
  1632. BUG_ON(skb_queue_is_last(list, skb));
  1633. return skb->next;
  1634. }
  1635. /**
  1636. * skb_queue_prev - return the prev packet in the queue
  1637. * @list: queue head
  1638. * @skb: current buffer
  1639. *
  1640. * Return the prev packet in @list before @skb. It is only valid to
  1641. * call this if skb_queue_is_first() evaluates to false.
  1642. */
  1643. static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list,
  1644. const struct sk_buff *skb)
  1645. {
  1646. /* This BUG_ON may seem severe, but if we just return then we
  1647. * are going to dereference garbage.
  1648. */
  1649. BUG_ON(skb_queue_is_first(list, skb));
  1650. return skb->prev;
  1651. }
  1652. /**
  1653. * skb_get - reference buffer
  1654. * @skb: buffer to reference
  1655. *
  1656. * Makes another reference to a socket buffer and returns a pointer
  1657. * to the buffer.
  1658. */
  1659. static inline struct sk_buff *skb_get(struct sk_buff *skb)
  1660. {
  1661. refcount_inc(&skb->users);
  1662. return skb;
  1663. }
  1664. /*
  1665. * If users == 1, we are the only owner and can avoid redundant atomic changes.
  1666. */
  1667. /**
  1668. * skb_cloned - is the buffer a clone
  1669. * @skb: buffer to check
  1670. *
  1671. * Returns true if the buffer was generated with skb_clone() and is
  1672. * one of multiple shared copies of the buffer. Cloned buffers are
  1673. * shared data so must not be written to under normal circumstances.
  1674. */
  1675. static inline int skb_cloned(const struct sk_buff *skb)
  1676. {
  1677. return skb->cloned &&
  1678. (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1;
  1679. }
  1680. static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
  1681. {
  1682. might_sleep_if(gfpflags_allow_blocking(pri));
  1683. if (skb_cloned(skb))
  1684. return pskb_expand_head(skb, 0, 0, pri);
  1685. return 0;
  1686. }
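/*
 * Illustrative sketch: force a private copy of the data area before writing
 * to an skb that may be shared with a clone.
 *
 *	if (skb_unclone(skb, GFP_ATOMIC))
 *		goto drop;
 *	... modify headers or payload in place ...
 */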
  1687. /* This variant of skb_unclone() makes sure skb->truesize
  1688. * and skb_end_offset() are not changed, whenever a new skb->head is needed.
  1689. *
1690. * Indeed there is no guarantee that two kmalloc(X) allocations have the
1691. * same ksize() when various debugging features are in place.
  1692. */
  1693. int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri);
  1694. static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
  1695. {
  1696. might_sleep_if(gfpflags_allow_blocking(pri));
  1697. if (skb_cloned(skb))
  1698. return __skb_unclone_keeptruesize(skb, pri);
  1699. return 0;
  1700. }
  1701. /**
  1702. * skb_header_cloned - is the header a clone
  1703. * @skb: buffer to check
  1704. *
  1705. * Returns true if modifying the header part of the buffer requires
  1706. * the data to be copied.
  1707. */
  1708. static inline int skb_header_cloned(const struct sk_buff *skb)
  1709. {
  1710. int dataref;
  1711. if (!skb->cloned)
  1712. return 0;
  1713. dataref = atomic_read(&skb_shinfo(skb)->dataref);
  1714. dataref = (dataref & SKB_DATAREF_MASK) - (dataref >> SKB_DATAREF_SHIFT);
  1715. return dataref != 1;
  1716. }
  1717. static inline int skb_header_unclone(struct sk_buff *skb, gfp_t pri)
  1718. {
  1719. might_sleep_if(gfpflags_allow_blocking(pri));
  1720. if (skb_header_cloned(skb))
  1721. return pskb_expand_head(skb, 0, 0, pri);
  1722. return 0;
  1723. }
  1724. /**
  1725. * __skb_header_release() - allow clones to use the headroom
  1726. * @skb: buffer to operate on
  1727. *
  1728. * See "DOC: dataref and headerless skbs".
  1729. */
  1730. static inline void __skb_header_release(struct sk_buff *skb)
  1731. {
  1732. skb->nohdr = 1;
  1733. atomic_set(&skb_shinfo(skb)->dataref, 1 + (1 << SKB_DATAREF_SHIFT));
  1734. }
  1735. /**
  1736. * skb_shared - is the buffer shared
  1737. * @skb: buffer to check
  1738. *
  1739. * Returns true if more than one person has a reference to this
  1740. * buffer.
  1741. */
  1742. static inline int skb_shared(const struct sk_buff *skb)
  1743. {
  1744. return refcount_read(&skb->users) != 1;
  1745. }
  1746. /**
  1747. * skb_share_check - check if buffer is shared and if so clone it
  1748. * @skb: buffer to check
  1749. * @pri: priority for memory allocation
  1750. *
  1751. * If the buffer is shared the buffer is cloned and the old copy
  1752. * drops a reference. A new clone with a single reference is returned.
1753. * If the buffer is not shared the original buffer is returned. When
1754. * called from interrupt context or with spinlocks held, @pri must
1755. * be %GFP_ATOMIC.
  1756. *
  1757. * NULL is returned on a memory allocation failure.
  1758. */
  1759. static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri)
  1760. {
  1761. might_sleep_if(gfpflags_allow_blocking(pri));
  1762. if (skb_shared(skb)) {
  1763. struct sk_buff *nskb = skb_clone(skb, pri);
  1764. if (likely(nskb))
  1765. consume_skb(skb);
  1766. else
  1767. kfree_skb(skb);
  1768. skb = nskb;
  1769. }
  1770. return skb;
  1771. }
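/*
 * Typical receive-path sketch (illustrative only): take ownership before
 * modifying the skb. On allocation failure the skb has already been freed.
 *
 *	skb = skb_share_check(skb, GFP_ATOMIC);
 *	if (!skb)
 *		return NET_RX_DROP;
 */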
  1772. /*
  1773. * Copy shared buffers into a new sk_buff. We effectively do COW on
  1774. * packets to handle cases where we have a local reader and forward
  1775. * and a couple of other messy ones. The normal one is tcpdumping
1776. * a packet that's being forwarded.
  1777. */
  1778. /**
  1779. * skb_unshare - make a copy of a shared buffer
  1780. * @skb: buffer to check
  1781. * @pri: priority for memory allocation
  1782. *
  1783. * If the socket buffer is a clone then this function creates a new
  1784. * copy of the data, drops a reference count on the old copy and returns
  1785. * the new copy with the reference count at 1. If the buffer is not a clone
  1786. * the original buffer is returned. When called with a spinlock held or
1787. * from interrupt context, @pri must be %GFP_ATOMIC.
  1788. *
  1789. * %NULL is returned on a memory allocation failure.
  1790. */
  1791. static inline struct sk_buff *skb_unshare(struct sk_buff *skb,
  1792. gfp_t pri)
  1793. {
  1794. might_sleep_if(gfpflags_allow_blocking(pri));
  1795. if (skb_cloned(skb)) {
  1796. struct sk_buff *nskb = skb_copy(skb, pri);
  1797. /* Free our shared copy */
  1798. if (likely(nskb))
  1799. consume_skb(skb);
  1800. else
  1801. kfree_skb(skb);
  1802. skb = nskb;
  1803. }
  1804. return skb;
  1805. }
  1806. /**
  1807. * skb_peek - peek at the head of an &sk_buff_head
  1808. * @list_: list to peek at
  1809. *
  1810. * Peek an &sk_buff. Unlike most other operations you _MUST_
  1811. * be careful with this one. A peek leaves the buffer on the
  1812. * list and someone else may run off with it. You must hold
  1813. * the appropriate locks or have a private queue to do this.
  1814. *
  1815. * Returns %NULL for an empty list or a pointer to the head element.
  1816. * The reference count is not incremented and the reference is therefore
  1817. * volatile. Use with caution.
  1818. */
  1819. static inline struct sk_buff *skb_peek(const struct sk_buff_head *list_)
  1820. {
  1821. struct sk_buff *skb = list_->next;
  1822. if (skb == (struct sk_buff *)list_)
  1823. skb = NULL;
  1824. return skb;
  1825. }
  1826. /**
  1827. * __skb_peek - peek at the head of a non-empty &sk_buff_head
  1828. * @list_: list to peek at
  1829. *
  1830. * Like skb_peek(), but the caller knows that the list is not empty.
  1831. */
  1832. static inline struct sk_buff *__skb_peek(const struct sk_buff_head *list_)
  1833. {
  1834. return list_->next;
  1835. }
  1836. /**
  1837. * skb_peek_next - peek skb following the given one from a queue
  1838. * @skb: skb to start from
  1839. * @list_: list to peek at
  1840. *
  1841. * Returns %NULL when the end of the list is met or a pointer to the
  1842. * next element. The reference count is not incremented and the
  1843. * reference is therefore volatile. Use with caution.
  1844. */
  1845. static inline struct sk_buff *skb_peek_next(struct sk_buff *skb,
  1846. const struct sk_buff_head *list_)
  1847. {
  1848. struct sk_buff *next = skb->next;
  1849. if (next == (struct sk_buff *)list_)
  1850. next = NULL;
  1851. return next;
  1852. }
  1853. /**
  1854. * skb_peek_tail - peek at the tail of an &sk_buff_head
  1855. * @list_: list to peek at
  1856. *
  1857. * Peek an &sk_buff. Unlike most other operations you _MUST_
  1858. * be careful with this one. A peek leaves the buffer on the
  1859. * list and someone else may run off with it. You must hold
  1860. * the appropriate locks or have a private queue to do this.
  1861. *
  1862. * Returns %NULL for an empty list or a pointer to the tail element.
  1863. * The reference count is not incremented and the reference is therefore
  1864. * volatile. Use with caution.
  1865. */
  1866. static inline struct sk_buff *skb_peek_tail(const struct sk_buff_head *list_)
  1867. {
  1868. struct sk_buff *skb = READ_ONCE(list_->prev);
  1869. if (skb == (struct sk_buff *)list_)
  1870. skb = NULL;
  1871. return skb;
  1872. }
  1873. /**
  1874. * skb_queue_len - get queue length
  1875. * @list_: list to measure
  1876. *
  1877. * Return the length of an &sk_buff queue.
  1878. */
  1879. static inline __u32 skb_queue_len(const struct sk_buff_head *list_)
  1880. {
  1881. return list_->qlen;
  1882. }
  1883. /**
  1884. * skb_queue_len_lockless - get queue length
  1885. * @list_: list to measure
  1886. *
  1887. * Return the length of an &sk_buff queue.
  1888. * This variant can be used in lockless contexts.
  1889. */
  1890. static inline __u32 skb_queue_len_lockless(const struct sk_buff_head *list_)
  1891. {
  1892. return READ_ONCE(list_->qlen);
  1893. }
  1894. /**
  1895. * __skb_queue_head_init - initialize non-spinlock portions of sk_buff_head
  1896. * @list: queue to initialize
  1897. *
  1898. * This initializes only the list and queue length aspects of
1899. * an sk_buff_head object. This allows initializing the list
  1900. * aspects of an sk_buff_head without reinitializing things like
  1901. * the spinlock. It can also be used for on-stack sk_buff_head
  1902. * objects where the spinlock is known to not be used.
  1903. */
  1904. static inline void __skb_queue_head_init(struct sk_buff_head *list)
  1905. {
  1906. list->prev = list->next = (struct sk_buff *)list;
  1907. list->qlen = 0;
  1908. }
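/*
 * Illustrative sketch: an on-stack queue used only by the current context,
 * so the lock-free helpers are sufficient. process() is a made-up consumer.
 *
 *	struct sk_buff_head tmp;
 *
 *	__skb_queue_head_init(&tmp);
 *	__skb_queue_tail(&tmp, skb);
 *	...
 *	while ((skb = __skb_dequeue(&tmp)) != NULL)
 *		process(skb);
 */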
  1909. /*
  1910. * This function creates a split out lock class for each invocation;
  1911. * this is needed for now since a whole lot of users of the skb-queue
  1912. * infrastructure in drivers have different locking usage (in hardirq)
  1913. * than the networking core (in softirq only). In the long run either the
1914. * network layer or drivers should gain annotations to consolidate the
  1915. * main types of usage into 3 classes.
  1916. */
  1917. static inline void skb_queue_head_init(struct sk_buff_head *list)
  1918. {
  1919. spin_lock_init(&list->lock);
  1920. __skb_queue_head_init(list);
  1921. }
  1922. static inline void skb_queue_head_init_class(struct sk_buff_head *list,
  1923. struct lock_class_key *class)
  1924. {
  1925. skb_queue_head_init(list);
  1926. lockdep_set_class(&list->lock, class);
  1927. }
  1928. /*
  1929. * Insert an sk_buff on a list.
  1930. *
  1931. * The "__skb_xxxx()" functions are the non-atomic ones that
  1932. * can only be called with interrupts disabled.
  1933. */
  1934. static inline void __skb_insert(struct sk_buff *newsk,
  1935. struct sk_buff *prev, struct sk_buff *next,
  1936. struct sk_buff_head *list)
  1937. {
  1938. /* See skb_queue_empty_lockless() and skb_peek_tail()
  1939. * for the opposite READ_ONCE()
  1940. */
  1941. WRITE_ONCE(newsk->next, next);
  1942. WRITE_ONCE(newsk->prev, prev);
  1943. WRITE_ONCE(((struct sk_buff_list *)next)->prev, newsk);
  1944. WRITE_ONCE(((struct sk_buff_list *)prev)->next, newsk);
  1945. WRITE_ONCE(list->qlen, list->qlen + 1);
  1946. }
  1947. static inline void __skb_queue_splice(const struct sk_buff_head *list,
  1948. struct sk_buff *prev,
  1949. struct sk_buff *next)
  1950. {
  1951. struct sk_buff *first = list->next;
  1952. struct sk_buff *last = list->prev;
  1953. WRITE_ONCE(first->prev, prev);
  1954. WRITE_ONCE(prev->next, first);
  1955. WRITE_ONCE(last->next, next);
  1956. WRITE_ONCE(next->prev, last);
  1957. }
  1958. /**
1959. * skb_queue_splice - join two skb lists; this is designed for stacks
  1960. * @list: the new list to add
  1961. * @head: the place to add it in the first list
  1962. */
  1963. static inline void skb_queue_splice(const struct sk_buff_head *list,
  1964. struct sk_buff_head *head)
  1965. {
  1966. if (!skb_queue_empty(list)) {
  1967. __skb_queue_splice(list, (struct sk_buff *) head, head->next);
  1968. head->qlen += list->qlen;
  1969. }
  1970. }
  1971. /**
  1972. * skb_queue_splice_init - join two skb lists and reinitialise the emptied list
  1973. * @list: the new list to add
  1974. * @head: the place to add it in the first list
  1975. *
  1976. * The list at @list is reinitialised
  1977. */
  1978. static inline void skb_queue_splice_init(struct sk_buff_head *list,
  1979. struct sk_buff_head *head)
  1980. {
  1981. if (!skb_queue_empty(list)) {
  1982. __skb_queue_splice(list, (struct sk_buff *) head, head->next);
  1983. head->qlen += list->qlen;
  1984. __skb_queue_head_init(list);
  1985. }
  1986. }
  1987. /**
  1988. * skb_queue_splice_tail - join two skb lists, each list being a queue
  1989. * @list: the new list to add
  1990. * @head: the place to add it in the first list
  1991. */
  1992. static inline void skb_queue_splice_tail(const struct sk_buff_head *list,
  1993. struct sk_buff_head *head)
  1994. {
  1995. if (!skb_queue_empty(list)) {
  1996. __skb_queue_splice(list, head->prev, (struct sk_buff *) head);
  1997. head->qlen += list->qlen;
  1998. }
  1999. }
  2000. /**
  2001. * skb_queue_splice_tail_init - join two skb lists and reinitialise the emptied list
  2002. * @list: the new list to add
  2003. * @head: the place to add it in the first list
  2004. *
  2005. * Each of the lists is a queue.
  2006. * The list at @list is reinitialised
  2007. */
  2008. static inline void skb_queue_splice_tail_init(struct sk_buff_head *list,
  2009. struct sk_buff_head *head)
  2010. {
  2011. if (!skb_queue_empty(list)) {
  2012. __skb_queue_splice(list, head->prev, (struct sk_buff *) head);
  2013. head->qlen += list->qlen;
  2014. __skb_queue_head_init(list);
  2015. }
  2016. }
  2017. /**
2018. * __skb_queue_after - queue a buffer after the given buffer in a list
  2019. * @list: list to use
  2020. * @prev: place after this buffer
  2021. * @newsk: buffer to queue
  2022. *
2023. * Queue a buffer in the middle of a list. This function takes no locks
  2024. * and you must therefore hold required locks before calling it.
  2025. *
  2026. * A buffer cannot be placed on two lists at the same time.
  2027. */
  2028. static inline void __skb_queue_after(struct sk_buff_head *list,
  2029. struct sk_buff *prev,
  2030. struct sk_buff *newsk)
  2031. {
  2032. __skb_insert(newsk, prev, ((struct sk_buff_list *)prev)->next, list);
  2033. }
  2034. void skb_append(struct sk_buff *old, struct sk_buff *newsk,
  2035. struct sk_buff_head *list);
  2036. static inline void __skb_queue_before(struct sk_buff_head *list,
  2037. struct sk_buff *next,
  2038. struct sk_buff *newsk)
  2039. {
  2040. __skb_insert(newsk, ((struct sk_buff_list *)next)->prev, next, list);
  2041. }
  2042. /**
  2043. * __skb_queue_head - queue a buffer at the list head
  2044. * @list: list to use
  2045. * @newsk: buffer to queue
  2046. *
  2047. * Queue a buffer at the start of a list. This function takes no locks
  2048. * and you must therefore hold required locks before calling it.
  2049. *
  2050. * A buffer cannot be placed on two lists at the same time.
  2051. */
  2052. static inline void __skb_queue_head(struct sk_buff_head *list,
  2053. struct sk_buff *newsk)
  2054. {
  2055. __skb_queue_after(list, (struct sk_buff *)list, newsk);
  2056. }
  2057. void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
  2058. /**
  2059. * __skb_queue_tail - queue a buffer at the list tail
  2060. * @list: list to use
  2061. * @newsk: buffer to queue
  2062. *
  2063. * Queue a buffer at the end of a list. This function takes no locks
  2064. * and you must therefore hold required locks before calling it.
  2065. *
  2066. * A buffer cannot be placed on two lists at the same time.
  2067. */
  2068. static inline void __skb_queue_tail(struct sk_buff_head *list,
  2069. struct sk_buff *newsk)
  2070. {
  2071. __skb_queue_before(list, (struct sk_buff *)list, newsk);
  2072. }
  2073. void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk);
  2074. /*
  2075. * remove sk_buff from list. _Must_ be called atomically, and with
2076. * the list known.
  2077. */
  2078. void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list);
  2079. static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
  2080. {
  2081. struct sk_buff *next, *prev;
  2082. WRITE_ONCE(list->qlen, list->qlen - 1);
  2083. next = skb->next;
  2084. prev = skb->prev;
  2085. skb->next = skb->prev = NULL;
  2086. WRITE_ONCE(next->prev, prev);
  2087. WRITE_ONCE(prev->next, next);
  2088. }
  2089. /**
  2090. * __skb_dequeue - remove from the head of the queue
  2091. * @list: list to dequeue from
  2092. *
  2093. * Remove the head of the list. This function does not take any locks
  2094. * so must be used with appropriate locks held only. The head item is
  2095. * returned or %NULL if the list is empty.
  2096. */
  2097. static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
  2098. {
  2099. struct sk_buff *skb = skb_peek(list);
  2100. if (skb)
  2101. __skb_unlink(skb, list);
  2102. return skb;
  2103. }
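/*
 * Illustrative sketch: the __skb_queue_*()/__skb_dequeue() helpers leave
 * locking to the caller, typically via the queue's own lock. The
 * skb_queue_tail()/skb_dequeue() wrappers take the lock in this way
 * internally.
 *
 *	unsigned long flags;
 *
 *	spin_lock_irqsave(&list->lock, flags);
 *	__skb_queue_tail(list, newsk);
 *	skb = __skb_dequeue(list);
 *	spin_unlock_irqrestore(&list->lock, flags);
 */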
  2104. struct sk_buff *skb_dequeue(struct sk_buff_head *list);
  2105. /**
  2106. * __skb_dequeue_tail - remove from the tail of the queue
  2107. * @list: list to dequeue from
  2108. *
  2109. * Remove the tail of the list. This function does not take any locks
  2110. * so must be used with appropriate locks held only. The tail item is
  2111. * returned or %NULL if the list is empty.
  2112. */
  2113. static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
  2114. {
  2115. struct sk_buff *skb = skb_peek_tail(list);
  2116. if (skb)
  2117. __skb_unlink(skb, list);
  2118. return skb;
  2119. }
  2120. struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);
  2121. static inline bool skb_is_nonlinear(const struct sk_buff *skb)
  2122. {
  2123. return skb->data_len;
  2124. }
  2125. static inline unsigned int skb_headlen(const struct sk_buff *skb)
  2126. {
  2127. return skb->len - skb->data_len;
  2128. }
  2129. static inline unsigned int __skb_pagelen(const struct sk_buff *skb)
  2130. {
  2131. unsigned int i, len = 0;
  2132. for (i = skb_shinfo(skb)->nr_frags - 1; (int)i >= 0; i--)
  2133. len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
  2134. return len;
  2135. }
  2136. static inline unsigned int skb_pagelen(const struct sk_buff *skb)
  2137. {
  2138. return skb_headlen(skb) + __skb_pagelen(skb);
  2139. }
  2140. static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo,
  2141. int i, struct page *page,
  2142. int off, int size)
  2143. {
  2144. skb_frag_t *frag = &shinfo->frags[i];
  2145. /*
  2146. * Propagate page pfmemalloc to the skb if we can. The problem is
  2147. * that not all callers have unique ownership of the page but rely
  2148. * on page_is_pfmemalloc doing the right thing(tm).
  2149. */
  2150. frag->bv_page = page;
  2151. frag->bv_offset = off;
  2152. skb_frag_size_set(frag, size);
  2153. }
  2154. /**
  2155. * skb_len_add - adds a number to len fields of skb
  2156. * @skb: buffer to add len to
  2157. * @delta: number of bytes to add
  2158. */
  2159. static inline void skb_len_add(struct sk_buff *skb, int delta)
  2160. {
  2161. skb->len += delta;
  2162. skb->data_len += delta;
  2163. skb->truesize += delta;
  2164. }
  2165. /**
  2166. * __skb_fill_page_desc - initialise a paged fragment in an skb
  2167. * @skb: buffer containing fragment to be initialised
  2168. * @i: paged fragment index to initialise
  2169. * @page: the page to use for this fragment
2170. * @off: the offset to the data within @page
  2171. * @size: the length of the data
  2172. *
2173. * Initialises the @i'th fragment of @skb to point to @size bytes at
  2174. * offset @off within @page.
  2175. *
  2176. * Does not take any additional reference on the fragment.
  2177. */
  2178. static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
  2179. struct page *page, int off, int size)
  2180. {
  2181. __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size);
  2182. page = compound_head(page);
  2183. if (page_is_pfmemalloc(page))
  2184. skb->pfmemalloc = true;
  2185. }
  2186. /**
  2187. * skb_fill_page_desc - initialise a paged fragment in an skb
  2188. * @skb: buffer containing fragment to be initialised
  2189. * @i: paged fragment index to initialise
  2190. * @page: the page to use for this fragment
2191. * @off: the offset to the data within @page
  2192. * @size: the length of the data
  2193. *
  2194. * As per __skb_fill_page_desc() -- initialises the @i'th fragment of
  2195. * @skb to point to @size bytes at offset @off within @page. In
  2196. * addition updates @skb such that @i is the last fragment.
  2197. *
  2198. * Does not take any additional reference on the fragment.
  2199. */
  2200. static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
  2201. struct page *page, int off, int size)
  2202. {
  2203. __skb_fill_page_desc(skb, i, page, off, size);
  2204. skb_shinfo(skb)->nr_frags = i + 1;
  2205. }
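/*
 * Illustrative sketch: attach one page of received data as the first
 * fragment and account for its size. skb_add_rx_frag(), declared below,
 * bundles the same steps.
 *
 *	skb_fill_page_desc(skb, 0, page, 0, len);
 *	skb->len += len;
 *	skb->data_len += len;
 *	skb->truesize += PAGE_SIZE;
 */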
  2206. /**
  2207. * skb_fill_page_desc_noacc - initialise a paged fragment in an skb
  2208. * @skb: buffer containing fragment to be initialised
  2209. * @i: paged fragment index to initialise
  2210. * @page: the page to use for this fragment
2211. * @off: the offset to the data within @page
  2212. * @size: the length of the data
  2213. *
2214. * Variant of skb_fill_page_desc() which does not propagate the page's
2215. * pfmemalloc state; use it when the page is not owned by us.
  2216. */
  2217. static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i,
  2218. struct page *page, int off,
  2219. int size)
  2220. {
  2221. struct skb_shared_info *shinfo = skb_shinfo(skb);
  2222. __skb_fill_page_desc_noacc(shinfo, i, page, off, size);
  2223. shinfo->nr_frags = i + 1;
  2224. }
  2225. void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
  2226. int size, unsigned int truesize);
  2227. void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
  2228. unsigned int truesize);
  2229. #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb))
  2230. #ifdef NET_SKBUFF_DATA_USES_OFFSET
  2231. static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
  2232. {
  2233. return skb->head + skb->tail;
  2234. }
  2235. static inline void skb_reset_tail_pointer(struct sk_buff *skb)
  2236. {
  2237. skb->tail = skb->data - skb->head;
  2238. }
  2239. static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
  2240. {
  2241. skb_reset_tail_pointer(skb);
  2242. skb->tail += offset;
  2243. }
  2244. #else /* NET_SKBUFF_DATA_USES_OFFSET */
  2245. static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
  2246. {
  2247. return skb->tail;
  2248. }
  2249. static inline void skb_reset_tail_pointer(struct sk_buff *skb)
  2250. {
  2251. skb->tail = skb->data;
  2252. }
  2253. static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
  2254. {
  2255. skb->tail = skb->data + offset;
  2256. }
  2257. #endif /* NET_SKBUFF_DATA_USES_OFFSET */
  2258. static inline void skb_assert_len(struct sk_buff *skb)
  2259. {
  2260. #ifdef CONFIG_DEBUG_NET
  2261. if (WARN_ONCE(!skb->len, "%s\n", __func__))
  2262. DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
  2263. #endif /* CONFIG_DEBUG_NET */
  2264. }
  2265. /*
  2266. * Add data to an sk_buff
  2267. */
  2268. void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len);
  2269. void *skb_put(struct sk_buff *skb, unsigned int len);
  2270. static inline void *__skb_put(struct sk_buff *skb, unsigned int len)
  2271. {
  2272. void *tmp = skb_tail_pointer(skb);
  2273. SKB_LINEAR_ASSERT(skb);
  2274. skb->tail += len;
  2275. skb->len += len;
  2276. return tmp;
  2277. }
  2278. static inline void *__skb_put_zero(struct sk_buff *skb, unsigned int len)
  2279. {
  2280. void *tmp = __skb_put(skb, len);
  2281. memset(tmp, 0, len);
  2282. return tmp;
  2283. }
  2284. static inline void *__skb_put_data(struct sk_buff *skb, const void *data,
  2285. unsigned int len)
  2286. {
  2287. void *tmp = __skb_put(skb, len);
  2288. memcpy(tmp, data, len);
  2289. return tmp;
  2290. }
  2291. static inline void __skb_put_u8(struct sk_buff *skb, u8 val)
  2292. {
  2293. *(u8 *)__skb_put(skb, 1) = val;
  2294. }
  2295. static inline void *skb_put_zero(struct sk_buff *skb, unsigned int len)
  2296. {
  2297. void *tmp = skb_put(skb, len);
  2298. memset(tmp, 0, len);
  2299. return tmp;
  2300. }
  2301. static inline void *skb_put_data(struct sk_buff *skb, const void *data,
  2302. unsigned int len)
  2303. {
  2304. void *tmp = skb_put(skb, len);
  2305. memcpy(tmp, data, len);
  2306. return tmp;
  2307. }
  2308. static inline void skb_put_u8(struct sk_buff *skb, u8 val)
  2309. {
  2310. *(u8 *)skb_put(skb, 1) = val;
  2311. }
  2312. void *skb_push(struct sk_buff *skb, unsigned int len);
  2313. static inline void *__skb_push(struct sk_buff *skb, unsigned int len)
  2314. {
  2315. skb->data -= len;
  2316. skb->len += len;
  2317. return skb->data;
  2318. }
  2319. void *skb_pull(struct sk_buff *skb, unsigned int len);
  2320. static inline void *__skb_pull(struct sk_buff *skb, unsigned int len)
  2321. {
  2322. skb->len -= len;
  2323. if (unlikely(skb->len < skb->data_len)) {
  2324. #if defined(CONFIG_DEBUG_NET)
  2325. skb->len += len;
  2326. pr_err("__skb_pull(len=%u)\n", len);
  2327. skb_dump(KERN_ERR, skb, false);
  2328. #endif
  2329. BUG();
  2330. }
  2331. return skb->data += len;
  2332. }
  2333. static inline void *skb_pull_inline(struct sk_buff *skb, unsigned int len)
  2334. {
  2335. return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
  2336. }
  2337. void *skb_pull_data(struct sk_buff *skb, size_t len);
  2338. void *__pskb_pull_tail(struct sk_buff *skb, int delta);
  2339. static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len)
  2340. {
  2341. if (likely(len <= skb_headlen(skb)))
  2342. return true;
  2343. if (unlikely(len > skb->len))
  2344. return false;
  2345. return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL;
  2346. }
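/*
 * Illustrative sketch: make sure a full IPv4 header sits in the linear area
 * before dereferencing it.
 *
 *	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 *		goto drop;
 *	iph = ip_hdr(skb);
 */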
  2347. static inline void *pskb_pull(struct sk_buff *skb, unsigned int len)
  2348. {
  2349. if (!pskb_may_pull(skb, len))
  2350. return NULL;
  2351. skb->len -= len;
  2352. return skb->data += len;
  2353. }
  2354. void skb_condense(struct sk_buff *skb);
  2355. /**
  2356. * skb_headroom - bytes at buffer head
  2357. * @skb: buffer to check
  2358. *
  2359. * Return the number of bytes of free space at the head of an &sk_buff.
  2360. */
  2361. static inline unsigned int skb_headroom(const struct sk_buff *skb)
  2362. {
  2363. return skb->data - skb->head;
  2364. }
  2365. /**
  2366. * skb_tailroom - bytes at buffer end
  2367. * @skb: buffer to check
  2368. *
  2369. * Return the number of bytes of free space at the tail of an sk_buff
  2370. */
  2371. static inline int skb_tailroom(const struct sk_buff *skb)
  2372. {
  2373. return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail;
  2374. }
  2375. /**
  2376. * skb_availroom - bytes at buffer end
  2377. * @skb: buffer to check
  2378. *
  2379. * Return the number of bytes of free space at the tail of an sk_buff
  2380. * allocated by sk_stream_alloc()
  2381. */
  2382. static inline int skb_availroom(const struct sk_buff *skb)
  2383. {
  2384. if (skb_is_nonlinear(skb))
  2385. return 0;
  2386. return skb->end - skb->tail - skb->reserved_tailroom;
  2387. }
  2388. /**
  2389. * skb_reserve - adjust headroom
  2390. * @skb: buffer to alter
  2391. * @len: bytes to move
  2392. *
  2393. * Increase the headroom of an empty &sk_buff by reducing the tail
  2394. * room. This is only allowed for an empty buffer.
  2395. */
  2396. static inline void skb_reserve(struct sk_buff *skb, int len)
  2397. {
  2398. skb->data += len;
  2399. skb->tail += len;
  2400. }
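/*
 * Illustrative sketch: receive paths commonly reserve NET_IP_ALIGN bytes so
 * the IP header is aligned once the 14-byte Ethernet header has been pulled.
 *
 *	skb = netdev_alloc_skb(dev, len + NET_IP_ALIGN);
 *	if (skb)
 *		skb_reserve(skb, NET_IP_ALIGN);
 */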
  2401. /**
  2402. * skb_tailroom_reserve - adjust reserved_tailroom
  2403. * @skb: buffer to alter
  2404. * @mtu: maximum amount of headlen permitted
  2405. * @needed_tailroom: minimum amount of reserved_tailroom
  2406. *
  2407. * Set reserved_tailroom so that headlen can be as large as possible but
  2408. * not larger than mtu and tailroom cannot be smaller than
  2409. * needed_tailroom.
  2410. * The required headroom should already have been reserved before using
  2411. * this function.
  2412. */
  2413. static inline void skb_tailroom_reserve(struct sk_buff *skb, unsigned int mtu,
  2414. unsigned int needed_tailroom)
  2415. {
  2416. SKB_LINEAR_ASSERT(skb);
  2417. if (mtu < skb_tailroom(skb) - needed_tailroom)
  2418. /* use at most mtu */
  2419. skb->reserved_tailroom = skb_tailroom(skb) - mtu;
  2420. else
  2421. /* use up to all available space */
  2422. skb->reserved_tailroom = needed_tailroom;
  2423. }
  2424. #define ENCAP_TYPE_ETHER 0
  2425. #define ENCAP_TYPE_IPPROTO 1
  2426. static inline void skb_set_inner_protocol(struct sk_buff *skb,
  2427. __be16 protocol)
  2428. {
  2429. skb->inner_protocol = protocol;
  2430. skb->inner_protocol_type = ENCAP_TYPE_ETHER;
  2431. }
  2432. static inline void skb_set_inner_ipproto(struct sk_buff *skb,
  2433. __u8 ipproto)
  2434. {
  2435. skb->inner_ipproto = ipproto;
  2436. skb->inner_protocol_type = ENCAP_TYPE_IPPROTO;
  2437. }
  2438. static inline void skb_reset_inner_headers(struct sk_buff *skb)
  2439. {
  2440. skb->inner_mac_header = skb->mac_header;
  2441. skb->inner_network_header = skb->network_header;
  2442. skb->inner_transport_header = skb->transport_header;
  2443. }
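/* Illustrative sketch: a tunnel transmit path usually snapshots the inner
 * headers and records the inner protocol before pushing its outer headers,
 * so offloads can still locate them. "outer_hdr_len" is hypothetical.
 *
 *	skb_reset_inner_headers(skb);
 *	skb_set_inner_ipproto(skb, IPPROTO_IPIP);
 *	skb_push(skb, outer_hdr_len);
 */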
  2444. static inline void skb_reset_mac_len(struct sk_buff *skb)
  2445. {
  2446. skb->mac_len = skb->network_header - skb->mac_header;
  2447. }
  2448. static inline unsigned char *skb_inner_transport_header(const struct sk_buff
  2449. *skb)
  2450. {
  2451. return skb->head + skb->inner_transport_header;
  2452. }
  2453. static inline int skb_inner_transport_offset(const struct sk_buff *skb)
  2454. {
  2455. return skb_inner_transport_header(skb) - skb->data;
  2456. }
  2457. static inline void skb_reset_inner_transport_header(struct sk_buff *skb)
  2458. {
  2459. skb->inner_transport_header = skb->data - skb->head;
  2460. }
  2461. static inline void skb_set_inner_transport_header(struct sk_buff *skb,
  2462. const int offset)
  2463. {
  2464. skb_reset_inner_transport_header(skb);
  2465. skb->inner_transport_header += offset;
  2466. }
  2467. static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb)
  2468. {
  2469. return skb->head + skb->inner_network_header;
  2470. }
  2471. static inline void skb_reset_inner_network_header(struct sk_buff *skb)
  2472. {
  2473. skb->inner_network_header = skb->data - skb->head;
  2474. }
  2475. static inline void skb_set_inner_network_header(struct sk_buff *skb,
  2476. const int offset)
  2477. {
  2478. skb_reset_inner_network_header(skb);
  2479. skb->inner_network_header += offset;
  2480. }
  2481. static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb)
  2482. {
  2483. return skb->head + skb->inner_mac_header;
  2484. }
  2485. static inline void skb_reset_inner_mac_header(struct sk_buff *skb)
  2486. {
  2487. skb->inner_mac_header = skb->data - skb->head;
  2488. }
  2489. static inline void skb_set_inner_mac_header(struct sk_buff *skb,
  2490. const int offset)
  2491. {
  2492. skb_reset_inner_mac_header(skb);
  2493. skb->inner_mac_header += offset;
  2494. }
  2495. static inline bool skb_transport_header_was_set(const struct sk_buff *skb)
  2496. {
  2497. return skb->transport_header != (typeof(skb->transport_header))~0U;
  2498. }
  2499. static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
  2500. {
  2501. DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb));
  2502. return skb->head + skb->transport_header;
  2503. }
  2504. static inline void skb_reset_transport_header(struct sk_buff *skb)
  2505. {
  2506. skb->transport_header = skb->data - skb->head;
  2507. }
  2508. static inline void skb_set_transport_header(struct sk_buff *skb,
  2509. const int offset)
  2510. {
  2511. skb_reset_transport_header(skb);
  2512. skb->transport_header += offset;
  2513. }
  2514. static inline unsigned char *skb_network_header(const struct sk_buff *skb)
  2515. {
  2516. return skb->head + skb->network_header;
  2517. }
  2518. static inline void skb_reset_network_header(struct sk_buff *skb)
  2519. {
  2520. skb->network_header = skb->data - skb->head;
  2521. }
  2522. static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
  2523. {
  2524. skb_reset_network_header(skb);
  2525. skb->network_header += offset;
  2526. }
  2527. static inline int skb_mac_header_was_set(const struct sk_buff *skb)
  2528. {
  2529. return skb->mac_header != (typeof(skb->mac_header))~0U;
  2530. }
  2531. static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
  2532. {
  2533. DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb));
  2534. return skb->head + skb->mac_header;
  2535. }
  2536. static inline int skb_mac_offset(const struct sk_buff *skb)
  2537. {
  2538. return skb_mac_header(skb) - skb->data;
  2539. }
  2540. static inline u32 skb_mac_header_len(const struct sk_buff *skb)
  2541. {
  2542. DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb));
  2543. return skb->network_header - skb->mac_header;
  2544. }
  2545. static inline void skb_unset_mac_header(struct sk_buff *skb)
  2546. {
  2547. skb->mac_header = (typeof(skb->mac_header))~0U;
  2548. }
  2549. static inline void skb_reset_mac_header(struct sk_buff *skb)
  2550. {
  2551. skb->mac_header = skb->data - skb->head;
  2552. }
  2553. static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
  2554. {
  2555. skb_reset_mac_header(skb);
  2556. skb->mac_header += offset;
  2557. }
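/* Illustrative sketch: with skb->data pointing at the Ethernet header on
 * receive, a driver can record both the L2 and L3 offsets before any pull:
 *
 *	skb_reset_mac_header(skb);
 *	skb_set_network_header(skb, ETH_HLEN);
 */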
  2558. static inline void skb_pop_mac_header(struct sk_buff *skb)
  2559. {
  2560. skb->mac_header = skb->network_header;
  2561. }
  2562. static inline void skb_probe_transport_header(struct sk_buff *skb)
  2563. {
  2564. struct flow_keys_basic keys;
  2565. if (skb_transport_header_was_set(skb))
  2566. return;
  2567. if (skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
  2568. NULL, 0, 0, 0, 0))
  2569. skb_set_transport_header(skb, keys.control.thoff);
  2570. }
  2571. static inline void skb_mac_header_rebuild(struct sk_buff *skb)
  2572. {
  2573. if (skb_mac_header_was_set(skb)) {
  2574. const unsigned char *old_mac = skb_mac_header(skb);
  2575. skb_set_mac_header(skb, -skb->mac_len);
  2576. memmove(skb_mac_header(skb), old_mac, skb->mac_len);
  2577. }
  2578. }
  2579. static inline int skb_checksum_start_offset(const struct sk_buff *skb)
  2580. {
  2581. return skb->csum_start - skb_headroom(skb);
  2582. }
  2583. static inline unsigned char *skb_checksum_start(const struct sk_buff *skb)
  2584. {
  2585. return skb->head + skb->csum_start;
  2586. }
  2587. static inline int skb_transport_offset(const struct sk_buff *skb)
  2588. {
  2589. return skb_transport_header(skb) - skb->data;
  2590. }
  2591. static inline u32 skb_network_header_len(const struct sk_buff *skb)
  2592. {
  2593. return skb->transport_header - skb->network_header;
  2594. }
  2595. static inline u32 skb_inner_network_header_len(const struct sk_buff *skb)
  2596. {
  2597. return skb->inner_transport_header - skb->inner_network_header;
  2598. }
  2599. static inline int skb_network_offset(const struct sk_buff *skb)
  2600. {
  2601. return skb_network_header(skb) - skb->data;
  2602. }
  2603. static inline int skb_inner_network_offset(const struct sk_buff *skb)
  2604. {
  2605. return skb_inner_network_header(skb) - skb->data;
  2606. }
  2607. static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
  2608. {
  2609. return pskb_may_pull(skb, skb_network_offset(skb) + len);
  2610. }
  2611. /*
  2612. * CPUs often take a performance hit when accessing unaligned memory
  2613. * locations. The actual performance hit varies, it can be small if the
  2614. * hardware handles it or large if we have to take an exception and fix it
  2615. * in software.
  2616. *
2617. * Since an ethernet header is 14 bytes, network drivers often end up with
  2618. * the IP header at an unaligned offset. The IP header can be aligned by
  2619. * shifting the start of the packet by 2 bytes. Drivers should do this
  2620. * with:
  2621. *
  2622. * skb_reserve(skb, NET_IP_ALIGN);
  2623. *
  2624. * The downside to this alignment of the IP header is that the DMA is now
  2625. * unaligned. On some architectures the cost of an unaligned DMA is high
  2626. * and this cost outweighs the gains made by aligning the IP header.
  2627. *
  2628. * Since this trade off varies between architectures, we allow NET_IP_ALIGN
  2629. * to be overridden.
  2630. */
  2631. #ifndef NET_IP_ALIGN
  2632. #define NET_IP_ALIGN 2
  2633. #endif
  2634. /*
  2635. * The networking layer reserves some headroom in skb data (via
  2636. * dev_alloc_skb). This is used to avoid having to reallocate skb data when
  2637. * the header has to grow. In the default case, if the header has to grow
  2638. * 32 bytes or less we avoid the reallocation.
  2639. *
  2640. * Unfortunately this headroom changes the DMA alignment of the resulting
  2641. * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive
  2642. * on some architectures. An architecture can override this value,
  2643. * perhaps setting it to a cacheline in size (since that will maintain
  2644. * cacheline alignment of the DMA). It must be a power of 2.
  2645. *
  2646. * Various parts of the networking layer expect at least 32 bytes of
2647. * headroom; you should not reduce this.
  2648. *
  2649. * Using max(32, L1_CACHE_BYTES) makes sense (especially with RPS)
  2650. * to reduce average number of cache lines per packet.
2651. * get_rps_cpu() for example only accesses one 64-byte aligned block:
  2652. * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8)
  2653. */
  2654. #ifndef NET_SKB_PAD
  2655. #define NET_SKB_PAD max(32, L1_CACHE_BYTES)
  2656. #endif
  2657. int ___pskb_trim(struct sk_buff *skb, unsigned int len);
  2658. static inline void __skb_set_length(struct sk_buff *skb, unsigned int len)
  2659. {
  2660. if (WARN_ON(skb_is_nonlinear(skb)))
  2661. return;
  2662. skb->len = len;
  2663. skb_set_tail_pointer(skb, len);
  2664. }
  2665. static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
  2666. {
  2667. __skb_set_length(skb, len);
  2668. }
  2669. void skb_trim(struct sk_buff *skb, unsigned int len);
  2670. static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
  2671. {
  2672. if (skb->data_len)
  2673. return ___pskb_trim(skb, len);
  2674. __skb_trim(skb, len);
  2675. return 0;
  2676. }
  2677. static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
  2678. {
  2679. return (len < skb->len) ? __pskb_trim(skb, len) : 0;
  2680. }
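/* Illustrative sketch: receive paths commonly trim Ethernet padding once
 * the real datagram length is known, e.g. from the IPv4 total length.
 * "iph" is a hypothetical pointer to a validated IPv4 header and skb->data
 * is assumed to sit at the start of that header.
 *
 *	if (pskb_trim(skb, ntohs(iph->tot_len)))
 *		goto drop;
 */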
  2681. /**
  2682. * pskb_trim_unique - remove end from a paged unique (not cloned) buffer
  2683. * @skb: buffer to alter
  2684. * @len: new length
  2685. *
  2686. * This is identical to pskb_trim except that the caller knows that
  2687. * the skb is not cloned so we should never get an error due to out-
  2688. * of-memory.
  2689. */
  2690. static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len)
  2691. {
  2692. int err = pskb_trim(skb, len);
  2693. BUG_ON(err);
  2694. }
  2695. static inline int __skb_grow(struct sk_buff *skb, unsigned int len)
  2696. {
  2697. unsigned int diff = len - skb->len;
  2698. if (skb_tailroom(skb) < diff) {
  2699. int ret = pskb_expand_head(skb, 0, diff - skb_tailroom(skb),
  2700. GFP_ATOMIC);
  2701. if (ret)
  2702. return ret;
  2703. }
  2704. __skb_set_length(skb, len);
  2705. return 0;
  2706. }
  2707. /**
  2708. * skb_orphan - orphan a buffer
  2709. * @skb: buffer to orphan
  2710. *
  2711. * If a buffer currently has an owner then we call the owner's
  2712. * destructor function and make the @skb unowned. The buffer continues
  2713. * to exist but is no longer charged to its former owner.
  2714. */
  2715. static inline void skb_orphan(struct sk_buff *skb)
  2716. {
  2717. if (skb->destructor) {
  2718. skb->destructor(skb);
  2719. skb->destructor = NULL;
  2720. skb->sk = NULL;
  2721. } else {
  2722. BUG_ON(skb->sk);
  2723. }
  2724. }
  2725. /**
  2726. * skb_orphan_frags - orphan the frags contained in a buffer
  2727. * @skb: buffer to orphan frags from
  2728. * @gfp_mask: allocation mask for replacement pages
  2729. *
  2730. * For each frag in the SKB which needs a destructor (i.e. has an
  2731. * owner) create a copy of that frag and release the original
  2732. * page by calling the destructor.
  2733. */
  2734. static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask)
  2735. {
  2736. if (likely(!skb_zcopy(skb)))
  2737. return 0;
  2738. if (skb_shinfo(skb)->flags & SKBFL_DONT_ORPHAN)
  2739. return 0;
  2740. return skb_copy_ubufs(skb, gfp_mask);
  2741. }
  2742. /* Frags must be orphaned, even if refcounted, if skb might loop to rx path */
  2743. static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask)
  2744. {
  2745. if (likely(!skb_zcopy(skb)))
  2746. return 0;
  2747. return skb_copy_ubufs(skb, gfp_mask);
  2748. }
  2749. /**
  2750. * __skb_queue_purge - empty a list
  2751. * @list: list to empty
  2752. *
  2753. * Delete all buffers on an &sk_buff list. Each buffer is removed from
  2754. * the list and one reference dropped. This function does not take the
  2755. * list lock and the caller must hold the relevant locks to use it.
  2756. */
  2757. static inline void __skb_queue_purge(struct sk_buff_head *list)
  2758. {
  2759. struct sk_buff *skb;
  2760. while ((skb = __skb_dequeue(list)) != NULL)
  2761. kfree_skb(skb);
  2762. }
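/* Illustrative sketch: when tearing down a private queue this lockless
 * variant is used with the queue lock already held, whereas
 * skb_queue_purge() (declared below) takes the lock itself. "queue" is a
 * hypothetical struct sk_buff_head pointer.
 *
 *	spin_lock_bh(&queue->lock);
 *	__skb_queue_purge(queue);
 *	spin_unlock_bh(&queue->lock);
 */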
  2763. void skb_queue_purge(struct sk_buff_head *list);
  2764. unsigned int skb_rbtree_purge(struct rb_root *root);
  2765. void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask);
  2766. /**
  2767. * netdev_alloc_frag - allocate a page fragment
  2768. * @fragsz: fragment size
  2769. *
2770. * Allocates a frag from a page for a receive buffer.
  2771. * Uses GFP_ATOMIC allocations.
  2772. */
  2773. static inline void *netdev_alloc_frag(unsigned int fragsz)
  2774. {
  2775. return __netdev_alloc_frag_align(fragsz, ~0u);
  2776. }
  2777. static inline void *netdev_alloc_frag_align(unsigned int fragsz,
  2778. unsigned int align)
  2779. {
  2780. WARN_ON_ONCE(!is_power_of_2(align));
  2781. return __netdev_alloc_frag_align(fragsz, -align);
  2782. }
  2783. struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length,
  2784. gfp_t gfp_mask);
  2785. /**
  2786. * netdev_alloc_skb - allocate an skbuff for rx on a specific device
  2787. * @dev: network device to receive on
  2788. * @length: length to allocate
  2789. *
  2790. * Allocate a new &sk_buff and assign it a usage count of one. The
  2791. * buffer has unspecified headroom built in. Users should allocate
  2792. * the headroom they think they need without accounting for the
  2793. * built in space. The built in space is used for optimisations.
  2794. *
  2795. * %NULL is returned if there is no free memory. Although this function
  2796. * allocates memory it can be called from an interrupt.
  2797. */
  2798. static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev,
  2799. unsigned int length)
  2800. {
  2801. return __netdev_alloc_skb(dev, length, GFP_ATOMIC);
  2802. }
  2803. /* legacy helper around __netdev_alloc_skb() */
  2804. static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
  2805. gfp_t gfp_mask)
  2806. {
  2807. return __netdev_alloc_skb(NULL, length, gfp_mask);
  2808. }
  2809. /* legacy helper around netdev_alloc_skb() */
  2810. static inline struct sk_buff *dev_alloc_skb(unsigned int length)
  2811. {
  2812. return netdev_alloc_skb(NULL, length);
  2813. }
  2814. static inline struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev,
  2815. unsigned int length, gfp_t gfp)
  2816. {
  2817. struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp);
  2818. if (NET_IP_ALIGN && skb)
  2819. skb_reserve(skb, NET_IP_ALIGN);
  2820. return skb;
  2821. }
  2822. static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
  2823. unsigned int length)
  2824. {
  2825. return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
  2826. }
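/* Illustrative sketch: a driver refilling its receive ring typically uses
 * the *_ip_align variant so the IP header ends up 4-byte aligned.
 * "RX_BUF_LEN" is a hypothetical per-driver constant.
 *
 *	skb = netdev_alloc_skb_ip_align(dev, RX_BUF_LEN);
 *	if (!skb)
 *		return -ENOMEM;
 */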
  2827. static inline void skb_free_frag(void *addr)
  2828. {
  2829. page_frag_free(addr);
  2830. }
  2831. void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask);
  2832. static inline void *napi_alloc_frag(unsigned int fragsz)
  2833. {
  2834. return __napi_alloc_frag_align(fragsz, ~0u);
  2835. }
  2836. static inline void *napi_alloc_frag_align(unsigned int fragsz,
  2837. unsigned int align)
  2838. {
  2839. WARN_ON_ONCE(!is_power_of_2(align));
  2840. return __napi_alloc_frag_align(fragsz, -align);
  2841. }
  2842. struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
  2843. unsigned int length, gfp_t gfp_mask);
  2844. static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,
  2845. unsigned int length)
  2846. {
  2847. return __napi_alloc_skb(napi, length, GFP_ATOMIC);
  2848. }
  2849. void napi_consume_skb(struct sk_buff *skb, int budget);
  2850. void napi_skb_free_stolen_head(struct sk_buff *skb);
  2851. void __kfree_skb_defer(struct sk_buff *skb);
  2852. /**
  2853. * __dev_alloc_pages - allocate page for network Rx
  2854. * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx
  2855. * @order: size of the allocation
  2856. *
  2857. * Allocate a new page.
  2858. *
  2859. * %NULL is returned if there is no free memory.
  2860. */
  2861. static inline struct page *__dev_alloc_pages(gfp_t gfp_mask,
  2862. unsigned int order)
  2863. {
  2864. /* This piece of code contains several assumptions.
2865. * 1. This is for device Rx, therefore a cold page is preferred.
2866. * 2. The expectation is the user wants a compound page.
2867. * 3. If requesting an order 0 page it will not be compound
2868. *    due to the check to see if order has a value in prep_new_page
2869. * 4. __GFP_MEMALLOC is ignored if __GFP_NOMEMALLOC is set due to
2870. *    code in gfp_to_alloc_flags that should be enforcing this.
  2871. */
  2872. gfp_mask |= __GFP_COMP | __GFP_MEMALLOC;
  2873. return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
  2874. }
  2875. static inline struct page *dev_alloc_pages(unsigned int order)
  2876. {
  2877. return __dev_alloc_pages(GFP_ATOMIC | __GFP_NOWARN, order);
  2878. }
  2879. /**
  2880. * __dev_alloc_page - allocate a page for network Rx
  2881. * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx
  2882. *
  2883. * Allocate a new page.
  2884. *
  2885. * %NULL is returned if there is no free memory.
  2886. */
  2887. static inline struct page *__dev_alloc_page(gfp_t gfp_mask)
  2888. {
  2889. return __dev_alloc_pages(gfp_mask, 0);
  2890. }
  2891. static inline struct page *dev_alloc_page(void)
  2892. {
  2893. return dev_alloc_pages(0);
  2894. }
  2895. /**
  2896. * dev_page_is_reusable - check whether a page can be reused for network Rx
  2897. * @page: the page to test
  2898. *
  2899. * A page shouldn't be considered for reusing/recycling if it was allocated
  2900. * under memory pressure or at a distant memory node.
  2901. *
2902. * Returns false if this page should be returned to the page allocator, true
  2903. * otherwise.
  2904. */
  2905. static inline bool dev_page_is_reusable(const struct page *page)
  2906. {
  2907. return likely(page_to_nid(page) == numa_mem_id() &&
  2908. !page_is_pfmemalloc(page));
  2909. }
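/* Illustrative sketch: page-recycling drivers keep a page in their RX ring
 * only when it is safe to reuse; otherwise they release it and allocate a
 * fresh one. "page" is a hypothetical ring-buffer page.
 *
 *	if (!dev_page_is_reusable(page)) {
 *		put_page(page);
 *		page = dev_alloc_page();
 *	}
 */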
  2910. /**
  2911. * skb_propagate_pfmemalloc - Propagate pfmemalloc if skb is allocated after RX page
  2912. * @page: The page that was allocated from skb_alloc_page
  2913. * @skb: The skb that may need pfmemalloc set
  2914. */
  2915. static inline void skb_propagate_pfmemalloc(const struct page *page,
  2916. struct sk_buff *skb)
  2917. {
  2918. if (page_is_pfmemalloc(page))
  2919. skb->pfmemalloc = true;
  2920. }
  2921. /**
  2922. * skb_frag_off() - Returns the offset of a skb fragment
  2923. * @frag: the paged fragment
  2924. */
  2925. static inline unsigned int skb_frag_off(const skb_frag_t *frag)
  2926. {
  2927. return frag->bv_offset;
  2928. }
  2929. /**
  2930. * skb_frag_off_add() - Increments the offset of a skb fragment by @delta
  2931. * @frag: skb fragment
  2932. * @delta: value to add
  2933. */
  2934. static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
  2935. {
  2936. frag->bv_offset += delta;
  2937. }
  2938. /**
  2939. * skb_frag_off_set() - Sets the offset of a skb fragment
  2940. * @frag: skb fragment
  2941. * @offset: offset of fragment
  2942. */
  2943. static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
  2944. {
  2945. frag->bv_offset = offset;
  2946. }
  2947. /**
  2948. * skb_frag_off_copy() - Sets the offset of a skb fragment from another fragment
  2949. * @fragto: skb fragment where offset is set
  2950. * @fragfrom: skb fragment offset is copied from
  2951. */
  2952. static inline void skb_frag_off_copy(skb_frag_t *fragto,
  2953. const skb_frag_t *fragfrom)
  2954. {
  2955. fragto->bv_offset = fragfrom->bv_offset;
  2956. }
  2957. /**
  2958. * skb_frag_page - retrieve the page referred to by a paged fragment
  2959. * @frag: the paged fragment
  2960. *
  2961. * Returns the &struct page associated with @frag.
  2962. */
  2963. static inline struct page *skb_frag_page(const skb_frag_t *frag)
  2964. {
  2965. return frag->bv_page;
  2966. }
  2967. /**
2968. * __skb_frag_ref - take an additional reference on a paged fragment.
  2969. * @frag: the paged fragment
  2970. *
  2971. * Takes an additional reference on the paged fragment @frag.
  2972. */
  2973. static inline void __skb_frag_ref(skb_frag_t *frag)
  2974. {
  2975. get_page(skb_frag_page(frag));
  2976. }
  2977. /**
2978. * skb_frag_ref - take an additional reference on a paged fragment of an skb.
  2979. * @skb: the buffer
  2980. * @f: the fragment offset.
  2981. *
  2982. * Takes an additional reference on the @f'th paged fragment of @skb.
  2983. */
  2984. static inline void skb_frag_ref(struct sk_buff *skb, int f)
  2985. {
  2986. __skb_frag_ref(&skb_shinfo(skb)->frags[f]);
  2987. }
  2988. /**
  2989. * __skb_frag_unref - release a reference on a paged fragment.
  2990. * @frag: the paged fragment
  2991. * @recycle: recycle the page if allocated via page_pool
  2992. *
  2993. * Releases a reference on the paged fragment @frag
  2994. * or recycles the page via the page_pool API.
  2995. */
  2996. static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
  2997. {
  2998. struct page *page = skb_frag_page(frag);
  2999. #ifdef CONFIG_PAGE_POOL
  3000. if (recycle && page_pool_return_skb_page(page))
  3001. return;
  3002. #endif
  3003. put_page(page);
  3004. }
  3005. /**
  3006. * skb_frag_unref - release a reference on a paged fragment of an skb.
  3007. * @skb: the buffer
  3008. * @f: the fragment offset
  3009. *
  3010. * Releases a reference on the @f'th paged fragment of @skb.
  3011. */
  3012. static inline void skb_frag_unref(struct sk_buff *skb, int f)
  3013. {
  3014. struct skb_shared_info *shinfo = skb_shinfo(skb);
  3015. if (!skb_zcopy_managed(skb))
  3016. __skb_frag_unref(&shinfo->frags[f], skb->pp_recycle);
  3017. }
  3018. /**
  3019. * skb_frag_address - gets the address of the data contained in a paged fragment
  3020. * @frag: the paged fragment buffer
  3021. *
  3022. * Returns the address of the data within @frag. The page must already
  3023. * be mapped.
  3024. */
  3025. static inline void *skb_frag_address(const skb_frag_t *frag)
  3026. {
  3027. return page_address(skb_frag_page(frag)) + skb_frag_off(frag);
  3028. }
  3029. /**
  3030. * skb_frag_address_safe - gets the address of the data contained in a paged fragment
  3031. * @frag: the paged fragment buffer
  3032. *
  3033. * Returns the address of the data within @frag. Checks that the page
  3034. * is mapped and returns %NULL otherwise.
  3035. */
  3036. static inline void *skb_frag_address_safe(const skb_frag_t *frag)
  3037. {
  3038. void *ptr = page_address(skb_frag_page(frag));
  3039. if (unlikely(!ptr))
  3040. return NULL;
  3041. return ptr + skb_frag_off(frag);
  3042. }
  3043. /**
  3044. * skb_frag_page_copy() - sets the page in a fragment from another fragment
  3045. * @fragto: skb fragment where page is set
  3046. * @fragfrom: skb fragment page is copied from
  3047. */
  3048. static inline void skb_frag_page_copy(skb_frag_t *fragto,
  3049. const skb_frag_t *fragfrom)
  3050. {
  3051. fragto->bv_page = fragfrom->bv_page;
  3052. }
  3053. /**
  3054. * __skb_frag_set_page - sets the page contained in a paged fragment
  3055. * @frag: the paged fragment
  3056. * @page: the page to set
  3057. *
  3058. * Sets the fragment @frag to contain @page.
  3059. */
  3060. static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page)
  3061. {
  3062. frag->bv_page = page;
  3063. }
  3064. /**
  3065. * skb_frag_set_page - sets the page contained in a paged fragment of an skb
  3066. * @skb: the buffer
  3067. * @f: the fragment offset
  3068. * @page: the page to set
  3069. *
  3070. * Sets the @f'th fragment of @skb to contain @page.
  3071. */
  3072. static inline void skb_frag_set_page(struct sk_buff *skb, int f,
  3073. struct page *page)
  3074. {
  3075. __skb_frag_set_page(&skb_shinfo(skb)->frags[f], page);
  3076. }
  3077. bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio);
  3078. /**
  3079. * skb_frag_dma_map - maps a paged fragment via the DMA API
  3080. * @dev: the device to map the fragment to
  3081. * @frag: the paged fragment to map
  3082. * @offset: the offset within the fragment (starting at the
  3083. * fragment's own offset)
  3084. * @size: the number of bytes to map
  3085. * @dir: the direction of the mapping (``PCI_DMA_*``)
  3086. *
3087. * Maps the page associated with @frag to @dev.
  3088. */
  3089. static inline dma_addr_t skb_frag_dma_map(struct device *dev,
  3090. const skb_frag_t *frag,
  3091. size_t offset, size_t size,
  3092. enum dma_data_direction dir)
  3093. {
  3094. return dma_map_page(dev, skb_frag_page(frag),
  3095. skb_frag_off(frag) + offset, size, dir);
  3096. }
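/* Illustrative sketch: a transmit path maps each paged fragment before
 * writing descriptors. "dev" (a struct device pointer), "i" and the
 * "unmap" label are hypothetical.
 *
 *	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 *		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 *		dma_addr_t dma;
 *
 *		dma = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag),
 *				       DMA_TO_DEVICE);
 *		if (dma_mapping_error(dev, dma))
 *			goto unmap;
 *	}
 */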
  3097. static inline struct sk_buff *pskb_copy(struct sk_buff *skb,
  3098. gfp_t gfp_mask)
  3099. {
  3100. return __pskb_copy(skb, skb_headroom(skb), gfp_mask);
  3101. }
  3102. static inline struct sk_buff *pskb_copy_for_clone(struct sk_buff *skb,
  3103. gfp_t gfp_mask)
  3104. {
  3105. return __pskb_copy_fclone(skb, skb_headroom(skb), gfp_mask, true);
  3106. }
  3107. /**
  3108. * skb_clone_writable - is the header of a clone writable
  3109. * @skb: buffer to check
  3110. * @len: length up to which to write
  3111. *
  3112. * Returns true if modifying the header part of the cloned buffer
3113. * does not require the data to be copied.
  3114. */
  3115. static inline int skb_clone_writable(const struct sk_buff *skb, unsigned int len)
  3116. {
  3117. return !skb_header_cloned(skb) &&
  3118. skb_headroom(skb) + len <= skb->hdr_len;
  3119. }
  3120. static inline int skb_try_make_writable(struct sk_buff *skb,
  3121. unsigned int write_len)
  3122. {
  3123. return skb_cloned(skb) && !skb_clone_writable(skb, write_len) &&
  3124. pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
  3125. }
  3126. static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom,
  3127. int cloned)
  3128. {
  3129. int delta = 0;
  3130. if (headroom > skb_headroom(skb))
  3131. delta = headroom - skb_headroom(skb);
  3132. if (delta || cloned)
  3133. return pskb_expand_head(skb, ALIGN(delta, NET_SKB_PAD), 0,
  3134. GFP_ATOMIC);
  3135. return 0;
  3136. }
  3137. /**
  3138. * skb_cow - copy header of skb when it is required
  3139. * @skb: buffer to cow
  3140. * @headroom: needed headroom
  3141. *
  3142. * If the skb passed lacks sufficient headroom or its data part
  3143. * is shared, data is reallocated. If reallocation fails, an error
3144. * is returned and the original skb is not changed.
  3145. *
  3146. * The result is skb with writable area skb->head...skb->tail
  3147. * and at least @headroom of space at head.
  3148. */
  3149. static inline int skb_cow(struct sk_buff *skb, unsigned int headroom)
  3150. {
  3151. return __skb_cow(skb, headroom, skb_cloned(skb));
  3152. }
  3153. /**
  3154. * skb_cow_head - skb_cow but only making the head writable
  3155. * @skb: buffer to cow
  3156. * @headroom: needed headroom
  3157. *
  3158. * This function is identical to skb_cow except that we replace the
  3159. * skb_cloned check by skb_header_cloned. It should be used when
  3160. * you only need to push on some header and do not need to modify
  3161. * the data.
  3162. */
  3163. static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom)
  3164. {
  3165. return __skb_cow(skb, headroom, skb_header_cloned(skb));
  3166. }
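/* Illustrative sketch: before pushing an extra header (e.g. a VLAN tag),
 * a transmit path makes sure the headroom exists and is writable.
 * The "drop" label is hypothetical.
 *
 *	if (skb_cow_head(skb, VLAN_HLEN))
 *		goto drop;
 *	__skb_push(skb, VLAN_HLEN);
 */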
  3167. /**
  3168. * skb_padto - pad an skbuff up to a minimal size
  3169. * @skb: buffer to pad
  3170. * @len: minimal length
  3171. *
  3172. * Pads up a buffer to ensure the trailing bytes exist and are
  3173. * blanked. If the buffer already contains sufficient data it
  3174. * is untouched. Otherwise it is extended. Returns zero on
  3175. * success. The skb is freed on error.
  3176. */
  3177. static inline int skb_padto(struct sk_buff *skb, unsigned int len)
  3178. {
  3179. unsigned int size = skb->len;
  3180. if (likely(size >= len))
  3181. return 0;
  3182. return skb_pad(skb, len - size);
  3183. }
  3184. /**
  3185. * __skb_put_padto - increase size and pad an skbuff up to a minimal size
  3186. * @skb: buffer to pad
  3187. * @len: minimal length
  3188. * @free_on_error: free buffer on error
  3189. *
  3190. * Pads up a buffer to ensure the trailing bytes exist and are
  3191. * blanked. If the buffer already contains sufficient data it
  3192. * is untouched. Otherwise it is extended. Returns zero on
  3193. * success. The skb is freed on error if @free_on_error is true.
  3194. */
  3195. static inline int __must_check __skb_put_padto(struct sk_buff *skb,
  3196. unsigned int len,
  3197. bool free_on_error)
  3198. {
  3199. unsigned int size = skb->len;
  3200. if (unlikely(size < len)) {
  3201. len -= size;
  3202. if (__skb_pad(skb, len, free_on_error))
  3203. return -ENOMEM;
  3204. __skb_put(skb, len);
  3205. }
  3206. return 0;
  3207. }
  3208. /**
  3209. * skb_put_padto - increase size and pad an skbuff up to a minimal size
  3210. * @skb: buffer to pad
  3211. * @len: minimal length
  3212. *
  3213. * Pads up a buffer to ensure the trailing bytes exist and are
  3214. * blanked. If the buffer already contains sufficient data it
  3215. * is untouched. Otherwise it is extended. Returns zero on
  3216. * success. The skb is freed on error.
  3217. */
  3218. static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int len)
  3219. {
  3220. return __skb_put_padto(skb, len, true);
  3221. }
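/* Illustrative sketch: drivers commonly pad runt frames to the Ethernet
 * minimum just before handing them to hardware; on failure the skb has
 * already been freed, so only the return value needs handling.
 *
 *	if (skb_put_padto(skb, ETH_ZLEN))
 *		return NETDEV_TX_OK;
 */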
  3222. static inline int skb_add_data(struct sk_buff *skb,
  3223. struct iov_iter *from, int copy)
  3224. {
  3225. const int off = skb->len;
  3226. if (skb->ip_summed == CHECKSUM_NONE) {
  3227. __wsum csum = 0;
  3228. if (csum_and_copy_from_iter_full(skb_put(skb, copy), copy,
  3229. &csum, from)) {
  3230. skb->csum = csum_block_add(skb->csum, csum, off);
  3231. return 0;
  3232. }
  3233. } else if (copy_from_iter_full(skb_put(skb, copy), copy, from))
  3234. return 0;
  3235. __skb_trim(skb, off);
  3236. return -EFAULT;
  3237. }
  3238. static inline bool skb_can_coalesce(struct sk_buff *skb, int i,
  3239. const struct page *page, int off)
  3240. {
  3241. if (skb_zcopy(skb))
  3242. return false;
  3243. if (i) {
  3244. const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
  3245. return page == skb_frag_page(frag) &&
  3246. off == skb_frag_off(frag) + skb_frag_size(frag);
  3247. }
  3248. return false;
  3249. }
  3250. static inline int __skb_linearize(struct sk_buff *skb)
  3251. {
  3252. return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM;
  3253. }
  3254. /**
  3255. * skb_linearize - convert paged skb to linear one
3256. * @skb: buffer to linearize
  3257. *
  3258. * If there is no free memory -ENOMEM is returned, otherwise zero
  3259. * is returned and the old skb data released.
  3260. */
  3261. static inline int skb_linearize(struct sk_buff *skb)
  3262. {
  3263. return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0;
  3264. }
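/* Illustrative sketch: a driver that cannot handle scatter/gather data may
 * flatten the skb before copying it out. "tx_buf" and the "drop" label
 * are hypothetical.
 *
 *	if (skb_linearize(skb))
 *		goto drop;
 *	memcpy(tx_buf, skb->data, skb->len);
 */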
  3265. /**
  3266. * skb_has_shared_frag - can any frag be overwritten
  3267. * @skb: buffer to test
  3268. *
  3269. * Return true if the skb has at least one frag that might be modified
  3270. * by an external entity (as in vmsplice()/sendfile())
  3271. */
  3272. static inline bool skb_has_shared_frag(const struct sk_buff *skb)
  3273. {
  3274. return skb_is_nonlinear(skb) &&
  3275. skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
  3276. }
  3277. /**
  3278. * skb_linearize_cow - make sure skb is linear and writable
  3279. * @skb: buffer to process
  3280. *
  3281. * If there is no free memory -ENOMEM is returned, otherwise zero
  3282. * is returned and the old skb data released.
  3283. */
  3284. static inline int skb_linearize_cow(struct sk_buff *skb)
  3285. {
  3286. return skb_is_nonlinear(skb) || skb_cloned(skb) ?
  3287. __skb_linearize(skb) : 0;
  3288. }
  3289. static __always_inline void
  3290. __skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len,
  3291. unsigned int off)
  3292. {
  3293. if (skb->ip_summed == CHECKSUM_COMPLETE)
  3294. skb->csum = csum_block_sub(skb->csum,
  3295. csum_partial(start, len, 0), off);
  3296. else if (skb->ip_summed == CHECKSUM_PARTIAL &&
  3297. skb_checksum_start_offset(skb) < 0)
  3298. skb->ip_summed = CHECKSUM_NONE;
  3299. }
  3300. /**
  3301. * skb_postpull_rcsum - update checksum for received skb after pull
  3302. * @skb: buffer to update
  3303. * @start: start of data before pull
  3304. * @len: length of data pulled
  3305. *
  3306. * After doing a pull on a received packet, you need to call this to
  3307. * update the CHECKSUM_COMPLETE checksum, or set ip_summed to
  3308. * CHECKSUM_NONE so that it can be recomputed from scratch.
  3309. */
  3310. static inline void skb_postpull_rcsum(struct sk_buff *skb,
  3311. const void *start, unsigned int len)
  3312. {
  3313. if (skb->ip_summed == CHECKSUM_COMPLETE)
  3314. skb->csum = wsum_negate(csum_partial(start, len,
  3315. wsum_negate(skb->csum)));
  3316. else if (skb->ip_summed == CHECKSUM_PARTIAL &&
  3317. skb_checksum_start_offset(skb) < 0)
  3318. skb->ip_summed = CHECKSUM_NONE;
  3319. }
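/* Illustrative sketch: after stripping an outer header on receive, the
 * CHECKSUM_COMPLETE value must be adjusted; skb_pull_rcsum() (declared
 * below) combines both steps. "hdrlen" is hypothetical.
 *
 *	skb_postpull_rcsum(skb, skb->data, hdrlen);
 *	__skb_pull(skb, hdrlen);
 */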
  3320. static __always_inline void
  3321. __skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len,
  3322. unsigned int off)
  3323. {
  3324. if (skb->ip_summed == CHECKSUM_COMPLETE)
  3325. skb->csum = csum_block_add(skb->csum,
  3326. csum_partial(start, len, 0), off);
  3327. }
  3328. /**
  3329. * skb_postpush_rcsum - update checksum for received skb after push
  3330. * @skb: buffer to update
  3331. * @start: start of data after push
  3332. * @len: length of data pushed
  3333. *
  3334. * After doing a push on a received packet, you need to call this to
  3335. * update the CHECKSUM_COMPLETE checksum.
  3336. */
  3337. static inline void skb_postpush_rcsum(struct sk_buff *skb,
  3338. const void *start, unsigned int len)
  3339. {
  3340. __skb_postpush_rcsum(skb, start, len, 0);
  3341. }
  3342. void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
  3343. /**
  3344. * skb_push_rcsum - push skb and update receive checksum
  3345. * @skb: buffer to update
3346. * @len: length of data pushed
  3347. *
  3348. * This function performs an skb_push on the packet and updates
  3349. * the CHECKSUM_COMPLETE checksum. It should be used on
  3350. * receive path processing instead of skb_push unless you know
  3351. * that the checksum difference is zero (e.g., a valid IP header)
  3352. * or you are setting ip_summed to CHECKSUM_NONE.
  3353. */
  3354. static inline void *skb_push_rcsum(struct sk_buff *skb, unsigned int len)
  3355. {
  3356. skb_push(skb, len);
  3357. skb_postpush_rcsum(skb, skb->data, len);
  3358. return skb->data;
  3359. }
  3360. int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len);
  3361. /**
  3362. * pskb_trim_rcsum - trim received skb and update checksum
  3363. * @skb: buffer to trim
  3364. * @len: new length
  3365. *
  3366. * This is exactly the same as pskb_trim except that it ensures the
  3367. * checksum of received packets are still valid after the operation.
  3368. * It can change skb pointers.
  3369. */
  3370. static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
  3371. {
  3372. if (likely(len >= skb->len))
  3373. return 0;
  3374. return pskb_trim_rcsum_slow(skb, len);
  3375. }
  3376. static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len)
  3377. {
  3378. if (skb->ip_summed == CHECKSUM_COMPLETE)
  3379. skb->ip_summed = CHECKSUM_NONE;
  3380. __skb_trim(skb, len);
  3381. return 0;
  3382. }
  3383. static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
  3384. {
  3385. if (skb->ip_summed == CHECKSUM_COMPLETE)
  3386. skb->ip_summed = CHECKSUM_NONE;
  3387. return __skb_grow(skb, len);
  3388. }
  3389. #define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode)
  3390. #define skb_rb_first(root) rb_to_skb(rb_first(root))
  3391. #define skb_rb_last(root) rb_to_skb(rb_last(root))
  3392. #define skb_rb_next(skb) rb_to_skb(rb_next(&(skb)->rbnode))
  3393. #define skb_rb_prev(skb) rb_to_skb(rb_prev(&(skb)->rbnode))
  3394. #define skb_queue_walk(queue, skb) \
  3395. for (skb = (queue)->next; \
  3396. skb != (struct sk_buff *)(queue); \
  3397. skb = skb->next)
  3398. #define skb_queue_walk_safe(queue, skb, tmp) \
  3399. for (skb = (queue)->next, tmp = skb->next; \
  3400. skb != (struct sk_buff *)(queue); \
  3401. skb = tmp, tmp = skb->next)
  3402. #define skb_queue_walk_from(queue, skb) \
  3403. for (; skb != (struct sk_buff *)(queue); \
  3404. skb = skb->next)
  3405. #define skb_rbtree_walk(skb, root) \
  3406. for (skb = skb_rb_first(root); skb != NULL; \
  3407. skb = skb_rb_next(skb))
  3408. #define skb_rbtree_walk_from(skb) \
  3409. for (; skb != NULL; \
  3410. skb = skb_rb_next(skb))
  3411. #define skb_rbtree_walk_from_safe(skb, tmp) \
  3412. for (; tmp = skb ? skb_rb_next(skb) : NULL, (skb != NULL); \
  3413. skb = tmp)
  3414. #define skb_queue_walk_from_safe(queue, skb, tmp) \
  3415. for (tmp = skb->next; \
  3416. skb != (struct sk_buff *)(queue); \
  3417. skb = tmp, tmp = skb->next)
  3418. #define skb_queue_reverse_walk(queue, skb) \
  3419. for (skb = (queue)->prev; \
  3420. skb != (struct sk_buff *)(queue); \
  3421. skb = skb->prev)
  3422. #define skb_queue_reverse_walk_safe(queue, skb, tmp) \
  3423. for (skb = (queue)->prev, tmp = skb->prev; \
  3424. skb != (struct sk_buff *)(queue); \
  3425. skb = tmp, tmp = skb->prev)
  3426. #define skb_queue_reverse_walk_from_safe(queue, skb, tmp) \
  3427. for (tmp = skb->prev; \
  3428. skb != (struct sk_buff *)(queue); \
  3429. skb = tmp, tmp = skb->prev)
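/* Illustrative sketch: the *_safe walk variants allow unlinking while
 * iterating. "sk" and "match()" are hypothetical and the queue lock is
 * assumed to be held by the caller.
 *
 *	skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
 *		if (match(skb)) {
 *			__skb_unlink(skb, &sk->sk_receive_queue);
 *			kfree_skb(skb);
 *		}
 *	}
 */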
  3430. static inline bool skb_has_frag_list(const struct sk_buff *skb)
  3431. {
  3432. return skb_shinfo(skb)->frag_list != NULL;
  3433. }
  3434. static inline void skb_frag_list_init(struct sk_buff *skb)
  3435. {
  3436. skb_shinfo(skb)->frag_list = NULL;
  3437. }
  3438. #define skb_walk_frags(skb, iter) \
  3439. for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next)
  3440. int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue,
  3441. int *err, long *timeo_p,
  3442. const struct sk_buff *skb);
  3443. struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
  3444. struct sk_buff_head *queue,
  3445. unsigned int flags,
  3446. int *off, int *err,
  3447. struct sk_buff **last);
  3448. struct sk_buff *__skb_try_recv_datagram(struct sock *sk,
  3449. struct sk_buff_head *queue,
  3450. unsigned int flags, int *off, int *err,
  3451. struct sk_buff **last);
  3452. struct sk_buff *__skb_recv_datagram(struct sock *sk,
  3453. struct sk_buff_head *sk_queue,
  3454. unsigned int flags, int *off, int *err);
  3455. struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int *err);
  3456. __poll_t datagram_poll(struct file *file, struct socket *sock,
  3457. struct poll_table_struct *wait);
  3458. int skb_copy_datagram_iter(const struct sk_buff *from, int offset,
  3459. struct iov_iter *to, int size);
  3460. static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,
  3461. struct msghdr *msg, int size)
  3462. {
  3463. return skb_copy_datagram_iter(from, offset, &msg->msg_iter, size);
  3464. }
  3465. int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen,
  3466. struct msghdr *msg);
  3467. int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
  3468. struct iov_iter *to, int len,
  3469. struct ahash_request *hash);
  3470. int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
  3471. struct iov_iter *from, int len);
  3472. int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm);
  3473. void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
  3474. void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len);
  3475. static inline void skb_free_datagram_locked(struct sock *sk,
  3476. struct sk_buff *skb)
  3477. {
  3478. __skb_free_datagram_locked(sk, skb, 0);
  3479. }
  3480. int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags);
  3481. int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
  3482. int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
  3483. __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
  3484. int len);
  3485. int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
  3486. struct pipe_inode_info *pipe, unsigned int len,
  3487. unsigned int flags);
  3488. int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
  3489. int len);
  3490. int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len);
  3491. void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
  3492. unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
  3493. int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
  3494. int len, int hlen);
  3495. void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);
  3496. int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
  3497. void skb_scrub_packet(struct sk_buff *skb, bool xnet);
  3498. bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu);
  3499. bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len);
  3500. struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
  3501. struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features,
  3502. unsigned int offset);
  3503. struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
  3504. int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len);
  3505. int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci);
  3506. int skb_vlan_pop(struct sk_buff *skb);
  3507. int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
  3508. int skb_eth_pop(struct sk_buff *skb);
  3509. int skb_eth_push(struct sk_buff *skb, const unsigned char *dst,
  3510. const unsigned char *src);
  3511. int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
  3512. int mac_len, bool ethernet);
  3513. int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len,
  3514. bool ethernet);
  3515. int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse);
  3516. int skb_mpls_dec_ttl(struct sk_buff *skb);
  3517. struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy,
  3518. gfp_t gfp);
  3519. static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len)
  3520. {
  3521. return copy_from_iter_full(data, len, &msg->msg_iter) ? 0 : -EFAULT;
  3522. }
  3523. static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len)
  3524. {
  3525. return copy_to_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT;
  3526. }
  3527. struct skb_checksum_ops {
  3528. __wsum (*update)(const void *mem, int len, __wsum wsum);
  3529. __wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len);
  3530. };
  3531. extern const struct skb_checksum_ops *crc32c_csum_stub __read_mostly;
  3532. __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
  3533. __wsum csum, const struct skb_checksum_ops *ops);
  3534. __wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
  3535. __wsum csum);
  3536. static inline void * __must_check
  3537. __skb_header_pointer(const struct sk_buff *skb, int offset, int len,
  3538. const void *data, int hlen, void *buffer)
  3539. {
  3540. if (likely(hlen - offset >= len))
  3541. return (void *)data + offset;
  3542. if (!skb || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0))
  3543. return NULL;
  3544. return buffer;
  3545. }
  3546. static inline void * __must_check
  3547. skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer)
  3548. {
  3549. return __skb_header_pointer(skb, offset, len, skb->data,
  3550. skb_headlen(skb), buffer);
  3551. }
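/* Illustrative sketch: skb_header_pointer() lets parsers read a header
 * regardless of how the skb is fragmented, falling back to a copy into a
 * local buffer. "thoff" (a transport-header offset) and the "drop" label
 * are hypothetical.
 *
 *	struct tcphdr _tcph;
 *	const struct tcphdr *th;
 *
 *	th = skb_header_pointer(skb, thoff, sizeof(_tcph), &_tcph);
 *	if (!th)
 *		goto drop;
 */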
  3552. /**
  3553. * skb_needs_linearize - check if we need to linearize a given skb
  3554. * depending on the given device features.
  3555. * @skb: socket buffer to check
  3556. * @features: net device features
  3557. *
  3558. * Returns true if either:
  3559. * 1. skb has frag_list and the device doesn't support FRAGLIST, or
  3560. * 2. skb is fragmented and the device does not support SG.
  3561. */
  3562. static inline bool skb_needs_linearize(struct sk_buff *skb,
  3563. netdev_features_t features)
  3564. {
  3565. return skb_is_nonlinear(skb) &&
  3566. ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
  3567. (skb_shinfo(skb)->nr_frags && !(features & NETIF_F_SG)));
  3568. }
  3569. static inline void skb_copy_from_linear_data(const struct sk_buff *skb,
  3570. void *to,
  3571. const unsigned int len)
  3572. {
  3573. memcpy(to, skb->data, len);
  3574. }
  3575. static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb,
  3576. const int offset, void *to,
  3577. const unsigned int len)
  3578. {
  3579. memcpy(to, skb->data + offset, len);
  3580. }
  3581. static inline void skb_copy_to_linear_data(struct sk_buff *skb,
  3582. const void *from,
  3583. const unsigned int len)
  3584. {
  3585. memcpy(skb->data, from, len);
  3586. }
  3587. static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb,
  3588. const int offset,
  3589. const void *from,
  3590. const unsigned int len)
  3591. {
  3592. memcpy(skb->data + offset, from, len);
  3593. }
  3594. void skb_init(void);
  3595. static inline ktime_t skb_get_ktime(const struct sk_buff *skb)
  3596. {
  3597. return skb->tstamp;
  3598. }
  3599. /**
  3600. * skb_get_timestamp - get timestamp from a skb
  3601. * @skb: skb to get stamp from
  3602. * @stamp: pointer to struct __kernel_old_timeval to store stamp in
  3603. *
  3604. * Timestamps are stored in the skb as offsets to a base timestamp.
  3605. * This function converts the offset back to a struct timeval and stores
  3606. * it in stamp.
  3607. */
  3608. static inline void skb_get_timestamp(const struct sk_buff *skb,
  3609. struct __kernel_old_timeval *stamp)
  3610. {
  3611. *stamp = ns_to_kernel_old_timeval(skb->tstamp);
  3612. }
  3613. static inline void skb_get_new_timestamp(const struct sk_buff *skb,
  3614. struct __kernel_sock_timeval *stamp)
  3615. {
  3616. struct timespec64 ts = ktime_to_timespec64(skb->tstamp);
  3617. stamp->tv_sec = ts.tv_sec;
  3618. stamp->tv_usec = ts.tv_nsec / 1000;
  3619. }
  3620. static inline void skb_get_timestampns(const struct sk_buff *skb,
  3621. struct __kernel_old_timespec *stamp)
  3622. {
  3623. struct timespec64 ts = ktime_to_timespec64(skb->tstamp);
  3624. stamp->tv_sec = ts.tv_sec;
  3625. stamp->tv_nsec = ts.tv_nsec;
  3626. }
  3627. static inline void skb_get_new_timestampns(const struct sk_buff *skb,
  3628. struct __kernel_timespec *stamp)
  3629. {
  3630. struct timespec64 ts = ktime_to_timespec64(skb->tstamp);
  3631. stamp->tv_sec = ts.tv_sec;
  3632. stamp->tv_nsec = ts.tv_nsec;
  3633. }
  3634. static inline void __net_timestamp(struct sk_buff *skb)
  3635. {
  3636. skb->tstamp = ktime_get_real();
  3637. skb->mono_delivery_time = 0;
  3638. }
  3639. static inline ktime_t net_timedelta(ktime_t t)
  3640. {
  3641. return ktime_sub(ktime_get_real(), t);
  3642. }
  3643. static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt,
  3644. bool mono)
  3645. {
  3646. skb->tstamp = kt;
  3647. skb->mono_delivery_time = kt && mono;
  3648. }
  3649. DECLARE_STATIC_KEY_FALSE(netstamp_needed_key);
  3650. /* It is used in the ingress path to clear the delivery_time.
  3651. * If needed, set the skb->tstamp to the (rcv) timestamp.
  3652. */
  3653. static inline void skb_clear_delivery_time(struct sk_buff *skb)
  3654. {
  3655. if (skb->mono_delivery_time) {
  3656. skb->mono_delivery_time = 0;
  3657. if (static_branch_unlikely(&netstamp_needed_key))
  3658. skb->tstamp = ktime_get_real();
  3659. else
  3660. skb->tstamp = 0;
  3661. }
  3662. }
  3663. static inline void skb_clear_tstamp(struct sk_buff *skb)
  3664. {
  3665. if (skb->mono_delivery_time)
  3666. return;
  3667. skb->tstamp = 0;
  3668. }
  3669. static inline ktime_t skb_tstamp(const struct sk_buff *skb)
  3670. {
  3671. if (skb->mono_delivery_time)
  3672. return 0;
  3673. return skb->tstamp;
  3674. }
  3675. static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond)
  3676. {
  3677. if (!skb->mono_delivery_time && skb->tstamp)
  3678. return skb->tstamp;
  3679. if (static_branch_unlikely(&netstamp_needed_key) || cond)
  3680. return ktime_get_real();
  3681. return 0;
  3682. }
  3683. static inline u8 skb_metadata_len(const struct sk_buff *skb)
  3684. {
  3685. return skb_shinfo(skb)->meta_len;
  3686. }
  3687. static inline void *skb_metadata_end(const struct sk_buff *skb)
  3688. {
  3689. return skb_mac_header(skb);
  3690. }
  3691. static inline bool __skb_metadata_differs(const struct sk_buff *skb_a,
  3692. const struct sk_buff *skb_b,
  3693. u8 meta_len)
  3694. {
  3695. const void *a = skb_metadata_end(skb_a);
  3696. const void *b = skb_metadata_end(skb_b);
3697. /* Using a more efficient variant than a plain call to memcmp(). */
  3698. #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
  3699. u64 diffs = 0;
  3700. switch (meta_len) {
  3701. #define __it(x, op) (x -= sizeof(u##op))
  3702. #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op))
  3703. case 32: diffs |= __it_diff(a, b, 64);
  3704. fallthrough;
  3705. case 24: diffs |= __it_diff(a, b, 64);
  3706. fallthrough;
  3707. case 16: diffs |= __it_diff(a, b, 64);
  3708. fallthrough;
  3709. case 8: diffs |= __it_diff(a, b, 64);
  3710. break;
  3711. case 28: diffs |= __it_diff(a, b, 64);
  3712. fallthrough;
  3713. case 20: diffs |= __it_diff(a, b, 64);
  3714. fallthrough;
  3715. case 12: diffs |= __it_diff(a, b, 64);
  3716. fallthrough;
  3717. case 4: diffs |= __it_diff(a, b, 32);
  3718. break;
  3719. }
  3720. return diffs;
  3721. #else
  3722. return memcmp(a - meta_len, b - meta_len, meta_len);
  3723. #endif
  3724. }
  3725. static inline bool skb_metadata_differs(const struct sk_buff *skb_a,
  3726. const struct sk_buff *skb_b)
  3727. {
  3728. u8 len_a = skb_metadata_len(skb_a);
  3729. u8 len_b = skb_metadata_len(skb_b);
  3730. if (!(len_a | len_b))
  3731. return false;
  3732. return len_a != len_b ?
  3733. true : __skb_metadata_differs(skb_a, skb_b, len_a);
  3734. }
  3735. static inline void skb_metadata_set(struct sk_buff *skb, u8 meta_len)
  3736. {
  3737. skb_shinfo(skb)->meta_len = meta_len;
  3738. }
  3739. static inline void skb_metadata_clear(struct sk_buff *skb)
  3740. {
  3741. skb_metadata_set(skb, 0);
  3742. }
  3743. struct sk_buff *skb_clone_sk(struct sk_buff *skb);
  3744. #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
  3745. void skb_clone_tx_timestamp(struct sk_buff *skb);
  3746. bool skb_defer_rx_timestamp(struct sk_buff *skb);
  3747. #else /* CONFIG_NETWORK_PHY_TIMESTAMPING */
  3748. static inline void skb_clone_tx_timestamp(struct sk_buff *skb)
  3749. {
  3750. }
  3751. static inline bool skb_defer_rx_timestamp(struct sk_buff *skb)
  3752. {
  3753. return false;
  3754. }
  3755. #endif /* !CONFIG_NETWORK_PHY_TIMESTAMPING */
  3756. /**
  3757. * skb_complete_tx_timestamp() - deliver cloned skb with tx timestamps
  3758. *
  3759. * PHY drivers may accept clones of transmitted packets for
  3760. * timestamping via their phy_driver.txtstamp method. These drivers
  3761. * must call this function to return the skb back to the stack with a
  3762. * timestamp.
  3763. *
  3764. * @skb: clone of the original outgoing packet
  3765. * @hwtstamps: hardware time stamps
  3766. *
  3767. */
  3768. void skb_complete_tx_timestamp(struct sk_buff *skb,
  3769. struct skb_shared_hwtstamps *hwtstamps);
  3770. void __skb_tstamp_tx(struct sk_buff *orig_skb, const struct sk_buff *ack_skb,
  3771. struct skb_shared_hwtstamps *hwtstamps,
  3772. struct sock *sk, int tstype);
  3773. /**
  3774. * skb_tstamp_tx - queue clone of skb with send time stamps
  3775. * @orig_skb: the original outgoing packet
  3776. * @hwtstamps: hardware time stamps, may be NULL if not available
  3777. *
  3778. * If the skb has a socket associated, then this function clones the
  3779. * skb (thus sharing the actual data and optional structures), stores
  3780. * the optional hardware time stamping information (if non NULL) or
  3781. * generates a software time stamp (otherwise), then queues the clone
  3782. * to the error queue of the socket. Errors are silently ignored.
  3783. */
  3784. void skb_tstamp_tx(struct sk_buff *orig_skb,
  3785. struct skb_shared_hwtstamps *hwtstamps);
  3786. /**
  3787. * skb_tx_timestamp() - Driver hook for transmit timestamping
  3788. *
  3789. * Ethernet MAC Drivers should call this function in their hard_xmit()
  3790. * function immediately before giving the sk_buff to the MAC hardware.
  3791. *
  3792. * Specifically, one should make absolutely sure that this function is
  3793. * called before TX completion of this packet can trigger. Otherwise
  3794. * the packet could potentially already be freed.
  3795. *
  3796. * @skb: A socket buffer.
  3797. */
  3798. static inline void skb_tx_timestamp(struct sk_buff *skb)
  3799. {
  3800. skb_clone_tx_timestamp(skb);
  3801. if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP)
  3802. skb_tstamp_tx(skb, NULL);
  3803. }
  3804. /**
  3805. * skb_complete_wifi_ack - deliver skb with wifi status
  3806. *
  3807. * @skb: the original outgoing packet
  3808. * @acked: ack status
  3809. *
  3810. */
  3811. void skb_complete_wifi_ack(struct sk_buff *skb, bool acked);
  3812. __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
  3813. __sum16 __skb_checksum_complete(struct sk_buff *skb);
  3814. static inline int skb_csum_unnecessary(const struct sk_buff *skb)
  3815. {
  3816. return ((skb->ip_summed == CHECKSUM_UNNECESSARY) ||
  3817. skb->csum_valid ||
  3818. (skb->ip_summed == CHECKSUM_PARTIAL &&
  3819. skb_checksum_start_offset(skb) >= 0));
  3820. }
  3821. /**
  3822. * skb_checksum_complete - Calculate checksum of an entire packet
  3823. * @skb: packet to process
  3824. *
  3825. * This function calculates the checksum over the entire packet plus
  3826. * the value of skb->csum. The latter can be used to supply the
  3827. * checksum of a pseudo header as used by TCP/UDP. It returns the
  3828. * checksum.
  3829. *
  3830. * For protocols that contain complete checksums such as ICMP/TCP/UDP,
3831. * this function can be used to verify the checksum on received
  3832. * packets. In that case the function should return zero if the
  3833. * checksum is correct. In particular, this function will return zero
  3834. * if skb->ip_summed is CHECKSUM_UNNECESSARY which indicates that the
  3835. * hardware has already verified the correctness of the checksum.
  3836. */
  3837. static inline __sum16 skb_checksum_complete(struct sk_buff *skb)
  3838. {
  3839. return skb_csum_unnecessary(skb) ?
  3840. 0 : __skb_checksum_complete(skb);
  3841. }
static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
		if (skb->csum_level == 0)
			skb->ip_summed = CHECKSUM_NONE;
		else
			skb->csum_level--;
	}
}

static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
		if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
			skb->csum_level++;
	} else if (skb->ip_summed == CHECKSUM_NONE) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		skb->csum_level = 0;
	}
}

static inline void __skb_reset_checksum_unnecessary(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
		skb->ip_summed = CHECKSUM_NONE;
		skb->csum_level = 0;
	}
}
/* Check if we need to perform checksum complete validation.
 *
 * Returns true if checksum complete is needed, false otherwise
 * (either checksum is unnecessary or zero checksum is allowed).
 */
static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
						  bool zero_okay,
						  __sum16 check)
{
	if (skb_csum_unnecessary(skb) || (zero_okay && !check)) {
		skb->csum_valid = 1;
		__skb_decr_checksum_unnecessary(skb);
		return false;
	}

	return true;
}
/* For small packets <= CHECKSUM_BREAK, perform checksum complete directly
 * in checksum_init.
 */
#define CHECKSUM_BREAK 76

/* Unset checksum-complete
 *
 * Unsetting checksum-complete is appropriate when a packet is being
 * modified (uncompressed, for instance) and the checksum-complete value
 * becomes invalid.
 */
static inline void skb_checksum_complete_unset(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->ip_summed = CHECKSUM_NONE;
}
/* Validate (init) checksum based on checksum complete.
 *
 * Return values:
 *   0: checksum is validated, or validation will be attempted later in
 *	skb_checksum_complete.  In the latter case ip_summed will not be
 *	CHECKSUM_UNNECESSARY and the pseudo-header checksum is stored in
 *	skb->csum for use by __skb_checksum_complete.
 *   non-zero: value of the invalid checksum
 */
static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb,
						       bool complete,
						       __wsum psum)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!csum_fold(csum_add(psum, skb->csum))) {
			skb->csum_valid = 1;
			return 0;
		}
	}

	skb->csum = psum;

	if (complete || skb->len <= CHECKSUM_BREAK) {
		__sum16 csum;

		csum = __skb_checksum_complete(skb);
		skb->csum_valid = !csum;
		return csum;
	}

	return 0;
}
static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
{
	return 0;
}
/* Perform checksum validate (init).  Note that this is a macro since we only
 * want to call the pseudo-header compute function when it is actually needed.
 * First we try to validate without any computation (checksum unnecessary) and
 * then calculate based on checksum complete, calling the supplied function to
 * compute the pseudo header.
 *
 * Return values:
 *   0: checksum is validated, or validation will be attempted later in
 *	skb_checksum_complete
 *   non-zero: value of the invalid checksum
 */
#define __skb_checksum_validate(skb, proto, complete,			\
				zero_okay, check, compute_pseudo)	\
({									\
	__sum16 __ret = 0;						\
	skb->csum_valid = 0;						\
	if (__skb_checksum_validate_needed(skb, zero_okay, check))	\
		__ret = __skb_checksum_validate_complete(skb,		\
				complete, compute_pseudo(skb, proto));	\
	__ret;								\
})

#define skb_checksum_init(skb, proto, compute_pseudo)			\
	__skb_checksum_validate(skb, proto, false, false, 0, compute_pseudo)

#define skb_checksum_init_zero_check(skb, proto, check, compute_pseudo) \
	__skb_checksum_validate(skb, proto, false, true, check, compute_pseudo)

#define skb_checksum_validate(skb, proto, compute_pseudo)		\
	__skb_checksum_validate(skb, proto, true, false, 0, compute_pseudo)

#define skb_checksum_validate_zero_check(skb, proto, check,		\
					 compute_pseudo)		\
	__skb_checksum_validate(skb, proto, true, true, check, compute_pseudo)

#define skb_checksum_simple_validate(skb)				\
	__skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo)
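
/* Illustrative sketch (not part of this header): a UDP-like receive path
 * could initialize/validate the checksum with a protocol-specific
 * pseudo-header helper.  Here "uh" stands for a pointer to the transport
 * header and my_compute_pseudo() is a hypothetical placeholder for such a
 * helper; both names are only for the example.
 *
 *	if (skb_checksum_init_zero_check(skb, IPPROTO_UDP, uh->check,
 *					 my_compute_pseudo))
 *		goto csum_error;	// non-zero means the checksum is bad
 */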
static inline bool __skb_checksum_convert_check(struct sk_buff *skb)
{
	return (skb->ip_summed == CHECKSUM_NONE && skb->csum_valid);
}

static inline void __skb_checksum_convert(struct sk_buff *skb, __wsum pseudo)
{
	skb->csum = ~pseudo;
	skb->ip_summed = CHECKSUM_COMPLETE;
}

#define skb_checksum_try_convert(skb, proto, compute_pseudo)		\
do {									\
	if (__skb_checksum_convert_check(skb))				\
		__skb_checksum_convert(skb, compute_pseudo(skb, proto)); \
} while (0)
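
/* Illustrative sketch (not part of this header): after a successful software
 * validation, a receive path may convert the skb to CHECKSUM_COMPLETE so
 * later consumers (e.g. tunnel decapsulation) can reuse the value instead of
 * recomputing it.  my_compute_pseudo() is again a hypothetical placeholder.
 *
 *	skb_checksum_try_convert(skb, IPPROTO_UDP, my_compute_pseudo);
 */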
static inline void skb_remcsum_adjust_partial(struct sk_buff *skb, void *ptr,
					      u16 start, u16 offset)
{
	skb->ip_summed = CHECKSUM_PARTIAL;
	skb->csum_start = ((unsigned char *)ptr + start) - skb->head;
	skb->csum_offset = offset - start;
}
/* Update skbuff and packet to reflect the remote checksum offload operation.
 * When called, ptr indicates the starting point for skb->csum when
 * ip_summed is CHECKSUM_COMPLETE.  If we need to create the checksum-complete
 * value here, skb_postpull_rcsum is done so that skb->csum starts at ptr.
 */
static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr,
				       int start, int offset, bool nopartial)
{
	__wsum delta;

	if (!nopartial) {
		skb_remcsum_adjust_partial(skb, ptr, start, offset);
		return;
	}

	if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) {
		__skb_checksum_complete(skb);
		skb_postpull_rcsum(skb, skb->data, ptr - (void *)skb->data);
	}

	delta = remcsum_adjust(ptr, skb->csum, start, offset);

	/* Adjust skb->csum since we changed the packet */
	skb->csum = csum_add(skb->csum, delta);
}
static inline struct nf_conntrack *skb_nfct(const struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	return (void *)(skb->_nfct & NFCT_PTRMASK);
#else
	return NULL;
#endif
}

static inline unsigned long skb_get_nfct(const struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	return skb->_nfct;
#else
	return 0UL;
#endif
}

static inline void skb_set_nfct(struct sk_buff *skb, unsigned long nfct)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	skb->slow_gro |= !!nfct;
	skb->_nfct = nfct;
#endif
}
#ifdef CONFIG_SKB_EXTENSIONS
enum skb_ext_id {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
	SKB_EXT_BRIDGE_NF,
#endif
#ifdef CONFIG_XFRM
	SKB_EXT_SEC_PATH,
#endif
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	TC_SKB_EXT,
#endif
#if IS_ENABLED(CONFIG_MPTCP)
	SKB_EXT_MPTCP,
#endif
#if IS_ENABLED(CONFIG_MCTP_FLOWS)
	SKB_EXT_MCTP,
#endif
	SKB_EXT_NUM, /* must be last */
};
/**
 * struct skb_ext - sk_buff extensions
 * @refcnt: 1 on allocation, deallocated on 0
 * @offset: offset to add to @data to obtain extension address
 * @chunks: size currently allocated, stored in SKB_EXT_ALIGN_SHIFT units
 * @data: start of extension data, variable sized
 *
 * Note: offsets/lengths are stored in chunks of 8 bytes, which allows
 * the use of 'u8' types while still permitting up to 2kB worth of
 * extension data.
 */
struct skb_ext {
	refcount_t refcnt;
	u8 offset[SKB_EXT_NUM]; /* in chunks of 8 bytes */
	u8 chunks;		/* same */
	char data[] __aligned(8);
};
struct skb_ext *__skb_ext_alloc(gfp_t flags);
void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id,
		    struct skb_ext *ext);
void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id);
void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id);
void __skb_ext_put(struct skb_ext *ext);

static inline void skb_ext_put(struct sk_buff *skb)
{
	if (skb->active_extensions)
		__skb_ext_put(skb->extensions);
}

static inline void __skb_ext_copy(struct sk_buff *dst,
				  const struct sk_buff *src)
{
	dst->active_extensions = src->active_extensions;

	if (src->active_extensions) {
		struct skb_ext *ext = src->extensions;

		refcount_inc(&ext->refcnt);
		dst->extensions = ext;
	}
}

static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *src)
{
	skb_ext_put(dst);
	__skb_ext_copy(dst, src);
}

static inline bool __skb_ext_exist(const struct skb_ext *ext, enum skb_ext_id i)
{
	return !!ext->offset[i];
}

static inline bool skb_ext_exist(const struct sk_buff *skb, enum skb_ext_id id)
{
	return skb->active_extensions & (1 << id);
}

static inline void skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
{
	if (skb_ext_exist(skb, id))
		__skb_ext_del(skb, id);
}

static inline void *skb_ext_find(const struct sk_buff *skb, enum skb_ext_id id)
{
	if (skb_ext_exist(skb, id)) {
		struct skb_ext *ext = skb->extensions;

		return (void *)ext + (ext->offset[id] << 3);
	}

	return NULL;
}
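
/* Illustrative sketch (not part of this header): attaching and later looking
 * up an extension.  SKB_EXT_MPTCP is used purely as an example id; the layout
 * of the stored object is owned by the subsystem that defined the extension.
 *
 *	struct mptcp_ext *ext = skb_ext_add(skb, SKB_EXT_MPTCP);
 *	if (!ext)
 *		return -ENOMEM;			// allocation failed
 *	...
 *	ext = skb_ext_find(skb, SKB_EXT_MPTCP);	// NULL if not present
 */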
static inline void skb_ext_reset(struct sk_buff *skb)
{
	if (unlikely(skb->active_extensions)) {
		__skb_ext_put(skb->extensions);
		skb->active_extensions = 0;
	}
}

static inline bool skb_has_extensions(struct sk_buff *skb)
{
	return unlikely(skb->active_extensions);
}
#else
static inline void skb_ext_put(struct sk_buff *skb) {}
static inline void skb_ext_reset(struct sk_buff *skb) {}
static inline void skb_ext_del(struct sk_buff *skb, int unused) {}
static inline void __skb_ext_copy(struct sk_buff *d, const struct sk_buff *s) {}
static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *s) {}
static inline bool skb_has_extensions(struct sk_buff *skb) { return false; }
#endif /* CONFIG_SKB_EXTENSIONS */
static inline void nf_reset_ct(struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	nf_conntrack_put(skb_nfct(skb));
	skb->_nfct = 0;
#endif
}

static inline void nf_reset_trace(struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES)
	skb->nf_trace = 0;
#endif
}

static inline void ipvs_reset(struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IP_VS)
	skb->ipvs_property = 0;
#endif
}

/* Note: This doesn't put any conntrack info in dst. */
static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
			     bool copy)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	dst->_nfct = src->_nfct;
	nf_conntrack_get(skb_nfct(src));
#endif
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES)
	if (copy)
		dst->nf_trace = src->nf_trace;
#endif
}

static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	nf_conntrack_put(skb_nfct(dst));
#endif
	dst->slow_gro = src->slow_gro;
	__nf_copy(dst, src, true);
}
#ifdef CONFIG_NETWORK_SECMARK
static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from)
{
	to->secmark = from->secmark;
}

static inline void skb_init_secmark(struct sk_buff *skb)
{
	skb->secmark = 0;
}
#else
static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from)
{ }

static inline void skb_init_secmark(struct sk_buff *skb)
{ }
#endif

static inline int secpath_exists(const struct sk_buff *skb)
{
#ifdef CONFIG_XFRM
	return skb_ext_exist(skb, SKB_EXT_SEC_PATH);
#else
	return 0;
#endif
}
static inline bool skb_irq_freeable(const struct sk_buff *skb)
{
	return !skb->destructor &&
	       !secpath_exists(skb) &&
	       !skb_nfct(skb) &&
	       !skb->_skb_refdst &&
	       !skb_has_frag_list(skb);
}

static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping)
{
	skb->queue_mapping = queue_mapping;
}

static inline u16 skb_get_queue_mapping(const struct sk_buff *skb)
{
	return skb->queue_mapping;
}

static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from)
{
	to->queue_mapping = from->queue_mapping;
}

static inline void skb_record_rx_queue(struct sk_buff *skb, u16 rx_queue)
{
	skb->queue_mapping = rx_queue + 1;
}

static inline u16 skb_get_rx_queue(const struct sk_buff *skb)
{
	return skb->queue_mapping - 1;
}

static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
{
	return skb->queue_mapping != 0;
}
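
/* Illustrative sketch (not part of this header): a multiqueue driver records
 * the receive queue while building the skb so queue-aware steering can use it
 * later.  rx_ring and its queue_index/napi members are hypothetical driver
 * fields used only for the example.
 *
 *	skb_record_rx_queue(skb, rx_ring->queue_index);
 *	napi_gro_receive(&rx_ring->napi, skb);
 */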
static inline void skb_set_dst_pending_confirm(struct sk_buff *skb, u32 val)
{
	skb->dst_pending_confirm = val;
}

static inline bool skb_get_dst_pending_confirm(const struct sk_buff *skb)
{
	return skb->dst_pending_confirm != 0;
}
static inline struct sec_path *skb_sec_path(const struct sk_buff *skb)
{
#ifdef CONFIG_XFRM
	return skb_ext_find(skb, SKB_EXT_SEC_PATH);
#else
	return NULL;
#endif
}
/* Keeps track of the mac header offset relative to skb->head.
 * It is useful for TSO of tunneling protocols, e.g. GRE.
 * For a non-tunnel skb it points to skb_mac_header() and for a
 * tunnel skb it points to the outer mac header.
 * Also keeps track of the level of encapsulation of network headers.
 */
struct skb_gso_cb {
	union {
		int	mac_offset;
		int	data_offset;
	};
	int	encap_level;
	__wsum	csum;
	__u16	csum_start;
};
#define SKB_GSO_CB_OFFSET	32
#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_GSO_CB_OFFSET))
static inline int skb_tnl_header_len(const struct sk_buff *inner_skb)
{
	return (skb_mac_header(inner_skb) - inner_skb->head) -
		SKB_GSO_CB(inner_skb)->mac_offset;
}

static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra)
{
	int new_headroom, headroom;
	int ret;

	headroom = skb_headroom(skb);
	ret = pskb_expand_head(skb, extra, 0, GFP_ATOMIC);
	if (ret)
		return ret;

	new_headroom = skb_headroom(skb);
	SKB_GSO_CB(skb)->mac_offset += (new_headroom - headroom);
	return 0;
}
static inline void gso_reset_checksum(struct sk_buff *skb, __wsum res)
{
	/* Do not update partial checksums if remote checksum is enabled. */
	if (skb->remcsum_offload)
		return;

	SKB_GSO_CB(skb)->csum = res;
	SKB_GSO_CB(skb)->csum_start = skb_checksum_start(skb) - skb->head;
}
/* Compute the checksum for a gso segment.  First compute the checksum value
 * from the start of the transport header to SKB_GSO_CB(skb)->csum_start, and
 * then add in skb->csum (checksum from csum_start to end of packet).
 * skb->csum and csum_start are then updated to reflect the checksum of the
 * resultant packet starting from the transport header; the resultant checksum
 * is in the res argument (i.e. normally zero or the ~ of the checksum of a
 * pseudo header).
 */
static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res)
{
	unsigned char *csum_start = skb_transport_header(skb);
	int plen = (skb->head + SKB_GSO_CB(skb)->csum_start) - csum_start;
	__wsum partial = SKB_GSO_CB(skb)->csum;

	SKB_GSO_CB(skb)->csum = res;
	SKB_GSO_CB(skb)->csum_start = csum_start - skb->head;

	return csum_fold(csum_partial(csum_start, plen, partial));
}
static inline bool skb_is_gso(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_size;
}

/* Note: Should be called only if skb_is_gso(skb) is true */
static inline bool skb_is_gso_v6(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
}

/* Note: Should be called only if skb_is_gso(skb) is true */
static inline bool skb_is_gso_sctp(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP;
}

/* Note: Should be called only if skb_is_gso(skb) is true */
static inline bool skb_is_gso_tcp(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6);
}

static inline void skb_gso_reset(struct sk_buff *skb)
{
	skb_shinfo(skb)->gso_size = 0;
	skb_shinfo(skb)->gso_segs = 0;
	skb_shinfo(skb)->gso_type = 0;
}

static inline void skb_increase_gso_size(struct skb_shared_info *shinfo,
					 u16 increment)
{
	if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS))
		return;
	shinfo->gso_size += increment;
}

static inline void skb_decrease_gso_size(struct skb_shared_info *shinfo,
					 u16 decrement)
{
	if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS))
		return;
	shinfo->gso_size -= decrement;
}
void __skb_warn_lro_forwarding(const struct sk_buff *skb);

static inline bool skb_warn_if_lro(const struct sk_buff *skb)
{
	/* LRO sets gso_size but not gso_type, whereas if GSO is really
	 * wanted then gso_type will be set.
	 */
	const struct skb_shared_info *shinfo = skb_shinfo(skb);

	if (skb_is_nonlinear(skb) && shinfo->gso_size != 0 &&
	    unlikely(shinfo->gso_type == 0)) {
		__skb_warn_lro_forwarding(skb);
		return true;
	}
	return false;
}
static inline void skb_forward_csum(struct sk_buff *skb)
{
	/* Unfortunately we don't support this one.  Any brave souls? */
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->ip_summed = CHECKSUM_NONE;
}

/**
 * skb_checksum_none_assert - make sure skb ip_summed is CHECKSUM_NONE
 * @skb: skb to check
 *
 * Fresh skbs have their ip_summed set to CHECKSUM_NONE.
 * Instead of forcing ip_summed to CHECKSUM_NONE, we can
 * use this helper to document places where we make this assertion.
 */
static inline void skb_checksum_none_assert(const struct sk_buff *skb)
{
	DEBUG_NET_WARN_ON_ONCE(skb->ip_summed != CHECKSUM_NONE);
}
bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
int skb_checksum_setup(struct sk_buff *skb, bool recalculate);
struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
				     unsigned int transport_len,
				     __sum16(*skb_chkf)(struct sk_buff *skb));
/**
 * skb_head_is_locked - Determine if the skb->head is locked down
 * @skb: skb to check
 *
 * The head on skbs built around a head frag can be removed if they are
 * not cloned.  This function returns true if the skb head is locked down
 * due to either being allocated via kmalloc, or by being a clone with
 * multiple references to the head.
 */
static inline bool skb_head_is_locked(const struct sk_buff *skb)
{
	return !skb->head_frag || skb_cloned(skb);
}
/* Local Checksum Offload.
 * Compute the outer checksum based on the assumption that the
 * inner checksum will be offloaded later.
 * See Documentation/networking/checksum-offloads.rst for an
 * explanation of how this works.
 * Fill in the outer checksum adjustment (e.g. with the sum of the
 * outer pseudo-header) before calling.
 * Also ensure that the inner checksum is in the linear data area.
 */
static inline __wsum lco_csum(struct sk_buff *skb)
{
	unsigned char *csum_start = skb_checksum_start(skb);
	unsigned char *l4_hdr = skb_transport_header(skb);
	__wsum partial;

	/* Start with complement of inner checksum adjustment */
	partial = ~csum_unfold(*(__force __sum16 *)(csum_start +
						    skb->csum_offset));

	/* Add in checksum of our headers (incl. outer checksum
	 * adjustment filled in by caller) and return result.
	 */
	return csum_partial(l4_hdr, csum_start - l4_hdr, partial);
}
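
/* Illustrative sketch (not part of this header): an encapsulating transmit
 * path using local checksum offload would typically seed the outer checksum
 * field with the inverted outer pseudo-header sum and then fold in
 * lco_csum().  Here "uh" stands for a pointer to the outer transport header
 * and outer_pseudo_csum() is a hypothetical caller-supplied helper.
 *
 *	uh->check = ~outer_pseudo_csum(skb);
 *	uh->check = csum_fold(lco_csum(skb));
 *	if (uh->check == 0)
 *		uh->check = CSUM_MANGLED_0;
 */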
static inline bool skb_is_redirected(const struct sk_buff *skb)
{
	return skb->redirected;
}

static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
{
	skb->redirected = 1;
#ifdef CONFIG_NET_REDIRECT
	skb->from_ingress = from_ingress;
	if (skb->from_ingress)
		skb_clear_tstamp(skb);
#endif
}

static inline void skb_reset_redirect(struct sk_buff *skb)
{
	skb->redirected = 0;
}

static inline bool skb_csum_is_sctp(struct sk_buff *skb)
{
	return skb->csum_not_inet;
}

static inline void skb_set_kcov_handle(struct sk_buff *skb,
				       const u64 kcov_handle)
{
#ifdef CONFIG_KCOV
	skb->kcov_handle = kcov_handle;
#endif
}
static inline u64 skb_get_kcov_handle(struct sk_buff *skb)
{
#ifdef CONFIG_KCOV
	return skb->kcov_handle;
#else
	return 0;
#endif
}

#ifdef CONFIG_PAGE_POOL
static inline void skb_mark_for_recycle(struct sk_buff *skb)
{
	skb->pp_recycle = 1;
}
#endif
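
/* Illustrative sketch (not part of this header): a driver whose receive
 * buffers come from a page_pool marks the skb so the pages are returned to
 * the pool rather than the page allocator when the skb is freed.  "va" and
 * "truesize" are hypothetical driver-side values used only for the example.
 *
 *	skb = build_skb(va, truesize);		// va backed by page_pool memory
 *	if (skb)
 *		skb_mark_for_recycle(skb);
 */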
static inline bool skb_pp_recycle(struct sk_buff *skb, void *data)
{
	if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
		return false;
	return page_pool_return_skb_page(virt_to_page(data));
}

#endif	/* __KERNEL__ */
#endif	/* _LINUX_SKBUFF_H */