Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
This commit is contained in:
@@ -68,6 +68,9 @@ size and dma_handle must all be the same as those passed into the
|
||||
consistent allocate. cpu_addr must be the virtual address returned by
|
||||
the consistent allocate.
|
||||
|
||||
Note that unlike their sibling allocation calls, these routines
|
||||
may only be called with IRQs enabled.
|
||||
|
||||
|
||||
Part Ib - Using small dma-coherent buffers
|
||||
------------------------------------------
|
||||
|
@@ -11,7 +11,7 @@ DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
|
||||
procfs-guide.xml writing_usb_driver.xml \
|
||||
kernel-api.xml filesystems.xml lsm.xml usb.xml \
|
||||
gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
|
||||
genericirq.xml
|
||||
genericirq.xml s390-drivers.xml
|
||||
|
||||
###
|
||||
# The build process is as follows (targets):
|
||||
|
@@ -240,17 +240,23 @@ X!Ilib/string.c
|
||||
<sect1><title>Driver Support</title>
|
||||
!Enet/core/dev.c
|
||||
!Enet/ethernet/eth.c
|
||||
!Enet/sched/sch_generic.c
|
||||
!Iinclude/linux/etherdevice.h
|
||||
!Iinclude/linux/netdevice.h
|
||||
</sect1>
|
||||
<sect1><title>PHY Support</title>
|
||||
!Edrivers/net/phy/phy.c
|
||||
!Idrivers/net/phy/phy.c
|
||||
!Edrivers/net/phy/phy_device.c
|
||||
!Idrivers/net/phy/phy_device.c
|
||||
!Edrivers/net/phy/mdio_bus.c
|
||||
!Idrivers/net/phy/mdio_bus.c
|
||||
<!-- FIXME: Removed for now since no structured comments in source
|
||||
X!Enet/core/wireless.c
|
||||
-->
|
||||
</sect1>
|
||||
<!-- FIXME: Removed for now since no structured comments in source
|
||||
<sect1><title>Wireless</title>
|
||||
X!Enet/core/wireless.c
|
||||
</sect1>
|
||||
-->
|
||||
<sect1><title>Synchronous PPP</title>
|
||||
!Edrivers/net/wan/syncppp.c
|
||||
</sect1>
|
||||
|
149
Documentation/DocBook/s390-drivers.tmpl
Normal file
149
Documentation/DocBook/s390-drivers.tmpl
Normal file
@@ -0,0 +1,149 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
|
||||
"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
|
||||
|
||||
<book id="s390drivers">
|
||||
<bookinfo>
|
||||
<title>Writing s390 channel device drivers</title>
|
||||
|
||||
<authorgroup>
|
||||
<author>
|
||||
<firstname>Cornelia</firstname>
|
||||
<surname>Huck</surname>
|
||||
<affiliation>
|
||||
<address>
|
||||
<email>cornelia.huck@de.ibm.com</email>
|
||||
</address>
|
||||
</affiliation>
|
||||
</author>
|
||||
</authorgroup>
|
||||
|
||||
<copyright>
|
||||
<year>2007</year>
|
||||
<holder>IBM Corp.</holder>
|
||||
</copyright>
|
||||
|
||||
<legalnotice>
|
||||
<para>
|
||||
This documentation is free software; you can redistribute
|
||||
it and/or modify it under the terms of the GNU General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2 of the License, or (at your option) any later
|
||||
version.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
This program is distributed in the hope that it will be
|
||||
useful, but WITHOUT ANY WARRANTY; without even the implied
|
||||
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
You should have received a copy of the GNU General Public
|
||||
License along with this program; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
|
||||
MA 02111-1307 USA
|
||||
</para>
|
||||
|
||||
<para>
|
||||
For more details see the file COPYING in the source
|
||||
distribution of Linux.
|
||||
</para>
|
||||
</legalnotice>
|
||||
</bookinfo>
|
||||
|
||||
<toc></toc>
|
||||
|
||||
<chapter id="intro">
|
||||
<title>Introduction</title>
|
||||
<para>
|
||||
This document describes the interfaces available for device drivers that
|
||||
drive s390 based channel attached devices. This includes interfaces for
|
||||
interaction with the hardware and interfaces for interacting with the
|
||||
common driver core. Those interfaces are provided by the s390 common I/O
|
||||
layer.
|
||||
</para>
|
||||
<para>
|
||||
The document assumes a familiarity with the technical terms associated
|
||||
with the s390 channel I/O architecture. For a description of this
|
||||
architecture, please refer to the "z/Architecture: Principles of
|
||||
Operation", IBM publication no. SA22-7832.
|
||||
</para>
|
||||
<para>
|
||||
While most I/O devices on a s390 system are typically driven through the
|
||||
channel I/O mechanism described here, there are various other methods
|
||||
(like the diag interface). These are out of the scope of this document.
|
||||
</para>
|
||||
<para>
|
||||
Some additional information can also be found in the kernel source
|
||||
under Documentation/s390/driver-model.txt.
|
||||
</para>
|
||||
</chapter>
|
||||
<chapter id="ccw">
|
||||
<title>The ccw bus</title>
|
||||
<para>
|
||||
The ccw bus typically contains the majority of devices available to
|
||||
a s390 system. Named after the channel command word (ccw), the basic
|
||||
command structure used to address its devices, the ccw bus contains
|
||||
so-called channel attached devices. They are addressed via subchannels,
|
||||
visible on the css bus. A device driver, however, will never interact
|
||||
with the subchannel directly, but only via the device on the ccw bus,
|
||||
the ccw device.
|
||||
</para>
|
||||
<sect1 id="channelIO">
|
||||
<title>I/O functions for channel-attached devices</title>
|
||||
<para>
|
||||
Some hardware structures have been translated into C structures for use
|
||||
by the common I/O layer and device drivers. For more information on
|
||||
the hardware structures represented here, please consult the Principles
|
||||
of Operation.
|
||||
</para>
|
||||
!Iinclude/asm-s390/cio.h
|
||||
</sect1>
|
||||
<sect1 id="ccwdev">
|
||||
<title>ccw devices</title>
|
||||
<para>
|
||||
Devices that want to initiate channel I/O need to attach to the ccw bus.
|
||||
Interaction with the driver core is done via the common I/O layer, which
|
||||
provides the abstractions of ccw devices and ccw device drivers.
|
||||
</para>
|
||||
<para>
|
||||
The functions that initiate or terminate channel I/O all act upon a
|
||||
ccw device structure. Device drivers must not bypass those functions
|
||||
or strange side effects may happen.
|
||||
</para>
|
||||
!Iinclude/asm-s390/ccwdev.h
|
||||
!Edrivers/s390/cio/device.c
|
||||
!Edrivers/s390/cio/device_ops.c
|
||||
</sect1>
|
||||
<sect1 id="cmf">
|
||||
<title>The channel-measurement facility</title>
|
||||
<para>
|
||||
The channel-measurement facility provides a means to collect
|
||||
measurement data which is made available by the channel subsystem
|
||||
for each channel attached device.
|
||||
</para>
|
||||
!Iinclude/asm-s390/cmb.h
|
||||
!Edrivers/s390/cio/cmf.c
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
<chapter id="ccwgroup">
|
||||
<title>The ccwgroup bus</title>
|
||||
<para>
|
||||
The ccwgroup bus only contains artificial devices, created by the user.
|
||||
Many networking devices (e.g. qeth) are in fact composed of several
|
||||
ccw devices (like read, write and data channel for qeth). The
|
||||
ccwgroup bus provides a mechanism to create a meta-device which
|
||||
contains those ccw devices as slave devices and can be associated
|
||||
with the netdevice.
|
||||
</para>
|
||||
<sect1 id="ccwgroupdevices">
|
||||
<title>ccw group devices</title>
|
||||
!Iinclude/asm-s390/ccwgroup.h
|
||||
!Edrivers/s390/cio/ccwgroup.c
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
</book>
|
@@ -241,68 +241,7 @@ address space of the MSI-X table/MSI-X PBA. Otherwise, the PCI subsystem
|
||||
will fail enabling MSI-X on its hardware device when it calls the function
|
||||
pci_enable_msix().
|
||||
|
||||
5.3.2 Handling MSI-X allocation
|
||||
|
||||
Determining the number of MSI-X vectors allocated to a function is
|
||||
dependent on the number of MSI capable devices and MSI-X capable
|
||||
devices populated in the system. The policy of allocating MSI-X
|
||||
vectors to a function is defined as the following:
|
||||
|
||||
#of MSI-X vectors allocated to a function = (x - y)/z where
|
||||
|
||||
x = The number of available PCI vector resources by the time
|
||||
the device driver calls pci_enable_msix(). The PCI vector
|
||||
resources is the sum of the number of unassigned vectors
|
||||
(new) and the number of released vectors when any MSI/MSI-X
|
||||
device driver switches its hardware device back to a legacy
|
||||
mode or is hot-removed. The number of unassigned vectors
|
||||
may exclude some vectors reserved, as defined in parameter
|
||||
NR_HP_RESERVED_VECTORS, for the case where the system is
|
||||
capable of supporting hot-add/hot-remove operations. Users
|
||||
may change the value defined in NR_HR_RESERVED_VECTORS to
|
||||
meet their specific needs.
|
||||
|
||||
y = The number of MSI capable devices populated in the system.
|
||||
This policy ensures that each MSI capable device has its
|
||||
vector reserved to avoid the case where some MSI-X capable
|
||||
drivers may attempt to claim all available vector resources.
|
||||
|
||||
z = The number of MSI-X capable devices populated in the system.
|
||||
This policy ensures that maximum (x - y) is distributed
|
||||
evenly among MSI-X capable devices.
|
||||
|
||||
Note that the PCI subsystem scans y and z during a bus enumeration.
|
||||
When the PCI subsystem completes configuring MSI/MSI-X capability
|
||||
structure of a device as requested by its device driver, y/z is
|
||||
decremented accordingly.
|
||||
|
||||
5.3.3 Handling MSI-X shortages
|
||||
|
||||
For the case where fewer MSI-X vectors are allocated to a function
|
||||
than requested, the function pci_enable_msix() will return the
|
||||
maximum number of MSI-X vectors available to the caller. A device
|
||||
driver may re-send its request with fewer or equal vectors indicated
|
||||
in the return. For example, if a device driver requests 5 vectors, but
|
||||
the number of available vectors is 3 vectors, a value of 3 will be
|
||||
returned as a result of pci_enable_msix() call. A function could be
|
||||
designed for its driver to use only 3 MSI-X table entries as
|
||||
different combinations as ABC--, A-B-C, A--CB, etc. Note that this
|
||||
patch does not support multiple entries with the same vector. Such
|
||||
attempt by a device driver to use 5 MSI-X table entries with 3 vectors
|
||||
as ABBCC, AABCC, BCCBA, etc will result as a failure by the function
|
||||
pci_enable_msix(). Below are the reasons why supporting multiple
|
||||
entries with the same vector is an undesirable solution.
|
||||
|
||||
- The PCI subsystem cannot determine the entry that
|
||||
generated the message to mask/unmask MSI while handling
|
||||
software driver ISR. Attempting to walk through all MSI-X
|
||||
table entries (2048 max) to mask/unmask any match vector
|
||||
is an undesirable solution.
|
||||
|
||||
- Walking through all MSI-X table entries (2048 max) to handle
|
||||
SMP affinity of any match vector is an undesirable solution.
|
||||
|
||||
5.3.4 API pci_enable_msix
|
||||
5.3.2 API pci_enable_msix
|
||||
|
||||
int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
|
||||
|
||||
@@ -339,7 +278,7 @@ a failure. This failure may be a result of duplicate entries
|
||||
specified in second argument, or a result of no available vector,
|
||||
or a result of failing to initialize MSI-X table entries.
|
||||
|
||||
5.3.5 API pci_disable_msix
|
||||
5.3.3 API pci_disable_msix
|
||||
|
||||
void pci_disable_msix(struct pci_dev *dev)
|
||||
|
||||
@@ -349,7 +288,7 @@ always call free_irq() on all MSI-X vectors it has done request_irq()
|
||||
on before calling this API. Failure to do so results in a BUG_ON() and
|
||||
a device will be left with MSI-X enabled and leaks its vectors.
|
||||
|
||||
5.3.6 MSI-X mode vs. legacy mode diagram
|
||||
5.3.4 MSI-X mode vs. legacy mode diagram
|
||||
|
||||
The below diagram shows the events which switch the interrupt
|
||||
mode on the MSI-X capable device function between MSI-X mode and
|
||||
@@ -407,7 +346,7 @@ between MSI mod MSI-X mode during a run-time.
|
||||
MSI/MSI-X support requires support from both system hardware and
|
||||
individual hardware device functions.
|
||||
|
||||
5.5.1 System hardware support
|
||||
5.5.1 Required x86 hardware support
|
||||
|
||||
Since the target of MSI address is the local APIC CPU, enabling
|
||||
MSI/MSI-X support in the Linux kernel is dependent on whether existing
|
||||
|
@@ -477,9 +477,9 @@ With this multipage bio design:
|
||||
the same bi_io_vec array, but with the index and size accordingly modified)
|
||||
- A linked list of bios is used as before for unrelated merges (*) - this
|
||||
avoids reallocs and makes independent completions easier to handle.
|
||||
- Code that traverses the req list needs to make a distinction between
|
||||
segments of a request (bio_for_each_segment) and the distinct completion
|
||||
units/bios (rq_for_each_bio).
|
||||
- Code that traverses the req list can find all the segments of a bio
|
||||
by using rq_for_each_segment. This handles the fact that a request
|
||||
has multiple bios, each of which can have multiple segments.
|
||||
- Drivers which can't process a large bio in one shot can use the bi_idx
|
||||
field to keep track of the next bio_vec entry to process.
|
||||
(e.g. a 1MB bio_vec needs to be handled in chunks of at most 128kB for IDE)
|
||||
@@ -664,14 +664,14 @@ in lvm or md.
|
||||
|
||||
3.2.1 Traversing segments and completion units in a request
|
||||
|
||||
The macros bio_for_each_segment() and rq_for_each_bio() should be used for
|
||||
traversing the bios in the request list (drivers should avoid directly
|
||||
trying to do it themselves). Using these helpers should also make it easier
|
||||
to cope with block changes in the future.
|
||||
The macro rq_for_each_segment() should be used for traversing the bios
|
||||
in the request list (drivers should avoid directly trying to do it
|
||||
themselves). Using this helper should also make it easier to cope
|
||||
with block changes in the future.
|
||||
|
||||
rq_for_each_bio(bio, rq)
|
||||
bio_for_each_segment(bio_vec, bio, i)
|
||||
/* bio_vec is now current segment */
|
||||
struct req_iterator iter;
|
||||
rq_for_each_segment(bio_vec, rq, iter)
|
||||
/* bio_vec is now current segment */
|
||||
|
||||
I/O completion callbacks are per-bio rather than per-segment, so drivers
|
||||
that traverse bio chains on completion need to keep that in mind. Drivers
|
||||
|
@@ -86,8 +86,15 @@ extern int sys_ioprio_get(int, int);
|
||||
#error "Unsupported arch"
|
||||
#endif
|
||||
|
||||
_syscall3(int, ioprio_set, int, which, int, who, int, ioprio);
|
||||
_syscall2(int, ioprio_get, int, which, int, who);
|
||||
static inline int ioprio_set(int which, int who, int ioprio)
|
||||
{
|
||||
return syscall(__NR_ioprio_set, which, who, ioprio);
|
||||
}
|
||||
|
||||
static inline int ioprio_get(int which, int who)
|
||||
{
|
||||
return syscall(__NR_ioprio_get, which, who);
|
||||
}
|
||||
|
||||
enum {
|
||||
IOPRIO_CLASS_NONE,
|
||||
|
@@ -150,7 +150,7 @@ Some very frequently asked questions about linuxtv-dvb
|
||||
- saa7146_vv: SAA7146 video and vbi functions. These are only needed
|
||||
for full-featured cards.
|
||||
|
||||
- video-buf: capture helper module for the saa7146_vv driver. This
|
||||
- videobuf-dma-sg: capture helper module for the saa7146_vv driver. This
|
||||
one is responsible for handling capture buffers.
|
||||
|
||||
- dvb-ttpci: The main driver for AV7110 based, full-featured
|
||||
|
@@ -306,3 +306,24 @@ Why: In kernel tree version of driver is unmaintained. Sk98lin driver
|
||||
Who: Stephen Hemminger <shemminger@linux-foundation.org>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: i386/x86_64 bzImage symlinks
|
||||
When: April 2008
|
||||
|
||||
Why: The i386/x86_64 merge provides a symlink to the old bzImage
|
||||
location so that user space tools not yet updated, e.g. package
|
||||
scripts, do not break.
|
||||
Who: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: shaper network driver
|
||||
When: January 2008
|
||||
Files: drivers/net/shaper.c, include/linux/if_shaper.h
|
||||
Why: This driver has been marked obsolete for many years.
|
||||
It was only designed to work on lower speed links and has design
|
||||
flaws that lead to machine crashes. The qdisc infrastructure in
|
||||
2.4 or later kernels provides richer features and is more robust.
|
||||
Who: Stephen Hemminger <shemminger@linux-foundation.org>
|
||||
|
||||
---------------------------
|
||||
|
@@ -407,7 +407,7 @@ raiddev /dev/md0
|
||||
device /dev/hda5
|
||||
raid-disk 0
|
||||
device /dev/hdb1
|
||||
raid-disl 1
|
||||
raid-disk 1
|
||||
|
||||
For linear raid, just change the raid-level above to "raid-level linear", for
|
||||
mirrors, change it to "raid-level 1", and for stripe sets with parity, change
|
||||
@@ -457,6 +457,8 @@ ChangeLog
|
||||
|
||||
Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog.
|
||||
|
||||
2.1.29:
|
||||
- Fix a deadlock when mounting read-write.
|
||||
2.1.28:
|
||||
- Fix a deadlock.
|
||||
2.1.27:
|
||||
|
@@ -99,6 +99,20 @@ Transaction IDs
|
||||
request/response pairs. The upper 32 bits are reserved for use by
|
||||
the kernel and will be overwritten before a MAD is sent.
|
||||
|
||||
P_Key Index Handling
|
||||
|
||||
The old ib_umad interface did not allow setting the P_Key index for
|
||||
MADs that are sent and did not provide a way for obtaining the P_Key
|
||||
index of received MADs. A new layout for struct ib_user_mad_hdr
|
||||
with a pkey_index member has been defined; however, to preserve
|
||||
binary compatibility with older applications, this new layout will
|
||||
not be used unless the IB_USER_MAD_ENABLE_PKEY ioctl is called
|
||||
before a file descriptor is used for anything else.
|
||||
|
||||
In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented
|
||||
to 6, the new layout of struct ib_user_mad_hdr will be used by
|
||||
default, and the IB_USER_MAD_ENABLE_PKEY ioctl will be removed.
|
||||
|
||||
Setting IsSM Capability Bit
|
||||
|
||||
To set the IsSM capability bit for a port, simply open the
|
||||
|
@@ -1,4 +1,4 @@
|
||||
NOTE:
|
||||
NOTE:
|
||||
This is a version of Documentation/HOWTO translated into Japanese.
|
||||
This document is maintained by Tsugikazu Shibata <tshibata@ab.jp.nec.com>
|
||||
and the JF Project team <www.linux.or.jp/JF>.
|
||||
@@ -11,14 +11,14 @@ for non English (read: Japanese) speakers and is not intended as a
|
||||
fork. So if you have any comments or updates for this file, please try
|
||||
to update the original English file first.
|
||||
|
||||
Last Updated: 2007/07/18
|
||||
Last Updated: 2007/09/23
|
||||
==================================
|
||||
これは、
|
||||
linux-2.6.22/Documentation/HOWTO
|
||||
linux-2.6.23/Documentation/HOWTO
|
||||
の和訳です。
|
||||
|
||||
翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ >
|
||||
翻訳日: 2007/07/16
|
||||
翻訳日: 2007/09/19
|
||||
翻訳者: Tsugikazu Shibata <tshibata at ab dot jp dot nec dot com>
|
||||
校正者: 松倉さん <nbh--mats at nifty dot com>
|
||||
小林 雅典さん (Masanori Kobayasi) <zap03216 at nifty dot ne dot jp>
|
||||
@@ -27,6 +27,7 @@ linux-2.6.22/Documentation/HOWTO
|
||||
野口さん (Kenji Noguchi) <tokyo246 at gmail dot com>
|
||||
河内さん (Takayoshi Kochi) <t-kochi at bq dot jp dot nec dot com>
|
||||
岩本さん (iwamoto) <iwamoto.kn at ncos dot nec dot co dot jp>
|
||||
内田さん (Satoshi Uchida) <s-uchida at ap dot jp dot nec dot com>
|
||||
==================================
|
||||
|
||||
Linux カーネル開発のやり方
|
||||
@@ -40,7 +41,7 @@ Linux カーネル開発コミュニティと共に活動するやり方を学
|
||||
手助けになります。
|
||||
|
||||
もし、このドキュメントのどこかが古くなっていた場合には、このドキュメン
|
||||
トの最後にリストしたメンテナーにパッチを送ってください。
|
||||
トの最後にリストしたメンテナにパッチを送ってください。
|
||||
|
||||
はじめに
|
||||
---------
|
||||
@@ -59,7 +60,7 @@ Linux カーネル開発コミュニティと共に活動するやり方を学
|
||||
ネル開発者には必要です。アーキテクチャ向けの低レベル部分の開発をするの
|
||||
でなければ、(どんなアーキテクチャでも)アセンブリ(訳注: 言語)は必要あり
|
||||
ません。以下の本は、C 言語の十分な知識や何年もの経験に取って代わるもの
|
||||
ではありませんが、少なくともリファレンスとしてはいい本です。
|
||||
ではありませんが、少なくともリファレンスとしては良い本です。
|
||||
- "The C Programming Language" by Kernighan and Ritchie [Prentice Hall]
|
||||
-『プログラミング言語C第2版』(B.W. カーニハン/D.M. リッチー著 石田晴久訳) [共立出版]
|
||||
- "Practical C Programming" by Steve Oualline [O'Reilly]
|
||||
@@ -76,7 +77,7 @@ Linux カーネル開発コミュニティと共に活動するやり方を学
|
||||
ときどき、カーネルがツールチェインや C 言語拡張に置いている前提がどう
|
||||
なっているのかわかりにくいことがあり、また、残念なことに決定的なリファ
|
||||
レンスは存在しません。情報を得るには、gcc の info ページ( info gcc )を
|
||||
みてください。
|
||||
見てください。
|
||||
|
||||
あなたは既存の開発コミュニティと一緒に作業する方法を学ぼうとしているこ
|
||||
とに留意してください。そのコミュニティは、コーディング、スタイル、
|
||||
@@ -92,7 +93,7 @@ Linux カーネル開発コミュニティと共に活動するやり方を学
|
||||
|
||||
Linux カーネルのソースコードは GPL ライセンスの下でリリースされていま
|
||||
す。ライセンスの詳細については、ソースツリーのメインディレクトリに存在
|
||||
する、COPYING のファイルをみてください。もしライセンスについてさらに質
|
||||
する、COPYING のファイルを見てください。もしライセンスについてさらに質
|
||||
問があれば、Linux Kernel メーリングリストに質問するのではなく、どうぞ
|
||||
法律家に相談してください。メーリングリストの人達は法律家ではなく、法的
|
||||
問題については彼らの声明はあてにするべきではありません。
|
||||
@@ -109,7 +110,8 @@ Linux カーネルソースツリーは幅広い範囲のドキュメントを
|
||||
新しいドキュメントファイルも追加することを勧めます。
|
||||
カーネルの変更が、カーネルがユーザ空間に公開しているインターフェイスの
|
||||
変更を引き起こす場合、その変更を説明するマニュアルページのパッチや情報
|
||||
をマニュアルページのメンテナ mtk-manpages@gmx.net に送ることを勧めます。
|
||||
をマニュアルページのメンテナ mtk-manpages@gmx.net に送ることを勧めま
|
||||
す。
|
||||
|
||||
以下はカーネルソースツリーに含まれている読んでおくべきファイルの一覧で
|
||||
す-
|
||||
@@ -117,7 +119,7 @@ Linux カーネルソースツリーは幅広い範囲のドキュメントを
|
||||
README
|
||||
このファイルは Linuxカーネルの簡単な背景とカーネルを設定(訳注
|
||||
configure )し、生成(訳注 build )するために必要なことは何かが書かれ
|
||||
ています。カーネルに関して初めての人はここからスタートするとよいで
|
||||
ています。カーネルに関して初めての人はここからスタートすると良いで
|
||||
しょう。
|
||||
|
||||
Documentation/Changes
|
||||
@@ -128,7 +130,7 @@ Linux カーネルソースツリーは幅広い範囲のドキュメントを
|
||||
Documentation/CodingStyle
|
||||
これは Linux カーネルのコーディングスタイルと背景にある理由を記述
|
||||
しています。全ての新しいコードはこのドキュメントにあるガイドライン
|
||||
に従っていることを期待されています。大部分のメンテナーはこれらのルー
|
||||
に従っていることを期待されています。大部分のメンテナはこれらのルー
|
||||
ルに従っているものだけを受け付け、多くの人は正しいスタイルのコード
|
||||
だけをレビューします。
|
||||
|
||||
@@ -168,16 +170,16 @@ Linux カーネルソースツリーは幅広い範囲のドキュメントを
|
||||
支援してください。
|
||||
|
||||
Documentation/ManagementStyle
|
||||
このドキュメントは Linux カーネルのメンテナー達がどう行動するか、
|
||||
このドキュメントは Linux カーネルのメンテナ達がどう行動するか、
|
||||
彼らの手法の背景にある共有されている精神について記述しています。こ
|
||||
れはカーネル開発の初心者なら(もしくは、単に興味があるだけの人でも)
|
||||
重要です。なぜならこのドキュメントは、カーネルメンテナー達の独特な
|
||||
重要です。なぜならこのドキュメントは、カーネルメンテナ達の独特な
|
||||
行動についての多くの誤解や混乱を解消するからです。
|
||||
|
||||
Documentation/stable_kernel_rules.txt
|
||||
このファイルはどのように stable カーネルのリリースが行われるかのルー
|
||||
ルが記述されています。そしてこれらのリリースの中のどこかで変更を取
|
||||
り入れてもらいたい場合に何をすればいいかが示されています。
|
||||
り入れてもらいたい場合に何をすれば良いかが示されています。
|
||||
|
||||
Documentation/kernel-docs.txt
|
||||
カーネル開発に付随する外部ドキュメントのリストです。もしあなたが
|
||||
@@ -218,9 +220,9 @@ web サイトには、コードの構成、サブシステム、現在存在す
|
||||
ここには、また、カーネルのコンパイルのやり方やパッチの当て方などの間接
|
||||
的な基本情報も記述されています。
|
||||
|
||||
あなたがどこからスタートしてよいかわからないが、Linux カーネル開発コミュ
|
||||
あなたがどこからスタートして良いかわからないが、Linux カーネル開発コミュ
|
||||
ニティに参加して何かすることをさがしている場合には、Linux kernel
|
||||
Janitor's プロジェクトにいけばよいでしょう -
|
||||
Janitor's プロジェクトにいけば良いでしょう -
|
||||
http://janitor.kernelnewbies.org/
|
||||
ここはそのようなスタートをするのにうってつけの場所です。ここには、
|
||||
Linux カーネルソースツリーの中に含まれる、きれいにし、修正しなければな
|
||||
@@ -243,7 +245,7 @@ Linux カーネルソースツリーの中に含まれる、きれいにし、
|
||||
自己参照方式で、索引がついた web 形式で、ソースコードを参照することが
|
||||
できます。この最新の素晴しいカーネルコードのリポジトリは以下で見つかり
|
||||
ます-
|
||||
http://sosdg.org/~coywolf/lxr/
|
||||
http://sosdg.org/~qiyong/lxr/
|
||||
|
||||
開発プロセス
|
||||
-----------------------
|
||||
@@ -265,9 +267,9 @@ Linux カーネルの開発プロセスは現在幾つかの異なるメイン
|
||||
以下のとおり-
|
||||
|
||||
- 新しいカーネルがリリースされた直後に、2週間の特別期間が設けられ、
|
||||
この期間中に、メンテナー達は Linus に大きな差分を送ることができま
|
||||
す。このような差分は通常 -mm カーネルに数週間含まれてきたパッチで
|
||||
す。 大きな変更は git(カーネルのソース管理ツール、詳細は
|
||||
この期間中に、メンテナ達は Linus に大きな差分を送ることができます。
|
||||
このような差分は通常 -mm カーネルに数週間含まれてきたパッチです。
|
||||
大きな変更は git(カーネルのソース管理ツール、詳細は
|
||||
http://git.or.cz/ 参照) を使って送るのが好ましいやり方ですが、パッ
|
||||
チファイルの形式のまま送るのでも十分です。
|
||||
|
||||
@@ -285,6 +287,10 @@ Linux カーネルの開発プロセスは現在幾つかの異なるメイン
|
||||
に安定した状態にあると判断したときにリリースされます。目標は毎週新
|
||||
しい -rc カーネルをリリースすることです。
|
||||
|
||||
- 以下の URL で各 -rc リリースに存在する既知の後戻り問題のリスト
|
||||
が追跡されます-
|
||||
http://kernelnewbies.org/known_regressions
|
||||
|
||||
- このプロセスはカーネルが 「準備ができた」と考えられるまで継続しま
|
||||
す。このプロセスはだいたい 6週間継続します。
|
||||
|
||||
@@ -331,8 +337,8 @@ Andrew は個別のサブシステムカーネルツリーとパッチを全て
|
||||
linux-kernel メーリングリストで収集された多数のパッチと同時に一つにま
|
||||
とめます。
|
||||
このツリーは新機能とパッチが検証される場となります。ある期間の間パッチ
|
||||
が -mm に入って価値を証明されたら、Andrew やサブシステムメンテナが、メ
|
||||
インラインへ入れるように Linus にプッシュします。
|
||||
が -mm に入って価値を証明されたら、Andrew やサブシステムメンテナが、
|
||||
メインラインへ入れるように Linus にプッシュします。
|
||||
|
||||
メインカーネルツリーに含めるために Linus に送る前に、すべての新しいパッ
|
||||
チが -mm ツリーでテストされることが強く推奨されます。
|
||||
@@ -460,7 +466,7 @@ MAINTAINERS ファイルにリストがありますので参照してくださ
|
||||
せん-
|
||||
彼らはあなたのパッチの行毎にコメントを入れたいので、そのためにはそうす
|
||||
るしかありません。あなたのメールプログラムが空白やタブを圧縮しないよう
|
||||
に確認した方がいいです。最初の良いテストとしては、自分にメールを送って
|
||||
に確認した方が良いです。最初の良いテストとしては、自分にメールを送って
|
||||
みて、そのパッチを自分で当ててみることです。もしそれがうまく行かないな
|
||||
ら、あなたのメールプログラムを直してもらうか、正しく動くように変えるべ
|
||||
きです。
|
||||
@@ -507,14 +513,14 @@ MAINTAINERS ファイルにリストがありますので参照してくださ
|
||||
とも普通のことです。これはあなたのパッチが受け入れられないということで
|
||||
は *ありません*、そしてあなた自身に反対することを意味するのでも *ありま
|
||||
せん*。単に自分のパッチに対して指摘された問題を全て修正して再送すれば
|
||||
いいのです。
|
||||
良いのです。
|
||||
|
||||
|
||||
カーネルコミュニティと企業組織のちがい
|
||||
-----------------------------------------------------------------
|
||||
|
||||
カーネルコミュニティは大部分の伝統的な会社の開発環境とは異ったやり方で
|
||||
動いています。以下は問題を避けるためにできるとよいことののリストです-
|
||||
動いています。以下は問題を避けるためにできると良いことのリストです-
|
||||
|
||||
あなたの提案する変更について言うときのうまい言い方:
|
||||
|
||||
@@ -525,7 +531,7 @@ MAINTAINERS ファイルにリストがありますので参照してくださ
|
||||
- "以下は一連の小さなパッチ群ですが..."
|
||||
- "これは典型的なマシンでの性能を向上させます.."
|
||||
|
||||
やめた方がいい悪い言い方:
|
||||
やめた方が良い悪い言い方:
|
||||
|
||||
- このやり方で AIX/ptx/Solaris ではできたので、できるはずだ
|
||||
- 私はこれを20年もの間やってきた、だから
|
||||
@@ -575,10 +581,10 @@ Linux カーネルコミュニティは、一度に大量のコードの塊を
|
||||
|
||||
1) 小さいパッチはあなたのパッチが適用される見込みを大きくします、カー
|
||||
ネルの人達はパッチが正しいかどうかを確認する時間や労力をかけないか
|
||||
らです。5行のパッチはメンテナがたった1秒見るだけで適用できます。し
|
||||
かし、500行のパッチは、正しいことをレビューするのに数時間かかるかも
|
||||
しれません(時間はパッチのサイズなどにより指数関数に比例してかかりま
|
||||
す)
|
||||
らです。5行のパッチはメンテナがたった1秒見るだけで適用できます。
|
||||
しかし、500行のパッチは、正しいことをレビューするのに数時間かかるか
|
||||
もしれません(時間はパッチのサイズなどにより指数関数に比例してかかり
|
||||
ます)
|
||||
|
||||
小さいパッチは何かあったときにデバッグもとても簡単になります。パッ
|
||||
チを1個1個取り除くのは、とても大きなパッチを当てた後に(かつ、何かお
|
||||
@@ -587,23 +593,23 @@ Linux カーネルコミュニティは、一度に大量のコードの塊を
|
||||
2) 小さいパッチを送るだけでなく、送るまえに、書き直して、シンプルにす
|
||||
る(もしくは、単に順番を変えるだけでも)ことも、とても重要です。
|
||||
|
||||
以下はカーネル開発者の Al Viro のたとえ話しです:
|
||||
以下はカーネル開発者の Al Viro のたとえ話です:
|
||||
|
||||
"生徒の数学の宿題を採点する先生のことを考えてみてください、先
|
||||
生は生徒が解に到達するまでの試行錯誤をみたいとは思わないでしょ
|
||||
う。先生は簡潔な最高の解をみたいのです。良い生徒はこれを知って
|
||||
生は生徒が解に到達するまでの試行錯誤を見たいとは思わないでしょ
|
||||
う。先生は簡潔な最高の解を見たいのです。良い生徒はこれを知って
|
||||
おり、そして最終解の前の中間作業を提出することは決してないので
|
||||
す"
|
||||
|
||||
カーネル開発でもこれは同じです。メンテナー達とレビューア達は、
|
||||
問題を解決する解の背後になる思考プロセスをみたいとは思いません。
|
||||
彼らは単純であざやかな解決方法をみたいのです。
|
||||
カーネル開発でもこれは同じです。メンテナ達とレビューア達は、
|
||||
問題を解決する解の背後になる思考プロセスを見たいとは思いません。
|
||||
彼らは単純であざやかな解決方法を見たいのです。
|
||||
|
||||
あざやかな解を説明するのと、コミュニティと共に仕事をし、未解決の仕事を
|
||||
議論することのバランスをキープするのは難しいかもしれません。
|
||||
ですから、開発プロセスの早期段階で改善のためのフィードバックをもらうよ
|
||||
うにするのもいいですが、変更点を小さい部分に分割して全体ではまだ完成し
|
||||
ていない仕事を(部分的に)取り込んでもらえるようにすることもいいことです。
|
||||
うにするのも良いですが、変更点を小さい部分に分割して全体ではまだ完成し
|
||||
ていない仕事を(部分的に)取り込んでもらえるようにすることも良いことです。
|
||||
|
||||
また、でき上がっていないものや、"将来直す" ようなパッチを、本流に含め
|
||||
てもらうように送っても、それは受け付けられないことを理解してください。
|
||||
@@ -629,7 +635,7 @@ Linux カーネルコミュニティは、一度に大量のコードの塊を
|
||||
- テスト結果
|
||||
|
||||
これについて全てがどのようにあるべきかについての詳細は、以下のドキュメ
|
||||
ントの ChangeLog セクションをみてください-
|
||||
ントの ChangeLog セクションを見てください-
|
||||
"The Perfect Patch"
|
||||
http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt
|
||||
|
||||
|
@@ -35,6 +35,7 @@ parameter is applicable:
|
||||
APIC APIC support is enabled.
|
||||
APM Advanced Power Management support is enabled.
|
||||
AX25 Appropriate AX.25 support is enabled.
|
||||
BLACKFIN Blackfin architecture is enabled.
|
||||
DRM Direct Rendering Management support is enabled.
|
||||
EDD BIOS Enhanced Disk Drive Services (EDD) is enabled
|
||||
EFI EFI Partitioning (GPT) is enabled
|
||||
@@ -67,6 +68,7 @@ parameter is applicable:
|
||||
PARIDE The ParIDE (parallel port IDE) subsystem is enabled.
|
||||
PARISC The PA-RISC architecture is enabled.
|
||||
PCI PCI bus support is enabled.
|
||||
PCIE PCI Express support is enabled.
|
||||
PCMCIA The PCMCIA subsystem is enabled.
|
||||
PNP Plug & Play support is enabled.
|
||||
PPC PowerPC architecture is enabled.
|
||||
@@ -550,7 +552,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
|
||||
dtc3181e= [HW,SCSI]
|
||||
|
||||
earlyprintk= [X86-32,X86-64,SH]
|
||||
earlyprintk= [X86-32,X86-64,SH,BLACKFIN]
|
||||
earlyprintk=vga
|
||||
earlyprintk=serial[,ttySn[,baudrate]]
|
||||
|
||||
@@ -863,6 +865,10 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
lasi= [HW,SCSI] PARISC LASI driver for the 53c700 chip
|
||||
Format: addr:<io>,irq:<irq>
|
||||
|
||||
libata.noacpi [LIBATA] Disables use of ACPI in libata suspend/resume
|
||||
when set.
|
||||
Format: <int>
|
||||
|
||||
load_ramdisk= [RAM] List of ramdisks to load from floppy
|
||||
See Documentation/ramdisk.txt.
|
||||
|
||||
@@ -1008,6 +1014,10 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
meye.*= [HW] Set MotionEye Camera parameters
|
||||
See Documentation/video4linux/meye.txt.
|
||||
|
||||
mfgpt_irq= [IA-32] Specify the IRQ to use for the
|
||||
Multi-Function General Purpose Timers on AMD Geode
|
||||
platforms.
|
||||
|
||||
mga= [HW,DRM]
|
||||
|
||||
mousedev.tap_time=
|
||||
@@ -1079,10 +1089,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
emulation library even if a 387 maths coprocessor
|
||||
is present.
|
||||
|
||||
noacpi [LIBATA] Disables use of ACPI in libata suspend/resume
|
||||
when set.
|
||||
Format: <int>
|
||||
|
||||
noaliencache [MM, NUMA, SLAB] Disables the allocation of alien
|
||||
caches in the slab allocator. Saves per-node memory,
|
||||
but will impact performance.
|
||||
@@ -1159,6 +1165,9 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
|
||||
nomce [X86-32] Machine Check Exception
|
||||
|
||||
nomfgpt [X86-32] Disable Multi-Function General Purpose
|
||||
Timer usage (for AMD Geode machines).
|
||||
|
||||
noreplace-paravirt [X86-32,PV_OPS] Don't patch paravirt_ops
|
||||
|
||||
noreplace-smp [X86-32,SMP] Don't replace SMP instructions
|
||||
@@ -1269,6 +1278,11 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
Mechanism 1.
|
||||
conf2 [X86-32] Force use of PCI Configuration
|
||||
Mechanism 2.
|
||||
noaer [PCIE] If the PCIEAER kernel config parameter is
|
||||
enabled, this kernel boot option can be used to
|
||||
disable the use of PCIE advanced error reporting.
|
||||
nodomains [PCI] Disable support for multiple PCI
|
||||
root domains (aka PCI segments, in ACPI-speak).
|
||||
nommconf [X86-32,X86_64] Disable use of MMCONFIG for PCI
|
||||
Configuration
|
||||
nomsi [MSI] If the PCI_MSI kernel config parameter is
|
||||
@@ -1313,6 +1327,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
IRQ routing is enabled.
|
||||
noacpi [X86-32] Do not use ACPI for IRQ routing
|
||||
or for PCI scanning.
|
||||
use_crs [X86-32] Use _CRS for PCI resource
|
||||
allocation.
|
||||
routeirq Do IRQ routing for all PCI devices.
|
||||
This is normally done in pci_enable_device(),
|
||||
so this option is a temporary workaround
|
||||
@@ -1429,6 +1445,10 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
pt. [PARIDE]
|
||||
See Documentation/paride.txt.
|
||||
|
||||
pty.legacy_count=
|
||||
[KNL] Number of legacy pty's. Overwrites compiled-in
|
||||
default number.
|
||||
|
||||
quiet [KNL] Disable most log messages
|
||||
|
||||
r128= [HW,DRM]
|
||||
|
@@ -54,7 +54,6 @@ embedded in larger data structures and replace fields they duplicate.
|
||||
|
||||
struct kobject {
|
||||
const char * k_name;
|
||||
char name[KOBJ_NAME_LEN];
|
||||
struct kref kref;
|
||||
struct list_head entry;
|
||||
struct kobject * parent;
|
||||
@@ -223,18 +222,15 @@ decl_subsys(devices, &ktype_device, &device_uevent_ops);
|
||||
is equivalent to doing:
|
||||
|
||||
struct kset devices_subsys = {
|
||||
.kobj = {
|
||||
.name = "devices",
|
||||
},
|
||||
.ktype = &ktype_devices,
|
||||
.uevent_ops = &device_uevent_ops,
|
||||
};
|
||||
|
||||
kobject_set_name(&devices_subsys, name);
|
||||
|
||||
The objects that are registered with a subsystem that use the
|
||||
subsystem's default list must have their kset ptr set properly. These
|
||||
objects may have embedded kobjects or ksets. The
|
||||
following helpers make setting the kset easier:
|
||||
following helper makes setting the kset easier:
|
||||
|
||||
|
||||
kobj_set_kset_s(obj,subsys)
|
||||
@@ -242,22 +238,8 @@ kobj_set_kset_s(obj,subsys)
|
||||
- Assumes that obj->kobj exists, and is a struct kobject.
|
||||
- Sets the kset of that kobject to the kset <subsys>.
|
||||
|
||||
|
||||
kset_set_kset_s(obj,subsys)
|
||||
|
||||
- Assumes that obj->kset exists, and is a struct kset.
|
||||
- Sets the kset of the embedded kobject to the kset <subsys>.
|
||||
|
||||
subsys_set_kset(obj,subsys)
|
||||
|
||||
- Assumes obj->subsys exists, and is a struct subsystem.
|
||||
- Sets obj->subsys.kset.kobj.kset to the subsystem's embedded kset.
|
||||
|
||||
void subsystem_init(struct kset *s);
|
||||
int subsystem_register(struct kset *s);
|
||||
void subsystem_unregister(struct kset *s);
|
||||
struct kset *subsys_get(struct kset *s);
|
||||
void kset_put(struct kset *s);
|
||||
|
||||
These are just wrappers around the respective kset_* functions.
|
||||
|
||||
|
@@ -46,7 +46,7 @@ typedef uint32_t u32;
|
||||
typedef uint16_t u16;
|
||||
typedef uint8_t u8;
|
||||
#include "../../include/linux/lguest_launcher.h"
|
||||
#include "../../include/asm-i386/e820.h"
|
||||
#include "../../include/asm-x86/e820_32.h"
|
||||
/*:*/
|
||||
|
||||
#define PAGE_PRESENT 0x7 /* Present, RW, Execute */
|
||||
|
@@ -1,766 +0,0 @@
|
||||
HISTORY:
|
||||
February 16/2002 -- revision 0.2.1:
|
||||
COR typo corrected
|
||||
February 10/2002 -- revision 0.2:
|
||||
some spell checking ;->
|
||||
January 12/2002 -- revision 0.1
|
||||
This is still work in progress so may change.
|
||||
To keep up to date please watch this space.
|
||||
|
||||
Introduction to NAPI
|
||||
====================
|
||||
|
||||
NAPI is a proven (www.cyberus.ca/~hadi/usenix-paper.tgz) technique
|
||||
to improve network performance on Linux. For more details please
|
||||
read that paper.
|
||||
NAPI provides an "inherent mitigation" which is bound by system capacity
|
||||
as can be seen from the following data collected by Robert on Gigabit
|
||||
ethernet (e1000):
|
||||
|
||||
Psize Ipps Tput Rxint Txint Done Ndone
|
||||
---------------------------------------------------------------
|
||||
60 890000 409362 17 27622 7 6823
|
||||
128 758150 464364 21 9301 10 7738
|
||||
256 445632 774646 42 15507 21 12906
|
||||
512 232666 994445 241292 19147 241192 1062
|
||||
1024 119061 1000003 872519 19258 872511 0
|
||||
1440 85193 1000003 946576 19505 946569 0
|
||||
|
||||
|
||||
Legend:
|
||||
"Ipps" stands for input packets per second.
|
||||
"Tput" == packets out of total 1M that made it out.
|
||||
"txint" == transmit completion interrupts seen
|
||||
"Done" == The number of times that the poll() managed to pull all
|
||||
packets out of the rx ring. Note from this that the lower the
|
||||
load the more we could clean up the rxring
|
||||
"Ndone" == is the converse of "Done". Note again, that the higher
|
||||
the load the more times we couldn't clean up the rxring.
|
||||
|
||||
Observe that:
|
||||
when the NIC receives 890Kpackets/sec only 17 rx interrupts are generated.
|
||||
The system can't handle the processing at 1 interrupt/packet at that load level.
|
||||
At lower rates on the other hand, rx interrupts go up and therefore the
|
||||
interrupt/packet ratio goes up (as observable from that table). So there is
|
||||
possibility that under low enough input, you get one poll call for each
|
||||
input packet caused by a single interrupt each time. And if the system
|
||||
can't handle an interrupt per packet ratio of 1, then it will just have to
|
||||
chug along ....
|
||||
|
||||
|
||||
0) Prerequisites:
|
||||
==================
|
||||
A driver MAY continue using the old 2.4 technique for interfacing
|
||||
to the network stack and not benefit from the NAPI changes.
|
||||
NAPI additions to the kernel do not break backward compatibility.
|
||||
NAPI, however, requires the following features to be available:
|
||||
|
||||
A) DMA ring or enough RAM to store packets in software devices.
|
||||
|
||||
B) Ability to turn off interrupts or maybe events that send packets up
|
||||
the stack.
|
||||
|
||||
NAPI processes packet events in what is known as dev->poll() method.
|
||||
Typically, only packet receive events are processed in dev->poll().
|
||||
The rest of the events MAY be processed by the regular interrupt handler
|
||||
to reduce processing latency (justified also because there are not that
|
||||
many of them).
|
||||
Note, however, NAPI does not enforce that dev->poll() only processes
|
||||
receive events.
|
||||
Tests with the tulip driver indicated slightly increased latency if
|
||||
all of the interrupt handler is moved to dev->poll(). Also MII handling
|
||||
gets a little trickier.
|
||||
The example used in this document is to move the receive processing only
|
||||
to dev->poll(); this is shown with the patch for the tulip driver.
|
||||
For an example of code that moves all the interrupt driver to
|
||||
dev->poll() look at the ported e1000 code.
|
||||
|
||||
There are caveats that might force you to go with moving everything to
|
||||
dev->poll(). Different NICs work differently depending on their status/event
|
||||
acknowledgement setup.
|
||||
There are two types of event register ACK mechanisms.
|
||||
I) what is known as Clear-on-read (COR).
|
||||
when you read the status/event register, it clears everything!
|
||||
The natsemi and sunbmac NICs are known to do this.
|
||||
In this case your only choice is to move all to dev->poll()
|
||||
|
||||
II) Clear-on-write (COW)
|
||||
i) you clear the status by writing a 1 in the bit-location you want.
|
||||
These are the majority of the NICs and work the best with NAPI.
|
||||
Put only receive events in dev->poll(); leave the rest in
|
||||
the old interrupt handler.
|
||||
ii) whatever you write in the status register clears every thing ;->
|
||||
Can't seem to find any supported by Linux which do this. If
|
||||
someone knows such a chip email us please.
|
||||
Move all to dev->poll()
|
||||
|
||||
C) Ability to detect new work correctly.
|
||||
NAPI works by shutting down event interrupts when there's work and
|
||||
turning them on when there's none.
|
||||
New packets might show up in the small window while interrupts were being
|
||||
re-enabled (refer to appendix 2). A packet might sneak in during the period
|
||||
we are enabling interrupts. We only get to know about such a packet when the
|
||||
next new packet arrives and generates an interrupt.
|
||||
Essentially, there is a small window of opportunity for a race condition
|
||||
which for clarity we'll refer to as the "rotting packet".
|
||||
|
||||
This is a very important topic and appendix 2 is dedicated for more
|
||||
discussion.
|
||||
|
||||
Locking rules and environmental guarantees
|
||||
==========================================
|
||||
|
||||
-Guarantee: Only one CPU at any time can call dev->poll(); this is because
|
||||
only one CPU can pick the initial interrupt and hence the initial
|
||||
netif_rx_schedule(dev);
|
||||
- The core layer invokes devices to send packets in a round robin format.
|
||||
This implies receive is totally lockless because of the guarantee that only
|
||||
one CPU is executing it.
|
||||
- contention can only be the result of some other CPU accessing the rx
|
||||
ring. This happens only in close() and suspend() (when these methods
|
||||
try to clean the rx ring);
|
||||
****guarantee: driver authors need not worry about this; synchronization
|
||||
is taken care of for them by the top net layer.
|
||||
-local interrupts are enabled (if you don't move all to dev->poll()). For
|
||||
example link/MII and txcomplete continue functioning just the same old way.
|
||||
This improves the latency of processing these events. It is also assumed that
|
||||
the receive interrupt is the largest cause of noise. Note this might not
|
||||
always be true.
|
||||
[according to Manfred Spraul, the winbond insists on sending one
|
||||
txmitcomplete interrupt for each packet (although this can be mitigated)].
|
||||
For these broken drivers, move all to dev->poll().
|
||||
|
||||
For the rest of this text, we'll assume that dev->poll() only
|
||||
processes receive events.
|
||||
|
||||
new methods introduced by NAPI
|
||||
=============================
|
||||
|
||||
a) netif_rx_schedule(dev)
|
||||
Called by an IRQ handler to schedule a poll for device
|
||||
|
||||
b) netif_rx_schedule_prep(dev)
|
||||
puts the device in a state which allows for it to be added to the
|
||||
CPU polling list if it is up and running. You can look at this as
|
||||
the first half of netif_rx_schedule(dev) above; the second half
|
||||
being c) below.
|
||||
|
||||
c) __netif_rx_schedule(dev)
|
||||
Add device to the poll list for this CPU; assuming that _prep above
|
||||
has already been called and returned 1.
|
||||
|
||||
d) netif_rx_reschedule(dev, undo)
|
||||
Called to reschedule polling for device specifically for some
|
||||
deficient hardware. Read Appendix 2 for more details.
|
||||
|
||||
e) netif_rx_complete(dev)
|
||||
|
||||
Remove interface from the CPU poll list: it must be in the poll list
|
||||
on current cpu. This primitive is called by dev->poll(), when
|
||||
it completes its work. The device cannot be out of poll list at this
|
||||
call, if it is then clearly it is a BUG(). You'll know ;->
|
||||
|
||||
All of the above methods are used below, so keep reading for clarity.
|
||||
|
||||
Device driver changes to be made when porting NAPI
|
||||
==================================================
|
||||
|
||||
Below we describe what kind of changes are required for NAPI to work.
|
||||
|
||||
1) introduction of dev->poll() method
|
||||
=====================================
|
||||
|
||||
This is the method that is invoked by the network core when it requests
|
||||
for new packets from the driver. A driver is allowed to send up to
|
||||
dev->quota packets by the current CPU before yielding to the network
|
||||
subsystem (so other devices can also get opportunity to send to the stack).
|
||||
|
||||
dev->poll() prototype looks as follows:
|
||||
int my_poll(struct net_device *dev, int *budget)
|
||||
|
||||
budget is the remaining number of packets the network subsystem on the
|
||||
current CPU can send up the stack before yielding to other system tasks.
|
||||
*Each driver is responsible for decrementing budget by the total number of
|
||||
packets sent.
|
||||
Total number of packets cannot exceed dev->quota.
|
||||
|
||||
dev->poll() method is invoked by the top layer, the driver just sends if it
|
||||
can to the stack the packet quantity requested.
|
||||
|
||||
more on dev->poll() below after the interrupt changes are explained.
|
||||
|
||||
2) registering dev->poll() method
|
||||
===================================
|
||||
|
||||
dev->poll should be set in the dev->probe() method.
|
||||
e.g:
|
||||
dev->open = my_open;
|
||||
.
|
||||
.
|
||||
/* two new additions */
|
||||
/* first register my poll method */
|
||||
dev->poll = my_poll;
|
||||
/* next register my weight/quanta; can be overridden in /proc */
|
||||
dev->weight = 16;
|
||||
.
|
||||
.
|
||||
dev->stop = my_close;
|
||||
|
||||
|
||||
|
||||
3) scheduling dev->poll()
|
||||
=============================
|
||||
This involves modifying the interrupt handler and the code
|
||||
path which takes the packet off the NIC and sends them to the
|
||||
stack.
|
||||
|
||||
it's important at this point to introduce the classical D Becker
|
||||
interrupt processor:
|
||||
|
||||
------------------
|
||||
static irqreturn_t
|
||||
netdevice_interrupt(int irq, void *dev_id, struct pt_regs *regs)
|
||||
{
|
||||
|
||||
struct net_device *dev = (struct net_device *)dev_instance;
|
||||
struct my_private *tp = (struct my_private *)dev->priv;
|
||||
|
||||
int work_count = my_work_count;
|
||||
status = read_interrupt_status_reg();
|
||||
if (status == 0)
|
||||
return IRQ_NONE; /* Shared IRQ: not us */
|
||||
if (status == 0xffff)
|
||||
return IRQ_HANDLED; /* Hot unplug */
|
||||
if (status & error)
|
||||
do_some_error_handling()
|
||||
|
||||
do {
|
||||
acknowledge_ints_ASAP();
|
||||
|
||||
if (status & link_interrupt) {
|
||||
spin_lock(&tp->link_lock);
|
||||
do_some_link_stat_stuff();
|
||||
spin_unlock(&tp->link_lock);
|
||||
}
|
||||
|
||||
if (status & rx_interrupt) {
|
||||
receive_packets(dev);
|
||||
}
|
||||
|
||||
if (status & rx_nobufs) {
|
||||
make_rx_buffs_avail();
|
||||
}
|
||||
|
||||
if (status & tx_related) {
|
||||
spin_lock(&tp->lock);
|
||||
tx_ring_free(dev);
|
||||
if (tx_died)
|
||||
restart_tx();
|
||||
spin_unlock(&tp->lock);
|
||||
}
|
||||
|
||||
status = read_interrupt_status_reg();
|
||||
|
||||
} while (!(status & error) || more_work_to_be_done);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
We now change this to what is shown below to NAPI-enable it:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
static irqreturn_t
|
||||
netdevice_interrupt(int irq, void *dev_id, struct pt_regs *regs)
|
||||
{
|
||||
struct net_device *dev = (struct net_device *)dev_instance;
|
||||
struct my_private *tp = (struct my_private *)dev->priv;
|
||||
|
||||
status = read_interrupt_status_reg();
|
||||
if (status == 0)
|
||||
return IRQ_NONE; /* Shared IRQ: not us */
|
||||
if (status == 0xffff)
|
||||
return IRQ_HANDLED; /* Hot unplug */
|
||||
if (status & error)
|
||||
do_some_error_handling();
|
||||
|
||||
do {
|
||||
/************************ start note *********************************/
|
||||
acknowledge_ints_ASAP(); // dont ack rx and rxnobuff here
|
||||
/************************ end note *********************************/
|
||||
|
||||
if (status & link_interrupt) {
|
||||
spin_lock(&tp->link_lock);
|
||||
do_some_link_stat_stuff();
|
||||
spin_unlock(&tp->link_lock);
|
||||
}
|
||||
/************************ start note *********************************/
|
||||
if (status & rx_interrupt || (status & rx_nobuffs)) {
|
||||
if (netif_rx_schedule_prep(dev)) {
|
||||
|
||||
/* disable interrupts caused
|
||||
* by arriving packets */
|
||||
disable_rx_and_rxnobuff_ints();
|
||||
/* tell system we have work to be done. */
|
||||
__netif_rx_schedule(dev);
|
||||
} else {
|
||||
printk("driver bug! interrupt while in poll\n");
|
||||
/* FIX by disabling interrupts */
|
||||
disable_rx_and_rxnobuff_ints();
|
||||
}
|
||||
}
|
||||
/************************ end note note *********************************/
|
||||
|
||||
if (status & tx_related) {
|
||||
spin_lock(&tp->lock);
|
||||
tx_ring_free(dev);
|
||||
|
||||
if (tx_died)
|
||||
restart_tx();
|
||||
spin_unlock(&tp->lock);
|
||||
}
|
||||
|
||||
status = read_interrupt_status_reg();
|
||||
|
||||
/************************ start note *********************************/
|
||||
} while (!(status & error) || more_work_to_be_done(status));
|
||||
/************************ end note note *********************************/
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
---------------------------------------------------------------------
|
||||
|
||||
|
||||
We note several things from above:
|
||||
|
||||
I) Any interrupt source which is caused by arriving packets is now
|
||||
turned off when it occurs. Depending on the hardware, there could be
|
||||
several reasons that arriving packets would cause interrupts; these are the
|
||||
interrupt sources we wish to avoid. The two common ones are a) a packet
|
||||
arriving (rxint) b) a packet arriving and finding no DMA buffers available
|
||||
(rxnobuff) .
|
||||
This means also acknowledge_ints_ASAP() will not clear the status
|
||||
register for those two items above; clearing is done in the place where
|
||||
proper work is done within NAPI; at the poll() and refill_rx_ring()
|
||||
discussed further below.
|
||||
netif_rx_schedule_prep() returns 1 if device is in running state and
|
||||
gets successfully added to the core poll list. If we get a zero value
|
||||
we can _almost_ assume we are already added to the list (instead of not running.
|
||||
Logic based on the fact that you shouldn't get interrupt if not running)
|
||||
We rectify this by disabling rx and rxnobuf interrupts.
|
||||
|
||||
II) that receive_packets(dev) and make_rx_buffs_avail() may have disappeared.
|
||||
These functionalities are still around actually......
|
||||
|
||||
In fact, receive_packets(dev) is very close to my_poll() and
|
||||
make_rx_buffs_avail() is invoked from my_poll()
|
||||
|
||||
4) converting receive_packets() to dev->poll()
|
||||
===============================================
|
||||
|
||||
We need to convert the classical D Becker receive_packets(dev) to my_poll()
|
||||
|
||||
First the typical receive_packets() below:
|
||||
-------------------------------------------------------------------
|
||||
|
||||
/* this is called by interrupt handler */
|
||||
static void receive_packets (struct net_device *dev)
|
||||
{
|
||||
|
||||
struct my_private *tp = (struct my_private *)dev->priv;
|
||||
rx_ring = tp->rx_ring;
|
||||
cur_rx = tp->cur_rx;
|
||||
int entry = cur_rx % RX_RING_SIZE;
|
||||
int received = 0;
|
||||
int rx_work_limit = tp->dirty_rx + RX_RING_SIZE - tp->cur_rx;
|
||||
|
||||
while (rx_ring_not_empty) {
|
||||
u32 rx_status;
|
||||
unsigned int rx_size;
|
||||
unsigned int pkt_size;
|
||||
struct sk_buff *skb;
|
||||
/* read size+status of next frame from DMA ring buffer */
|
||||
/* the number 16 and 4 are just examples */
|
||||
rx_status = le32_to_cpu (*(u32 *) (rx_ring + ring_offset));
|
||||
rx_size = rx_status >> 16;
|
||||
pkt_size = rx_size - 4;
|
||||
|
||||
/* process errors */
|
||||
if ((rx_size > (MAX_ETH_FRAME_SIZE+4)) ||
|
||||
(!(rx_status & RxStatusOK))) {
|
||||
netdrv_rx_err (rx_status, dev, tp, ioaddr);
|
||||
return;
|
||||
}
|
||||
|
||||
if (--rx_work_limit < 0)
|
||||
break;
|
||||
|
||||
/* grab a skb */
|
||||
skb = dev_alloc_skb (pkt_size + 2);
|
||||
if (skb) {
|
||||
.
|
||||
.
|
||||
netif_rx (skb);
|
||||
.
|
||||
.
|
||||
} else { /* OOM */
|
||||
/*seems very driver specific ... some just pass
|
||||
whatever is on the ring already. */
|
||||
}
|
||||
|
||||
/* move to the next skb on the ring */
|
||||
entry = (++tp->cur_rx) % RX_RING_SIZE;
|
||||
received++ ;
|
||||
|
||||
}
|
||||
|
||||
/* store current ring pointer state */
|
||||
tp->cur_rx = cur_rx;
|
||||
|
||||
/* Refill the Rx ring buffers if they are needed */
|
||||
refill_rx_ring();
|
||||
.
|
||||
.
|
||||
|
||||
}
|
||||
-------------------------------------------------------------------
|
||||
We change it to a new one below; note the additional parameter in
|
||||
the call.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
|
||||
/* this is called by the network core */
|
||||
static int my_poll (struct net_device *dev, int *budget)
|
||||
{
|
||||
|
||||
struct my_private *tp = (struct my_private *)dev->priv;
|
||||
rx_ring = tp->rx_ring;
|
||||
cur_rx = tp->cur_rx;
|
||||
int entry = cur_rx % RX_BUF_LEN;
|
||||
/* maximum packets to send to the stack */
|
||||
/************************ note note *********************************/
|
||||
int rx_work_limit = dev->quota;
|
||||
|
||||
/************************ end note note *********************************/
|
||||
do { // outer beginning loop starts here
|
||||
|
||||
clear_rx_status_register_bit();
|
||||
|
||||
while (rx_ring_not_empty) {
|
||||
u32 rx_status;
|
||||
unsigned int rx_size;
|
||||
unsigned int pkt_size;
|
||||
struct sk_buff *skb;
|
||||
/* read size+status of next frame from DMA ring buffer */
|
||||
/* the number 16 and 4 are just examples */
|
||||
rx_status = le32_to_cpu (*(u32 *) (rx_ring + ring_offset));
|
||||
rx_size = rx_status >> 16;
|
||||
pkt_size = rx_size - 4;
|
||||
|
||||
/* process errors */
|
||||
if ((rx_size > (MAX_ETH_FRAME_SIZE+4)) ||
|
||||
(!(rx_status & RxStatusOK))) {
|
||||
netdrv_rx_err (rx_status, dev, tp, ioaddr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/************************ note note *********************************/
|
||||
if (--rx_work_limit < 0) { /* we got packets, but no quota */
|
||||
/* store current ring pointer state */
|
||||
tp->cur_rx = cur_rx;
|
||||
|
||||
/* Refill the Rx ring buffers if they are needed */
|
||||
refill_rx_ring(dev);
|
||||
goto not_done;
|
||||
}
|
||||
/********************** end note **********************************/
|
||||
|
||||
/* grab a skb */
|
||||
skb = dev_alloc_skb (pkt_size + 2);
|
||||
if (skb) {
|
||||
.
|
||||
.
|
||||
/************************ note note *********************************/
|
||||
netif_receive_skb (skb);
|
||||
/********************** end note **********************************/
|
||||
.
|
||||
.
|
||||
} else { /* OOM */
|
||||
/*seems very driver specific ... common is just pass
|
||||
whatever is on the ring already. */
|
||||
}
|
||||
|
||||
/* move to the next skb on the ring */
|
||||
entry = (++tp->cur_rx) % RX_RING_SIZE;
|
||||
received++ ;
|
||||
|
||||
}
|
||||
|
||||
/* store current ring pointer state */
|
||||
tp->cur_rx = cur_rx;
|
||||
|
||||
/* Refill the Rx ring buffers if they are needed */
|
||||
refill_rx_ring(dev);
|
||||
|
||||
/* no packets on ring; but new ones can arrive since we last
|
||||
checked */
|
||||
status = read_interrupt_status_reg();
|
||||
if (rx status is not set) {
|
||||
/* If something arrives in this narrow window,
|
||||
an interrupt will be generated */
|
||||
goto done;
|
||||
}
|
||||
/* done! at least that's what it looks like ;->
|
||||
if new packets came in after our last check on status bits
|
||||
they'll be caught by the while check and we go back and clear them
|
||||
since we havent exceeded our quota */
|
||||
} while (rx_status_is_set);
|
||||
|
||||
done:
|
||||
|
||||
/************************ note note *********************************/
|
||||
dev->quota -= received;
|
||||
*budget -= received;
|
||||
|
||||
/* If RX ring is not full we are out of memory. */
|
||||
if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
|
||||
goto oom;
|
||||
|
||||
/* we are happy/done, no more packets on ring; put us back
|
||||
to where we can start processing interrupts again */
|
||||
netif_rx_complete(dev);
|
||||
enable_rx_and_rxnobuf_ints();
|
||||
|
||||
/* The last op happens after poll completion. Which means the following:
|
||||
* 1. it can race with disabling irqs in irq handler (which are done to
|
||||
* schedule polls)
|
||||
* 2. it can race with dis/enabling irqs in other poll threads
|
||||
* 3. if an irq raised after the beginning of the outer beginning
|
||||
* loop (marked in the code above), it will be immediately
|
||||
* triggered here.
|
||||
*
|
||||
* Summarizing: the logic may result in some redundant irqs both
|
||||
* due to races in masking and due to too late acking of already
|
||||
* processed irqs. The good news: no events are ever lost.
|
||||
*/
|
||||
|
||||
return 0; /* done */
|
||||
|
||||
not_done:
|
||||
if (tp->cur_rx - tp->dirty_rx > RX_RING_SIZE/2 ||
|
||||
tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
|
||||
refill_rx_ring(dev);
|
||||
|
||||
if (!received) {
|
||||
printk("received==0\n");
|
||||
received = 1;
|
||||
}
|
||||
dev->quota -= received;
|
||||
*budget -= received;
|
||||
return 1; /* not_done */
|
||||
|
||||
oom:
|
||||
/* Start timer, stop polling, but do not enable rx interrupts. */
|
||||
start_poll_timer(dev);
|
||||
return 0; /* we'll take it from here so tell core "done"*/
|
||||
|
||||
/************************ End note note *********************************/
|
||||
}
|
||||
-------------------------------------------------------------------
|
||||
|
||||
From above we note that:
|
||||
0) rx_work_limit = dev->quota
|
||||
1) refill_rx_ring() is in charge of clearing the bit for rxnobuff when
|
||||
it does the work.
|
||||
2) We have a done and not_done state.
|
||||
3) instead of netif_rx() we call netif_receive_skb() to pass the skb.
|
||||
4) we have a new way of handling oom condition
|
||||
5) A new outer do { ... } while loop has been added. This serves the purpose of
|
||||
ensuring that if a new packet has come in, after we are all set and done,
|
||||
and we have not exceeded our quota that we continue sending packets up.
|
||||
|
||||
|
||||
-----------------------------------------------------------
|
||||
Poll timer code will need to do the following:
|
||||
|
||||
a)
|
||||
|
||||
if (tp->cur_rx - tp->dirty_rx > RX_RING_SIZE/2 ||
|
||||
tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
|
||||
refill_rx_ring(dev);
|
||||
|
||||
/* If RX ring is not full we are still out of memory.
|
||||
Restart the timer again. Else we re-add ourselves
|
||||
to the master poll list.
|
||||
*/
|
||||
|
||||
if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
|
||||
restart_timer();
|
||||
|
||||
else netif_rx_schedule(dev); /* we are back on the poll list */
|
||||
|
||||
5) dev->close() and dev->suspend() issues
|
||||
==========================================
|
||||
The driver writer needn't worry about this; the top net layer takes
|
||||
care of it.
|
||||
|
||||
6) Adding new Stats to /proc
|
||||
=============================
|
||||
In order to debug some of the new features, we introduce new stats
|
||||
that need to be collected.
|
||||
TODO: Fill this later.
|
||||
|
||||
APPENDIX 1: discussion on using ethernet HW FC
|
||||
==============================================
|
||||
Most chips with FC only send a pause packet when they run out of Rx buffers.
|
||||
Since packets are pulled off the DMA ring by a softirq in NAPI,
|
||||
if the system is slow in grabbing them and we have a high input
|
||||
rate (faster than the system's capacity to remove packets), then theoretically
|
||||
there will only be one rx interrupt for all packets during a given packetstorm.
|
||||
Under low load, we might have a single interrupt per packet.
|
||||
FC should be programmed to apply in the case when the system can't pull out
|
||||
packets fast enough, i.e. send a pause only when you run out of rx buffers.
|
||||
Note FC in itself is a good solution but we have found it to not be
|
||||
much of a commodity feature (both in NICs and switches) and hence falls
|
||||
under the same category as using NIC based mitigation. Also, experiments
|
||||
indicate that it's much harder to resolve the resource allocation
|
||||
issue (aka lazy receiving that NAPI offers) and hence quantify its usefulness
|
||||
proved harder. In any case, FC works even better with NAPI but is not
|
||||
necessary.
|
||||
|
||||
|
||||
APPENDIX 2: the "rotting packet" race-window avoidance scheme
|
||||
=============================================================
|
||||
|
||||
There are two types of associations seen here
|
||||
|
||||
1) status/int which honors level triggered IRQ
|
||||
|
||||
If a status bit for receive or rxnobuff is set and the corresponding
|
||||
interrupt-enable bit is not on, then no interrupts will be generated. However,
|
||||
as soon as the "interrupt-enable" bit is unmasked, an immediate interrupt is
|
||||
generated. [assuming the status bit was not turned off].
|
||||
Generally the concept of level triggered IRQs in association with a status and
|
||||
interrupt-enable CSR register set is used to avoid the race.
|
||||
|
||||
If we take the example of the tulip:
|
||||
"pending work" is indicated by the status bit(CSR5 in tulip).
|
||||
the corresponding interrupt bit (CSR7 in tulip) might be turned off (but
|
||||
the CSR5 will continue to be turned on with new packet arrivals even if
|
||||
we clear it the first time)
|
||||
Very important is the fact that if we turn the interrupt bit on when
|
||||
status is set that an immediate irq is triggered.
|
||||
|
||||
If we cleared the rx ring and proclaimed there was "no more work
|
||||
to be done" and then went on to do a few other things; then when we enable
|
||||
interrupts, there is a possibility that a new packet might sneak in during
|
||||
this phase. It helps to look at the pseudo code for the tulip poll
|
||||
routine:
|
||||
|
||||
--------------------------
|
||||
do {
|
||||
ACK;
|
||||
while (ring_is_not_empty()) {
|
||||
work-work-work
|
||||
if quota is exceeded: exit, no touching irq status/mask
|
||||
}
|
||||
/* No packets, but new can arrive while we are doing this*/
|
||||
CSR5 := read
|
||||
if (CSR5 is not set) {
|
||||
/* If something arrives in this narrow window here,
|
||||
* where the comments are ;-> irq will be generated */
|
||||
unmask irqs;
|
||||
exit poll;
|
||||
}
|
||||
} while (rx_status_is_set);
|
||||
------------------------
|
||||
|
||||
CSR5 bit of interest is only the rx status.
|
||||
If you look at the last if statement:
|
||||
you just finished grabbing all the packets from the rx ring .. you check if
|
||||
status bit says there are more packets just in ... it says none; you then
|
||||
enable rx interrupts again; if a new packet just came in during this check,
|
||||
we are counting that CSR5 will be set in that small window of opportunity
|
||||
and that by re-enabling interrupts, we would actually trigger an interrupt
|
||||
to register the new packet for processing.
|
||||
|
||||
[The above description may be very verbose, if you have better wording
|
||||
that will make this more understandable, please suggest it.]
|
||||
|
||||
2) non-capable hardware
|
||||
|
||||
These do not generally respect level triggered IRQs. Normally,
|
||||
irqs may be lost while being masked and the only way to leave poll is to do
|
||||
a double check for new input after netif_rx_complete() is invoked
|
||||
and re-enable polling (after seeing this new input).
|
||||
|
||||
Sample code:
|
||||
|
||||
---------
|
||||
.
|
||||
.
|
||||
restart_poll:
|
||||
while (ring_is_not_empty()) {
|
||||
work-work-work
|
||||
if quota is exceeded: exit, not touching irq status/mask
|
||||
}
|
||||
.
|
||||
.
|
||||
.
|
||||
enable_rx_interrupts()
|
||||
netif_rx_complete(dev);
|
||||
if (ring_has_new_packet() && netif_rx_reschedule(dev, received)) {
|
||||
disable_rx_and_rxnobufs()
|
||||
goto restart_poll
|
||||
} while (rx_status_is_set);
|
||||
---------
|
||||
|
||||
Basically netif_rx_complete() removes us from the poll list, but because a
|
||||
new packet which will never be caught due to the possibility of a race
|
||||
might come in, we attempt to re-add ourselves to the poll list.
|
||||
|
||||
|
||||
|
||||
|
||||
APPENDIX 3: Scheduling issues.
|
||||
==============================
|
||||
As seen NAPI moves processing to softirq level. Linux uses the ksoftirqd as the
|
||||
general solution to schedule softirq's to run before next interrupt and by putting
|
||||
them under scheduler control. Also this prevents consecutive softirq's from
|
||||
monopolizing the CPU. This also has the effect that the priority of ksoftirq needs
|
||||
to be considered when running very CPU-intensive applications and networking to
|
||||
get the proper softirq/user balance. Increasing ksoftirq priority to 0
|
||||
(eventually more) is reported to cure problems with low network performance at high
|
||||
CPU load.
|
||||
|
||||
Most used processes in a GIGE router:
|
||||
USER PID %CPU %MEM SIZE RSS TTY STAT START TIME COMMAND
|
||||
root 3 0.2 0.0 0 0 ? RWN Aug 15 602:00 (ksoftirqd_CPU0)
|
||||
root 232 0.0 7.9 41400 40884 ? S Aug 15 74:12 gated
|
||||
|
||||
--------------------------------------------------------------------
|
||||
|
||||
relevant sites:
|
||||
==================
|
||||
ftp://robur.slu.se/pub/Linux/net-development/NAPI/
|
||||
|
||||
|
||||
--------------------------------------------------------------------
|
||||
TODO: Write net-skeleton.c driver.
|
||||
-------------------------------------------------------------
|
||||
|
||||
Authors:
|
||||
========
|
||||
Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
|
||||
Jamal Hadi Salim <hadi@cyberus.ca>
|
||||
Robert Olsson <Robert.Olsson@data.slu.se>
|
||||
|
||||
Acknowledgements:
|
||||
================
|
||||
People who made this document better:
|
||||
|
||||
Lennert Buytenhek <buytenh@gnu.org>
|
||||
Andrew Morton <akpm@zip.com.au>
|
||||
Manfred Spraul <manfred@colorfullife.com>
|
||||
Donald Becker <becker@scyld.com>
|
||||
Jeff Garzik <jgarzik@pobox.com>
|
@@ -38,8 +38,13 @@ Socket options
|
||||
DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
|
||||
service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
|
||||
the socket will fall back to 0 (which means that no meaningful service code
|
||||
is present). Connecting sockets set at most one service option; for
|
||||
listening sockets, multiple service codes can be specified.
|
||||
is present). On active sockets this is set before connect(); specifying more
|
||||
than one code has no effect (all subsequent service codes are ignored). The
|
||||
case is different for passive sockets, where multiple service codes (up to 32)
|
||||
can be set before calling bind().
|
||||
|
||||
DCCP_SOCKOPT_GET_CUR_MPS is read-only and retrieves the current maximum packet
|
||||
size (application payload size) in bytes, see RFC 4340, section 14.
|
||||
|
||||
DCCP_SOCKOPT_SEND_CSCOV and DCCP_SOCKOPT_RECV_CSCOV are used for setting the
|
||||
partial checksum coverage (RFC 4340, sec. 9.2). The default is that checksums
|
||||
@@ -50,12 +55,13 @@ be enabled at the receiver, too with suitable choice of CsCov.
|
||||
DCCP_SOCKOPT_SEND_CSCOV sets the sender checksum coverage. Values in the
|
||||
range 0..15 are acceptable. The default setting is 0 (full coverage),
|
||||
values between 1..15 indicate partial coverage.
|
||||
DCCP_SOCKOPT_SEND_CSCOV is for the receiver and has a different meaning: it
|
||||
DCCP_SOCKOPT_RECV_CSCOV is for the receiver and has a different meaning: it
|
||||
sets a threshold, where again values 0..15 are acceptable. The default
|
||||
of 0 means that all packets with a partial coverage will be discarded.
|
||||
Values in the range 1..15 indicate that packets with minimally such a
|
||||
coverage value are also acceptable. The higher the number, the more
|
||||
restrictive this setting (see [RFC 4340, sec. 9.2.1]).
|
||||
restrictive this setting (see [RFC 4340, sec. 9.2.1]). Partial coverage
|
||||
settings are inherited by the child socket after accept().
|
||||
|
||||
The following two options apply to CCID 3 exclusively and are getsockopt()-only.
|
||||
In either case, a TFRC info struct (defined in <linux/tfrc.h>) is returned.
|
||||
@@ -112,9 +118,14 @@ tx_qlen = 5
|
||||
The size of the transmit buffer in packets. A value of 0 corresponds
|
||||
to an unbounded transmit buffer.
|
||||
|
||||
sync_ratelimit = 125 ms
|
||||
The timeout between subsequent DCCP-Sync packets sent in response to
|
||||
sequence-invalid packets on the same socket (RFC 4340, 7.5.4). The unit
|
||||
of this parameter is milliseconds; a value of 0 disables rate-limiting.
|
||||
|
||||
Notes
|
||||
=====
|
||||
|
||||
DCCP does not travel through NAT successfully at present on many boxes. This is
|
||||
because the checksum covers the psuedo-header as per TCP and UDP. Linux NAT
|
||||
because the checksum covers the pseudo-header as per TCP and UDP. Linux NAT
|
||||
support for DCCP has been added.
|
||||
|
@@ -1,52 +0,0 @@
|
||||
The Digi International RightSwitch SE-X (dgrs) Device Driver
|
||||
|
||||
This is a Linux driver for the Digi International RightSwitch SE-X
|
||||
EISA and PCI boards. These are 4 (EISA) or 6 (PCI) port Ethernet
|
||||
switches and a NIC combined into a single board. This driver can
|
||||
be compiled into the kernel statically or as a loadable module.
|
||||
|
||||
There is also a companion management tool, called "xrightswitch".
|
||||
The management tool lets you watch the performance graphically,
|
||||
as well as set the SNMP agent IP and IPX addresses, IEEE Spanning
|
||||
Tree, and Aging time. These can also be set from the command line
|
||||
when the driver is loaded. The driver command line options are:
|
||||
|
||||
debug=NNN Debug printing level
|
||||
dma=0/1 Disable/Enable DMA on PCI card
|
||||
spantree=0/1 Disable/Enable IEEE spanning tree
|
||||
hashexpire=NNN Change address aging time (default 300 seconds)
|
||||
ipaddr=A,B,C,D Set SNMP agent IP address i.e. 199,86,8,221
|
||||
iptrap=A,B,C,D Set SNMP agent IP trap address i.e. 199,86,8,221
|
||||
ipxnet=NNN Set SNMP agent IPX network number
|
||||
nicmode=0/1 Disable/Enable multiple NIC mode
|
||||
|
||||
There is also a tool for setting up input and output packet filters
|
||||
on each port, called "dgrsfilt".
|
||||
|
||||
Both the management tool and the filtering tool are available
|
||||
separately from the following FTP site:
|
||||
|
||||
ftp://ftp.dgii.com/drivers/rightswitch/linux/
|
||||
|
||||
When nicmode=1, the board and driver operate as 4 or 6 individual
|
||||
NIC ports (eth0...eth5) instead of as a switch. All switching
|
||||
functions are disabled. In the future, the board firmware may include
|
||||
a routing cache when in this mode.
|
||||
|
||||
Copyright 1995-1996 Digi International Inc.
|
||||
|
||||
This software may be used and distributed according to the terms
|
||||
of the GNU General Public License, incorporated herein by reference.
|
||||
|
||||
For information on purchasing a RightSwitch SE-4 or SE-6
|
||||
board, please contact Digi's sales department at 1-612-912-3444
|
||||
or 1-800-DIGIBRD. Outside the U.S., please check our Web page at:
|
||||
|
||||
http://www.dgii.com
|
||||
|
||||
for sales offices worldwide. Tech support is also available through
|
||||
the channels listed on the Web site, although as long as I am
|
||||
employed on networking products at Digi I will be happy to provide
|
||||
any bug fixes that may be needed.
|
||||
|
||||
-Rick Richardson, rick@dgii.com
|
@@ -180,13 +180,20 @@ tcp_fin_timeout - INTEGER
|
||||
to live longer. Cf. tcp_max_orphans.
|
||||
|
||||
tcp_frto - INTEGER
|
||||
Enables F-RTO, an enhanced recovery algorithm for TCP retransmission
|
||||
Enables Forward RTO-Recovery (F-RTO) defined in RFC4138.
|
||||
F-RTO is an enhanced recovery algorithm for TCP retransmission
|
||||
timeouts. It is particularly beneficial in wireless environments
|
||||
where packet loss is typically due to random radio interference
|
||||
rather than intermediate router congestion. If set to 1, basic
|
||||
version is enabled. 2 enables SACK enhanced F-RTO, which is
|
||||
EXPERIMENTAL. The basic version can be used also when SACK is
|
||||
enabled for a flow through tcp_sack sysctl.
|
||||
rather than intermediate router congestion. FRTO is a sender-side
|
||||
only modification. Therefore it does not require any support from
|
||||
the peer, but in a typical case, however, where wireless link is
|
||||
the local access link and most of the data flows downlink, the
|
||||
faraway servers should have FRTO enabled to take advantage of it.
|
||||
If set to 1, basic version is enabled. 2 enables SACK enhanced
|
||||
F-RTO if flow uses SACK. The basic version can be used also when
|
||||
SACK is in use though scenario(s) with it exist where FRTO
|
||||
interacts badly with the packet counting of the SACK enabled TCP
|
||||
flow.
|
||||
|
||||
tcp_frto_response - INTEGER
|
||||
When F-RTO has detected that a TCP retransmission timeout was
|
||||
|
@@ -13,15 +13,35 @@ The radiotap format is discussed in
|
||||
./Documentation/networking/radiotap-headers.txt.
|
||||
|
||||
Although 13 radiotap argument types are currently defined, most only make sense
|
||||
to appear on received packets. Currently three kinds of argument are used by
|
||||
the injection code, although it knows to skip any other arguments that are
|
||||
present (facilitating replay of captured radiotap headers directly):
|
||||
to appear on received packets. The following information is parsed from the
|
||||
radiotap headers and used to control injection:
|
||||
|
||||
- IEEE80211_RADIOTAP_RATE - u8 arg in 500kbps units (0x02 --> 1Mbps)
|
||||
* IEEE80211_RADIOTAP_RATE
|
||||
|
||||
- IEEE80211_RADIOTAP_ANTENNA - u8 arg, 0x00 = ant1, 0x01 = ant2
|
||||
rate in 500kbps units, automatic if invalid or not present
|
||||
|
||||
- IEEE80211_RADIOTAP_DBM_TX_POWER - u8 arg, dBm
|
||||
|
||||
* IEEE80211_RADIOTAP_ANTENNA
|
||||
|
||||
antenna to use, automatic if not present
|
||||
|
||||
|
||||
* IEEE80211_RADIOTAP_DBM_TX_POWER
|
||||
|
||||
transmit power in dBm, automatic if not present
|
||||
|
||||
|
||||
* IEEE80211_RADIOTAP_FLAGS
|
||||
|
||||
IEEE80211_RADIOTAP_F_FCS: FCS will be removed and recalculated
|
||||
IEEE80211_RADIOTAP_F_WEP: frame will be encrypted if key available
|
||||
IEEE80211_RADIOTAP_F_FRAG: frame will be fragmented if longer than the
|
||||
current fragmentation threshold. Note that
|
||||
this flag is only reliable when software
|
||||
fragmentation is enabled
|
||||
|
||||
The injection code can also skip all other currently defined radiotap fields
|
||||
facilitating replay of captured radiotap headers directly.
|
||||
|
||||
Here is an example valid radiotap header defining these three parameters
|
||||
|
||||
|
@@ -3,6 +3,10 @@ started by Ingo Molnar <mingo@redhat.com>, 2001.09.17
|
||||
2.6 port and netpoll api by Matt Mackall <mpm@selenic.com>, Sep 9 2003
|
||||
|
||||
Please send bug reports to Matt Mackall <mpm@selenic.com>
|
||||
and Satyam Sharma <satyam.sharma@gmail.com>
|
||||
|
||||
Introduction:
|
||||
=============
|
||||
|
||||
This module logs kernel printk messages over UDP allowing debugging of
|
||||
problems where disk logging fails and serial consoles are impractical.
|
||||
@@ -13,6 +17,9 @@ the specified interface as soon as possible. While this doesn't allow
|
||||
capture of early kernel panics, it does capture most of the boot
|
||||
process.
|
||||
|
||||
Sender and receiver configuration:
|
||||
==================================
|
||||
|
||||
It takes a string configuration parameter "netconsole" in the
|
||||
following format:
|
||||
|
||||
@@ -34,21 +41,113 @@ Examples:
|
||||
|
||||
insmod netconsole netconsole=@/,@10.0.0.2/
|
||||
|
||||
It also supports logging to multiple remote agents by specifying
|
||||
parameters for the multiple agents separated by semicolons and the
|
||||
complete string enclosed in "quotes", thusly:
|
||||
|
||||
modprobe netconsole netconsole="@/,@10.0.0.2/;@/eth1,6892@10.0.0.3/"
|
||||
|
||||
Built-in netconsole starts immediately after the TCP stack is
|
||||
initialized and attempts to bring up the supplied dev at the supplied
|
||||
address.
|
||||
|
||||
The remote host can run either 'netcat -u -l -p <port>' or syslogd.
|
||||
|
||||
Dynamic reconfiguration:
|
||||
========================
|
||||
|
||||
Dynamic reconfigurability is a useful addition to netconsole that enables
|
||||
remote logging targets to be dynamically added, removed, or have their
|
||||
parameters reconfigured at runtime from a configfs-based userspace interface.
|
||||
[ Note that the parameters of netconsole targets that were specified/created
|
||||
from the boot/module option are not exposed via this interface, and hence
|
||||
cannot be modified dynamically. ]
|
||||
|
||||
To include this feature, select CONFIG_NETCONSOLE_DYNAMIC when building the
|
||||
netconsole module (or kernel, if netconsole is built-in).
|
||||
|
||||
Some examples follow (where configfs is mounted at the /sys/kernel/config
|
||||
mountpoint).
|
||||
|
||||
To add a remote logging target (target names can be arbitrary):
|
||||
|
||||
cd /sys/kernel/config/netconsole/
|
||||
mkdir target1
|
||||
|
||||
Note that newly created targets have default parameter values (as mentioned
|
||||
above) and are disabled by default -- they must first be enabled by writing
|
||||
"1" to the "enabled" attribute (usually after setting parameters accordingly)
|
||||
as described below.
|
||||
|
||||
To remove a target:
|
||||
|
||||
rmdir /sys/kernel/config/netconsole/othertarget/
|
||||
|
||||
The interface exposes these parameters of a netconsole target to userspace:
|
||||
|
||||
enabled Is this target currently enabled? (read-write)
|
||||
dev_name Local network interface name (read-write)
|
||||
local_port Source UDP port to use (read-write)
|
||||
remote_port Remote agent's UDP port (read-write)
|
||||
local_ip Source IP address to use (read-write)
|
||||
remote_ip Remote agent's IP address (read-write)
|
||||
local_mac Local interface's MAC address (read-only)
|
||||
remote_mac Remote agent's MAC address (read-write)
|
||||
|
||||
The "enabled" attribute is also used to control whether the parameters of
|
||||
a target can be updated or not -- you can modify the parameters of only
|
||||
disabled targets (i.e. if "enabled" is 0).
|
||||
|
||||
To update a target's parameters:
|
||||
|
||||
cat enabled # check if enabled is 1
|
||||
echo 0 > enabled # disable the target (if required)
|
||||
echo eth2 > dev_name # set local interface
|
||||
echo 10.0.0.4 > remote_ip # update some parameter
|
||||
echo cb:a9:87:65:43:21 > remote_mac # update more parameters
|
||||
echo 1 > enabled # enable target again
|
||||
|
||||
You can also update the local interface dynamically. This is especially
|
||||
useful if you want to use interfaces that have newly come up (and may not
|
||||
have existed when netconsole was loaded / initialized).
|
||||
|
||||
Miscellaneous notes:
|
||||
====================
|
||||
|
||||
WARNING: the default target ethernet setting uses the broadcast
|
||||
ethernet address to send packets, which can cause increased load on
|
||||
other systems on the same ethernet segment.
|
||||
|
||||
TIP: some LAN switches may be configured to suppress ethernet broadcasts
|
||||
so it is advised to explicitly specify the remote agents' MAC addresses
|
||||
from the config parameters passed to netconsole.
|
||||
|
||||
TIP: to find out the MAC address of, say, 10.0.0.2, you may try using:
|
||||
|
||||
ping -c 1 10.0.0.2 ; /sbin/arp -n | grep 10.0.0.2
|
||||
|
||||
TIP: in case the remote logging agent is on a separate LAN subnet than
|
||||
the sender, it is suggested to try specifying the MAC address of the
|
||||
default gateway (you may use /sbin/route -n to find it out) as the
|
||||
remote MAC address instead.
|
||||
|
||||
NOTE: the network device (eth1 in the above case) can run any kind
|
||||
of other network traffic, netconsole is not intrusive. Netconsole
|
||||
might cause slight delays in other traffic if the volume of kernel
|
||||
messages is high, but should have no other impact.
|
||||
|
||||
NOTE: if you find that the remote logging agent is not receiving or
|
||||
printing all messages from the sender, it is likely that you have set
|
||||
the "console_loglevel" parameter (on the sender) to only send high
|
||||
priority messages to the console. You can change this at runtime using:
|
||||
|
||||
dmesg -n 8
|
||||
|
||||
or by specifying "debug" on the kernel command line at boot, to send
|
||||
all kernel messages to the console. A specific value for this parameter
|
||||
can also be set using the "loglevel" kernel boot option. See the
|
||||
dmesg(8) man page and Documentation/kernel-parameters.txt for details.
|
||||
|
||||
Netconsole was designed to be as instantaneous as possible, to
|
||||
enable the logging of even the most critical kernel bugs. It works
|
||||
from IRQ contexts as well, and does not enable interrupts while
|
||||
|
@@ -73,7 +73,8 @@ dev->hard_start_xmit:
|
||||
has to lock by itself when needed. It is recommended to use a try lock
|
||||
for this and return NETDEV_TX_LOCKED when the spin lock fails.
|
||||
The locking there should also properly protect against
|
||||
set_multicast_list.
|
||||
set_multicast_list. Note that the use of NETIF_F_LLTX is deprecated.
|
||||
Don't use it for new drivers.
|
||||
|
||||
Context: Process with BHs disabled or BH (timer),
|
||||
will be called with interrupts disabled by netconsole.
|
||||
@@ -95,9 +96,13 @@ dev->set_multicast_list:
|
||||
Synchronization: netif_tx_lock spinlock.
|
||||
Context: BHs disabled
|
||||
|
||||
dev->poll:
|
||||
Synchronization: __LINK_STATE_RX_SCHED bit in dev->state. See
|
||||
dev_close code and comments in net/core/dev.c for more info.
|
||||
struct napi_struct synchronization rules
|
||||
========================================
|
||||
napi->poll:
|
||||
Synchronization: NAPI_STATE_SCHED bit in napi->state. Device
|
||||
driver's dev->close method will invoke napi_disable() on
|
||||
all NAPI instances which will do a sleeping poll on the
|
||||
NAPI_STATE_SCHED napi->state bit, waiting for all pending
|
||||
NAPI activity to cease.
|
||||
Context: softirq
|
||||
will be called with interrupts disabled by netconsole.
|
||||
|
||||
|
@@ -50,7 +50,7 @@ Table of Contents
|
||||
g) Freescale SOC SEC Security Engines
|
||||
h) Board Control and Status (BCSR)
|
||||
i) Freescale QUICC Engine module (QE)
|
||||
j) Flash chip nodes
|
||||
j) CFI or JEDEC memory-mapped NOR flash
|
||||
k) Global Utilities Block
|
||||
|
||||
VII - Specifying interrupt information for devices
|
||||
@@ -1510,7 +1510,10 @@ platforms are moved over to use the flattened-device-tree model.
|
||||
|
||||
i) Freescale QUICC Engine module (QE)
|
||||
This represents qe module that is installed on PowerQUICC II Pro.
|
||||
Hopefully it will merge backward compatibility with CPM/CPM2.
|
||||
|
||||
NOTE: This is an interim binding; it should be updated to fit
|
||||
in with the CPM binding later in this document.
|
||||
|
||||
Basically, it is a bus of devices, that could act more or less
|
||||
as a complete entity (UCC, USB etc ). All of them should be siblings on
|
||||
the "root" qe node, using the common properties from there.
|
||||
@@ -1548,7 +1551,7 @@ platforms are moved over to use the flattened-device-tree model.
|
||||
Required properties:
|
||||
- device_type : should be "spi".
|
||||
- compatible : should be "fsl_spi".
|
||||
- mode : the SPI operation mode, it can be "cpu" or "qe".
|
||||
- mode : the SPI operation mode, it can be "cpu" or "cpu-qe".
|
||||
- reg : Offset and length of the register set for the device
|
||||
- interrupts : <a b> where a is the interrupt number and b is a
|
||||
field that represents an encoding of the sense and level
|
||||
@@ -1757,45 +1760,69 @@ platforms are moved over to use the flattened-device-tree model.
|
||||
};
|
||||
};
|
||||
|
||||
j) Flash chip nodes
|
||||
j) CFI or JEDEC memory-mapped NOR flash
|
||||
|
||||
Flash chips (Memory Technology Devices) are often used for solid state
|
||||
file systems on embedded devices.
|
||||
|
||||
Required properties:
|
||||
- compatible : should contain the specific model of flash chip(s)
|
||||
used, if known, followed by either "cfi-flash" or "jedec-flash"
|
||||
- reg : Address range of the flash chip
|
||||
- bank-width : Width (in bytes) of the flash bank. Equal to the
|
||||
device width times the number of interleaved chips.
|
||||
- device-width : (optional) Width of a single flash chip. If
|
||||
omitted, assumed to be equal to 'bank-width'.
|
||||
- #address-cells, #size-cells : Must be present if the flash has
|
||||
sub-nodes representing partitions (see below). In this case
|
||||
both #address-cells and #size-cells must be equal to 1.
|
||||
|
||||
- device_type : has to be "rom"
|
||||
- compatible : Should specify what this flash device is compatible with.
|
||||
Currently, this is most likely to be "direct-mapped" (which
|
||||
corresponds to the MTD physmap mapping driver).
|
||||
- reg : Offset and length of the register set (or memory mapping) for
|
||||
the device.
|
||||
- bank-width : Width of the flash data bus in bytes. Required
|
||||
for the NOR flashes (compatible == "direct-mapped" and others) ONLY.
|
||||
For JEDEC compatible devices, the following additional properties
|
||||
are defined:
|
||||
|
||||
Recommended properties :
|
||||
- vendor-id : Contains the flash chip's vendor id (1 byte).
|
||||
- device-id : Contains the flash chip's device id (1 byte).
|
||||
|
||||
- partitions : Several pairs of 32-bit values where the first value is
|
||||
partition's offset from the start of the device and the second one is
|
||||
partition size in bytes with LSB used to signify a read only
|
||||
partition (so, the partition size should always be an even number).
|
||||
- partition-names : The list of concatenated zero terminated strings
|
||||
representing the partition names.
|
||||
- probe-type : The type of probe which should be done for the chip
|
||||
(JEDEC vs CFI actually). Valid ONLY for NOR flashes.
|
||||
In addition to the information on the flash bank itself, the
|
||||
device tree may optionally contain additional information
|
||||
describing partitions of the flash address space. This can be
|
||||
used on platforms which have strong conventions about which
|
||||
portions of the flash are used for what purposes, but which don't
|
||||
use an on-flash partition table such as RedBoot.
|
||||
|
||||
Example:
|
||||
Each partition is represented as a sub-node of the flash device.
|
||||
Each node's name represents the name of the corresponding
|
||||
partition of the flash device.
|
||||
|
||||
flash@ff000000 {
|
||||
device_type = "rom";
|
||||
compatible = "direct-mapped";
|
||||
probe-type = "CFI";
|
||||
reg = <ff000000 01000000>;
|
||||
bank-width = <4>;
|
||||
partitions = <00000000 00f80000
|
||||
00f80000 00080001>;
|
||||
partition-names = "fs\0firmware";
|
||||
};
|
||||
Flash partitions
|
||||
- reg : The partition's offset and size within the flash bank.
|
||||
- label : (optional) The label / name for this flash partition.
|
||||
If omitted, the label is taken from the node name (excluding
|
||||
the unit address).
|
||||
- read-only : (optional) This parameter, if present, is a hint to
|
||||
Linux that this flash partition should only be mounted
|
||||
read-only. This is usually used for flash partitions
|
||||
containing early-boot firmware images or data which should not
|
||||
be clobbered.
|
||||
|
||||
Example:
|
||||
|
||||
flash@ff000000 {
|
||||
compatible = "amd,am29lv128ml", "cfi-flash";
|
||||
reg = <ff000000 01000000>;
|
||||
bank-width = <4>;
|
||||
device-width = <1>;
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
fs@0 {
|
||||
label = "fs";
|
||||
reg = <0 f80000>;
|
||||
};
|
||||
firmware@f80000 {
|
||||
label = "firmware";
|
||||
reg = <f80000 80000>;
|
||||
read-only;
|
||||
};
|
||||
};
|
||||
|
||||
k) Global Utilities Block
|
||||
|
||||
@@ -1824,6 +1851,397 @@ platforms are moved over to use the flattened-device-tree model.
|
||||
fsl,has-rstcr;
|
||||
};
|
||||
|
||||
l) Freescale Communications Processor Module
|
||||
|
||||
NOTE: This is an interim binding, and will likely change slightly,
|
||||
as more devices are supported. The QE bindings especially are
|
||||
incomplete.
|
||||
|
||||
i) Root CPM node
|
||||
|
||||
Properties:
|
||||
- compatible : "fsl,cpm1", "fsl,cpm2", or "fsl,qe".
|
||||
- reg : A 48-byte region beginning with CPCR.
|
||||
|
||||
Example:
|
||||
cpm@119c0 {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
#interrupt-cells = <2>;
|
||||
compatible = "fsl,mpc8272-cpm", "fsl,cpm2";
|
||||
reg = <119c0 30>;
|
||||
};
|
||||
|
||||
ii) Properties common to multiple CPM/QE devices
|
||||
|
||||
- fsl,cpm-command : This value is ORed with the opcode and command flag
|
||||
to specify the device on which a CPM command operates.
|
||||
|
||||
- fsl,cpm-brg : Indicates which baud rate generator the device
|
||||
is associated with. If absent, an unused BRG
|
||||
should be dynamically allocated. If zero, the
|
||||
device uses an external clock rather than a BRG.
|
||||
|
||||
- reg : Unless otherwise specified, the first resource represents the
|
||||
scc/fcc/ucc registers, and the second represents the device's
|
||||
parameter RAM region (if it has one).
|
||||
|
||||
iii) Serial
|
||||
|
||||
Currently defined compatibles:
|
||||
- fsl,cpm1-smc-uart
|
||||
- fsl,cpm2-smc-uart
|
||||
- fsl,cpm1-scc-uart
|
||||
- fsl,cpm2-scc-uart
|
||||
- fsl,qe-uart
|
||||
|
||||
Example:
|
||||
|
||||
serial@11a00 {
|
||||
device_type = "serial";
|
||||
compatible = "fsl,mpc8272-scc-uart",
|
||||
"fsl,cpm2-scc-uart";
|
||||
reg = <11a00 20 8000 100>;
|
||||
interrupts = <28 8>;
|
||||
interrupt-parent = <&PIC>;
|
||||
fsl,cpm-brg = <1>;
|
||||
fsl,cpm-command = <00800000>;
|
||||
};
|
||||
|
||||
iii) Network
|
||||
|
||||
Currently defined compatibles:
|
||||
- fsl,cpm1-scc-enet
|
||||
- fsl,cpm2-scc-enet
|
||||
- fsl,cpm1-fec-enet
|
||||
- fsl,cpm2-fcc-enet (third resource is GFEMR)
|
||||
- fsl,qe-enet
|
||||
|
||||
Example:
|
||||
|
||||
ethernet@11300 {
|
||||
device_type = "network";
|
||||
compatible = "fsl,mpc8272-fcc-enet",
|
||||
"fsl,cpm2-fcc-enet";
|
||||
reg = <11300 20 8400 100 11390 1>;
|
||||
local-mac-address = [ 00 00 00 00 00 00 ];
|
||||
interrupts = <20 8>;
|
||||
interrupt-parent = <&PIC>;
|
||||
phy-handle = <&PHY0>;
|
||||
linux,network-index = <0>;
|
||||
fsl,cpm-command = <12000300>;
|
||||
};
|
||||
|
||||
iv) MDIO
|
||||
|
||||
Currently defined compatibles:
|
||||
fsl,pq1-fec-mdio (reg is same as first resource of FEC device)
|
||||
fsl,cpm2-mdio-bitbang (reg is port C registers)
|
||||
|
||||
Properties for fsl,cpm2-mdio-bitbang:
|
||||
fsl,mdio-pin : pin of port C controlling mdio data
|
||||
fsl,mdc-pin : pin of port C controlling mdio clock
|
||||
|
||||
Example:
|
||||
|
||||
mdio@10d40 {
|
||||
device_type = "mdio";
|
||||
compatible = "fsl,mpc8272ads-mdio-bitbang",
|
||||
"fsl,mpc8272-mdio-bitbang",
|
||||
"fsl,cpm2-mdio-bitbang";
|
||||
reg = <10d40 14>;
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
fsl,mdio-pin = <12>;
|
||||
fsl,mdc-pin = <13>;
|
||||
};
|
||||
|
||||
v) Baud Rate Generators
|
||||
|
||||
Currently defined compatibles:
|
||||
fsl,cpm-brg
|
||||
fsl,cpm1-brg
|
||||
fsl,cpm2-brg
|
||||
|
||||
Properties:
|
||||
- reg : There may be an arbitrary number of reg resources; BRG
|
||||
numbers are assigned to these in order.
|
||||
- clock-frequency : Specifies the base frequency driving
|
||||
the BRG.
|
||||
|
||||
Example:
|
||||
|
||||
brg@119f0 {
|
||||
compatible = "fsl,mpc8272-brg",
|
||||
"fsl,cpm2-brg",
|
||||
"fsl,cpm-brg";
|
||||
reg = <119f0 10 115f0 10>;
|
||||
clock-frequency = <d#25000000>;
|
||||
};
|
||||
|
||||
vi) Interrupt Controllers
|
||||
|
||||
Currently defined compatibles:
|
||||
- fsl,cpm1-pic
|
||||
- only one interrupt cell
|
||||
- fsl,pq1-pic
|
||||
- fsl,cpm2-pic
|
||||
- second interrupt cell is level/sense:
|
||||
- 2 is falling edge
|
||||
- 8 is active low
|
||||
|
||||
Example:
|
||||
|
||||
interrupt-controller@10c00 {
|
||||
#interrupt-cells = <2>;
|
||||
interrupt-controller;
|
||||
reg = <10c00 80>;
|
||||
compatible = "mpc8272-pic", "fsl,cpm2-pic";
|
||||
};
|
||||
|
||||
vii) USB (Universal Serial Bus Controller)
|
||||
|
||||
Properties:
|
||||
- compatible : "fsl,cpm1-usb", "fsl,cpm2-usb", "fsl,qe-usb"
|
||||
|
||||
Example:
|
||||
usb@11bc0 {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
compatible = "fsl,cpm2-usb";
|
||||
reg = <11b60 18 8b00 100>;
|
||||
interrupts = <b 8>;
|
||||
interrupt-parent = <&PIC>;
|
||||
fsl,cpm-command = <2e600000>;
|
||||
};
|
||||
|
||||
viii) Multi-User RAM (MURAM)
|
||||
|
||||
The multi-user/dual-ported RAM is expressed as a bus under the CPM node.
|
||||
|
||||
Ranges must be set up subject to the following restrictions:
|
||||
|
||||
- Children's reg nodes must be offsets from the start of all muram, even
|
||||
if the user-data area does not begin at zero.
|
||||
- If multiple range entries are used, the difference between the parent
|
||||
address and the child address must be the same in all, so that a single
|
||||
mapping can cover them all while maintaining the ability to determine
|
||||
CPM-side offsets with pointer subtraction. It is recommended that
|
||||
multiple range entries not be used.
|
||||
- A child address of zero must be translatable, even if no reg resources
|
||||
contain it.
|
||||
|
||||
A child "data" node must exist, compatible with "fsl,cpm-muram-data", to
|
||||
indicate the portion of muram that is usable by the OS for arbitrary
|
||||
purposes. The data node may have an arbitrary number of reg resources,
|
||||
all of which contribute to the allocatable muram pool.
|
||||
|
||||
Example, based on mpc8272:
|
||||
|
||||
muram@0 {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
ranges = <0 0 10000>;
|
||||
|
||||
data@0 {
|
||||
compatible = "fsl,cpm-muram-data";
|
||||
reg = <0 2000 9800 800>;
|
||||
};
|
||||
};
|
||||
|
||||
m) Chipselect/Local Bus
|
||||
|
||||
Properties:
|
||||
- name : Should be localbus
|
||||
- #address-cells : Should be either two or three. The first cell is the
|
||||
chipselect number, and the remaining cells are the
|
||||
offset into the chipselect.
|
||||
- #size-cells : Either one or two, depending on how large each chipselect
|
||||
can be.
|
||||
- ranges : Each range corresponds to a single chipselect, and covers
|
||||
the entire access window as configured.
|
||||
|
||||
Example:
|
||||
localbus@f0010100 {
|
||||
compatible = "fsl,mpc8272ads-localbus",
|
||||
"fsl,mpc8272-localbus",
|
||||
"fsl,pq2-localbus";
|
||||
#address-cells = <2>;
|
||||
#size-cells = <1>;
|
||||
reg = <f0010100 40>;
|
||||
|
||||
ranges = <0 0 fe000000 02000000
|
||||
1 0 f4500000 00008000>;
|
||||
|
||||
flash@0,0 {
|
||||
compatible = "jedec-flash";
|
||||
reg = <0 0 2000000>;
|
||||
bank-width = <4>;
|
||||
device-width = <1>;
|
||||
};
|
||||
|
||||
board-control@1,0 {
|
||||
reg = <1 0 20>;
|
||||
compatible = "fsl,mpc8272ads-bcsr";
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
n) 4xx/Axon EMAC ethernet nodes
|
||||
|
||||
The EMAC ethernet controller in IBM and AMCC 4xx chips, and also
|
||||
the Axon bridge. To operate, this needs to interact with a
|
||||
special McMAL DMA controller, and sometimes an RGMII or ZMII
|
||||
interface. In addition to the nodes and properties described
|
||||
below, the node for the OPB bus on which the EMAC sits must have a
|
||||
correct clock-frequency property.
|
||||
|
||||
i) The EMAC node itself
|
||||
|
||||
Required properties:
|
||||
- device_type : "network"
|
||||
|
||||
- compatible : compatible list, contains 2 entries, first is
|
||||
"ibm,emac-CHIP" where CHIP is the host ASIC (440gx,
|
||||
405gp, Axon) and second is either "ibm,emac" or
|
||||
"ibm,emac4". For Axon, thus, we have: "ibm,emac-axon",
|
||||
"ibm,emac4"
|
||||
- interrupts : <interrupt mapping for EMAC IRQ and WOL IRQ>
|
||||
- interrupt-parent : optional, if needed for interrupt mapping
|
||||
- reg : <registers mapping>
|
||||
- local-mac-address : 6 bytes, MAC address
|
||||
- mal-device : phandle of the associated McMAL node
|
||||
- mal-tx-channel : 1 cell, index of the tx channel on McMAL associated
|
||||
with this EMAC
|
||||
- mal-rx-channel : 1 cell, index of the rx channel on McMAL associated
|
||||
with this EMAC
|
||||
- cell-index : 1 cell, hardware index of the EMAC cell on a given
|
||||
ASIC (typically 0x0 and 0x1 for EMAC0 and EMAC1 on
|
||||
each Axon chip)
|
||||
- max-frame-size : 1 cell, maximum frame size supported in bytes
|
||||
- rx-fifo-size : 1 cell, Rx fifo size in bytes for 10 and 100 Mb/sec
|
||||
operations.
|
||||
For Axon, 2048
|
||||
- tx-fifo-size : 1 cell, Tx fifo size in bytes for 10 and 100 Mb/sec
|
||||
operations.
|
||||
For Axon, 2048.
|
||||
- fifo-entry-size : 1 cell, size of a fifo entry (used to calculate
|
||||
thresholds).
|
||||
For Axon, 0x00000010
|
||||
- mal-burst-size : 1 cell, MAL burst size (used to calculate thresholds)
|
||||
in bytes.
|
||||
For Axon, 0x00000100 (I think ...)
|
||||
- phy-mode : string, mode of operations of the PHY interface.
|
||||
Supported values are: "mii", "rmii", "smii", "rgmii",
|
||||
"tbi", "gmii", "rtbi", "sgmii".
|
||||
For Axon on CAB, it is "rgmii"
|
||||
- mdio-device : 1 cell, required iff using shared MDIO registers
|
||||
(440EP). phandle of the EMAC to use to drive the
|
||||
MDIO lines for the PHY used by this EMAC.
|
||||
- zmii-device : 1 cell, required iff connected to a ZMII. phandle of
|
||||
the ZMII device node
|
||||
- zmii-channel : 1 cell, required iff connected to a ZMII. Which ZMII
|
||||
channel or 0xffffffff if ZMII is only used for MDIO.
|
||||
- rgmii-device : 1 cell, required iff connected to an RGMII. phandle
|
||||
of the RGMII device node.
|
||||
For Axon: phandle of plb5/plb4/opb/rgmii
|
||||
- rgmii-channel : 1 cell, required iff connected to an RGMII. Which
|
||||
RGMII channel is used by this EMAC.
|
||||
For Axon: present, whatever value is appropriate for each
|
||||
EMAC, that is the content of the current (bogus) "phy-port"
|
||||
property.
|
||||
|
||||
Recommended properties:
|
||||
- linux,network-index : This is the intended "index" of this
|
||||
network device. This is used by the bootwrapper to interpret
|
||||
MAC addresses passed by the firmware when no information other
|
||||
than indices is available to associate an address with a device.
|
||||
|
||||
Optional properties:
|
||||
- phy-address : 1 cell, optional, MDIO address of the PHY. If absent,
|
||||
a search is performed.
|
||||
- phy-map : 1 cell, optional, bitmap of addresses to probe the PHY
|
||||
for, used if phy-address is absent. bit 0x00000001 is
|
||||
MDIO address 0.
|
||||
For Axon it can be absent, though my current driver
|
||||
doesn't handle phy-address yet so for now, keep
|
||||
0x00ffffff in it.
|
||||
- rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec
|
||||
operations (if absent the value is the same as
|
||||
rx-fifo-size). For Axon, either absent or 2048.
|
||||
- tx-fifo-size-gige : 1 cell, Tx fifo size in bytes for 1000 Mb/sec
|
||||
operations (if absent the value is the same as
|
||||
tx-fifo-size). For Axon, either absent or 2048.
|
||||
- tah-device : 1 cell, optional. If connected to a TAH engine for
|
||||
offload, phandle of the TAH device node.
|
||||
- tah-channel : 1 cell, optional. If appropriate, channel used on the
|
||||
TAH engine.
|
||||
|
||||
Example:
|
||||
|
||||
EMAC0: ethernet@40000800 {
|
||||
linux,network-index = <0>;
|
||||
device_type = "network";
|
||||
compatible = "ibm,emac-440gp", "ibm,emac";
|
||||
interrupt-parent = <&UIC1>;
|
||||
interrupts = <1c 4 1d 4>;
|
||||
reg = <40000800 70>;
|
||||
local-mac-address = [00 04 AC E3 1B 1E];
|
||||
mal-device = <&MAL0>;
|
||||
mal-tx-channel = <0 1>;
|
||||
mal-rx-channel = <0>;
|
||||
cell-index = <0>;
|
||||
max-frame-size = <5dc>;
|
||||
rx-fifo-size = <1000>;
|
||||
tx-fifo-size = <800>;
|
||||
phy-mode = "rmii";
|
||||
phy-map = <00000001>;
|
||||
zmii-device = <&ZMII0>;
|
||||
zmii-channel = <0>;
|
||||
};
|
||||
|
||||
ii) McMAL node
|
||||
|
||||
Required properties:
|
||||
- device_type : "dma-controller"
|
||||
- compatible : compatible list, containing 2 entries, first is
|
||||
"ibm,mcmal-CHIP" where CHIP is the host ASIC (like
|
||||
emac) and the second is either "ibm,mcmal" or
|
||||
"ibm,mcmal2".
|
||||
For Axon, "ibm,mcmal-axon","ibm,mcmal2"
|
||||
- interrupts : <interrupt mapping for the MAL interrupts sources:
|
||||
5 sources: tx_eob, rx_eob, serr, txde, rxde>.
|
||||
For Axon: This is _different_ from the current
|
||||
firmware. We use the "delayed" interrupts for txeob
|
||||
and rxeob. Thus we end up with mapping those 5 MPIC
|
||||
interrupts, all level positive sensitive: 10, 11, 32,
|
||||
33, 34 (in decimal)
|
||||
- dcr-reg : < DCR registers range >
|
||||
- dcr-parent : if needed for dcr-reg
|
||||
- num-tx-chans : 1 cell, number of Tx channels
|
||||
- num-rx-chans : 1 cell, number of Rx channels
|
||||
|
||||
iii) ZMII node
|
||||
|
||||
Required properties:
|
||||
- compatible : compatible list, containing 2 entries, first is
|
||||
"ibm,zmii-CHIP" where CHIP is the host ASIC (like
|
||||
EMAC) and the second is "ibm,zmii".
|
||||
For Axon, there is no ZMII node.
|
||||
- reg : <registers mapping>
|
||||
|
||||
iv) RGMII node
|
||||
|
||||
Required properties:
|
||||
- compatible : compatible list, containing 2 entries, first is
|
||||
"ibm,rgmii-CHIP" where CHIP is the host ASIC (like
|
||||
EMAC) and the second is "ibm,rgmii".
|
||||
For Axon, "ibm,rgmii-axon","ibm,rgmii"
|
||||
- reg : <registers mapping>
|
||||
- revision : as provided by the RGMII new version register if
|
||||
available.
|
||||
For Axon: 0x0000012a
|
||||
|
||||
More devices will be defined as this spec matures.
|
||||
|
||||
VII - Specifying interrupt information for devices
|
||||
|
89
Documentation/rfkill.txt
Normal file
89
Documentation/rfkill.txt
Normal file
@@ -0,0 +1,89 @@
|
||||
rfkill - RF switch subsystem support
|
||||
====================================
|
||||
|
||||
1 Implementation details
|
||||
2 Driver support
|
||||
3 Userspace support
|
||||
|
||||
===============================================================================
|
||||
1: Implementation details
|
||||
|
||||
The rfkill switch subsystem offers support for keys often found on laptops
|
||||
to enable wireless devices like WiFi and Bluetooth.
|
||||
|
||||
This is done by providing the user 3 possibilities:
|
||||
1 - The rfkill system handles all events; userspace is not aware of events.
|
||||
2 - The rfkill system handles all events; userspace is informed about the events.
|
||||
3 - The rfkill system does not handle events; userspace handles all events.
|
||||
|
||||
The buttons to enable and disable the wireless radios are important in
|
||||
situations where the user is for example using his laptop on a location where
|
||||
wireless radios _must_ be disabled (e.g. airplanes).
|
||||
Because of this requirement, userspace support for the keys should not be
|
||||
made mandatory. Because userspace might want to perform some additional smarter
|
||||
tasks when the key is pressed, rfkill still provides userspace the possibility
|
||||
to take over the task to handle the key events.
|
||||
|
||||
The system inside the kernel has been split into 2 separate sections:
|
||||
1 - RFKILL
|
||||
2 - RFKILL_INPUT
|
||||
|
||||
The first option enables rfkill support and will make sure userspace will
|
||||
be notified of any events through the input device. It also creates several
|
||||
sysfs entries which can be used by userspace. See section "Userspace support".
|
||||
|
||||
The second option provides an rfkill input handler. This handler will
|
||||
listen to all rfkill key events and will toggle the radio accordingly.
|
||||
With this option enabled userspace could either do nothing or simply
|
||||
perform monitoring tasks.
|
||||
|
||||
====================================
|
||||
2: Driver support
|
||||
|
||||
To build a driver with rfkill subsystem support, the driver should
|
||||
depend on the Kconfig symbol RFKILL; it should _not_ depend on
|
||||
RFKILL_INPUT.
|
||||
|
||||
Unless key events trigger an interrupt to which the driver listens, polling
|
||||
will be required to determine the key state changes. For this the input
|
||||
layer provides the input-polldev handler.
|
||||
|
||||
A driver should implement a few steps to correctly make use of the
|
||||
rfkill subsystem. First for non-polling drivers:
|
||||
|
||||
- rfkill_allocate()
|
||||
- input_allocate_device()
|
||||
- rfkill_register()
|
||||
- input_register_device()
|
||||
|
||||
For polling drivers:
|
||||
|
||||
- rfkill_allocate()
|
||||
- input_allocate_polled_device()
|
||||
- rfkill_register()
|
||||
- input_register_polled_device()
|
||||
|
||||
When a key event has been detected, the correct event should be
|
||||
sent over the input device which has been registered by the driver.
|
||||
|
||||
====================================
|
||||
3: Userspace support
|
||||
|
||||
For each key an input device will be created which will send out the correct
|
||||
key event when the rfkill key has been pressed.
|
||||
|
||||
The following sysfs entries will be created:
|
||||
|
||||
name: Name assigned by driver to this key (interface or driver name).
|
||||
type: Name of the key type ("wlan", "bluetooth", etc).
|
||||
state: Current state of the key. 1: On, 0: Off.
|
||||
claim: 1: Userspace handles events, 0: Kernel handles events
|
||||
|
||||
Both the "state" and "claim" entries are also writable. For the "state" entry
|
||||
this means that when 1 or 0 is written all radios, not yet in the requested
|
||||
state, will be toggled accordingly.
|
||||
For the "claim" entry writing 1 to it means that the kernel no longer handles
|
||||
key events even though RFKILL_INPUT input was enabled. When "claim" has been
|
||||
set to 0, userspace should make sure that it listens for the input events or
|
||||
check the sysfs "state" entry regularly to correctly perform the required
|
||||
tasks when the rfkill key is pressed.
|
26
Documentation/s390/00-INDEX
Normal file
26
Documentation/s390/00-INDEX
Normal file
@@ -0,0 +1,26 @@
|
||||
00-INDEX
|
||||
- this file.
|
||||
3270.ChangeLog
|
||||
- ChangeLog for the UTS Global 3270-support patch (outdated).
|
||||
3270.txt
|
||||
- how to use the IBM 3270 display system support.
|
||||
cds.txt
|
||||
- s390 common device support (common I/O layer).
|
||||
CommonIO
|
||||
- common I/O layer command line parameters, procfs and debugfs entries
|
||||
config3270.sh
|
||||
- example configuration for 3270 devices.
|
||||
DASD
|
||||
- information on the DASD disk device driver.
|
||||
Debugging390.txt
|
||||
- hints for debugging on s390 systems.
|
||||
driver-model.txt
|
||||
- information on s390 devices and the driver model.
|
||||
monreader.txt
|
||||
- information on accessing the z/VM monitor stream from Linux.
|
||||
s390dbf.txt
|
||||
- information on using the s390 debug feature.
|
||||
TAPE
|
||||
- information on the driver for channel-attached tapes.
|
||||
zfcpdump
|
||||
- information on the s390 SCSI dump tool.
|
@@ -1,5 +1,5 @@
|
||||
S/390 common I/O-Layer - command line parameters and /proc entries
|
||||
==================================================================
|
||||
S/390 common I/O-Layer - command line parameters, procfs and debugfs entries
|
||||
============================================================================
|
||||
|
||||
Command line parameters
|
||||
-----------------------
|
||||
@@ -7,9 +7,9 @@ Command line parameters
|
||||
* cio_msg = yes | no
|
||||
|
||||
Determines whether information on found devices and sensed device
|
||||
characteristics should be shown during startup, i. e. messages of the types
|
||||
"Detected device 0.0.4711 on subchannel 0.0.0042" and "SenseID: Device
|
||||
0.0.4711 reports: ...".
|
||||
characteristics should be shown during startup or when new devices are
|
||||
found, i. e. messages of the types "Detected device 0.0.4711 on subchannel
|
||||
0.0.0042" and "SenseID: Device 0.0.4711 reports: ...".
|
||||
|
||||
Default is off.
|
||||
|
||||
@@ -26,8 +26,10 @@ Command line parameters
|
||||
An ignored device can be un-ignored later; see the "/proc entries"-section for
|
||||
details.
|
||||
|
||||
The devices must be given either as bus ids (0.0.abcd) or as hexadecimal
|
||||
device numbers (0xabcd or abcd, for 2.4 backward compatibility).
|
||||
The devices must be given either as bus ids (0.x.abcd) or as hexadecimal
|
||||
device numbers (0xabcd or abcd, for 2.4 backward compatibility). If you
|
||||
give a device number 0xabcd, it will be interpreted as 0.0.abcd.
|
||||
|
||||
You can use the 'all' keyword to ignore all devices.
|
||||
The '!' operator will cause the I/O-layer to _not_ ignore a device.
|
||||
The command line is parsed from left to right.
|
||||
@@ -81,31 +83,36 @@ Command line parameters
|
||||
will add 0.0.a000-0.0.accc and 0.0.af00-0.0.afff to the list of ignored
|
||||
devices.
|
||||
|
||||
The devices can be specified either by bus id (0.0.abcd) or, for 2.4 backward
|
||||
compatibility, by the device number in hexadecimal (0xabcd or abcd).
|
||||
The devices can be specified either by bus id (0.x.abcd) or, for 2.4 backward
|
||||
compatibility, by the device number in hexadecimal (0xabcd or abcd). Device
|
||||
numbers given as 0xabcd will be interpreted as 0.0.abcd.
|
||||
|
||||
* For some of the information present in the /proc filesystem in 2.4 (namely,
|
||||
/proc/subchannels and /proc/chpids), see driver-model.txt.
|
||||
Information formerly in /proc/irq_count is now in /proc/interrupts.
|
||||
|
||||
|
||||
* /proc/s390dbf/cio_*/ (S/390 debug feature)
|
||||
debugfs entries
|
||||
---------------
|
||||
|
||||
* /sys/kernel/debug/s390dbf/cio_*/ (S/390 debug feature)
|
||||
|
||||
Some views generated by the debug feature to hold various debug outputs.
|
||||
|
||||
- /proc/s390dbf/cio_crw/sprintf
|
||||
- /sys/kernel/debug/s390dbf/cio_crw/sprintf
|
||||
Messages from the processing of pending channel report words (machine check
|
||||
handling), which will also show when CONFIG_DEBUG_CRW is defined.
|
||||
handling).
|
||||
|
||||
- /proc/s390dbf/cio_msg/sprintf
|
||||
Various debug messages from the common I/O-layer; generally, messages which
|
||||
will also show when CONFIG_DEBUG_IO is defined.
|
||||
- /sys/kernel/debug/s390dbf/cio_msg/sprintf
|
||||
Various debug messages from the common I/O-layer, including messages
|
||||
printed when cio_msg=yes.
|
||||
|
||||
- /proc/s390dbf/cio_trace/hex_ascii
|
||||
- /sys/kernel/debug/s390dbf/cio_trace/hex_ascii
|
||||
Logs the calling of functions in the common I/O-layer and, if applicable,
|
||||
which subchannel they were called for, as well as dumps of some data
|
||||
structures (like irb in an error case).
|
||||
|
||||
The level of logging can be changed to be more or less verbose by piping to
|
||||
/proc/s390dbf/cio_*/level a number between 0 and 6; see the documentation on
|
||||
the S/390 debug feature (Documentation/s390/s390dbf.txt) for details.
|
||||
|
||||
* For some of the information present in the /proc filesystem in 2.4 (namely,
|
||||
/proc/subchannels and /proc/chpids), see driver-model.txt.
|
||||
Information formerly in /proc/irq_count is now in /proc/interrupts.
|
||||
/sys/kernel/debug/s390dbf/cio_*/level a number between 0 and 6; see the
|
||||
documentation on the S/390 debug feature (Documentation/s390/s390dbf.txt)
|
||||
for details.
|
||||
|
@@ -286,10 +286,10 @@ first:
|
||||
timeout value
|
||||
-EIO: the common I/O layer terminated the request due to an error state
|
||||
|
||||
If the concurrent sense flag in the extended status word in the irb is set, the
|
||||
field irb->scsw.count describes the number of device specific sense bytes
|
||||
available in the extended control word irb->scsw.ecw[0]. No device sensing by
|
||||
the device driver itself is required.
|
||||
If the concurrent sense flag in the extended status word (esw) in the irb is
|
||||
set, the field erw.scnt in the esw describes the number of device specific
|
||||
sense bytes available in the extended control word irb->scsw.ecw[]. No device
|
||||
sensing by the device driver itself is required.
|
||||
|
||||
The device interrupt handler can use the following definitions to investigate
|
||||
the primary unit check source coded in sense byte 0 :
|
||||
|
92
Documentation/usb/authorization.txt
Normal file
92
Documentation/usb/authorization.txt
Normal file
@@ -0,0 +1,92 @@
|
||||
|
||||
Authorizing (or not) your USB devices to connect to the system
|
||||
|
||||
(C) 2007 Inaky Perez-Gonzalez <inaky@linux.intel.com> Intel Corporation
|
||||
|
||||
This feature allows you to control if a USB device can be used (or
|
||||
not) in a system. This feature will allow you to implement a lock-down
|
||||
of USB devices, fully controlled by user space.
|
||||
|
||||
As of now, when a USB device is connected it is configured and
|
||||
its interfaces immediately made available to the users. With this
|
||||
modification, only if root authorizes the device to be configured will
|
||||
then it be possible to use it.
|
||||
|
||||
Usage:
|
||||
|
||||
Authorize a device to connect:
|
||||
|
||||
$ echo 1 > /sys/bus/usb/devices/DEVICE/authorized
|
||||
|
||||
Deauthorize a device:
|
||||
|
||||
$ echo 0 > /sys/bus/usb/devices/DEVICE/authorized
|
||||
|
||||
Set new devices connected to hostX to be deauthorized by default (ie:
|
||||
lock down):
|
||||
|
||||
$ echo 0 > /sys/bus/usb/devices/usbX/authorized_default
|
||||
|
||||
Remove the lock down:
|
||||
|
||||
$ echo 1 > /sys/bus/usb/devices/usbX/authorized_default
|
||||
|
||||
By default, Wired USB devices are authorized to
|
||||
connect. Wireless USB hosts deauthorize by default all new connected
|
||||
devices (this is so because we need to do an authentication phase
|
||||
before authorizing).
|
||||
|
||||
|
||||
Example system lockdown (lame)
|
||||
-----------------------
|
||||
|
||||
Imagine you want to implement a lockdown so only devices of type XYZ
|
||||
can be connected (for example, it is a kiosk machine with a visible
|
||||
USB port):
|
||||
|
||||
boot up
|
||||
rc.local ->
|
||||
|
||||
for host in /sys/bus/usb/devices/usb*
|
||||
do
|
||||
echo 0 > $host/authorized_default
|
||||
done
|
||||
|
||||
Hook up a script to udev, for new USB devices
|
||||
|
||||
if device_is_my_type $DEV
|
||||
then
|
||||
echo 1 > $device_path/authorized
|
||||
fi
|
||||
|
||||
|
||||
Now, device_is_my_type() is where the juice for a lockdown is. Just
|
||||
checking if the class, type and protocol match something is the worst
|
||||
security verification you can make (or the best, for someone willing
|
||||
to break it). If you need something secure, use crypto and Certificate
|
||||
Authentication or stuff like that. Something simple for a storage key
|
||||
could be:
|
||||
|
||||
function device_is_my_type()
|
||||
{
|
||||
echo 1 > authorized # temporarily authorize it
|
||||
# FIXME: make sure none can mount it
|
||||
mount DEVICENODE /mntpoint
|
||||
sum=$(md5sum /mntpoint/.signature)
|
||||
if [ $sum = $(cat /etc/lockdown/keysum) ]
|
||||
then
|
||||
echo "We are good, connected"
|
||||
umount /mntpoint
|
||||
# Other stuff so others can use it
|
||||
else
|
||||
echo 0 > authorized
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
Of course, this is lame, you'd want to do a real certificate
|
||||
verification stuff with PKI, so you don't depend on a shared secret,
|
||||
etc, but you get the idea. Anybody with access to a device gadget kit
|
||||
can fake descriptors and device info. Don't trust that. You are
|
||||
welcome.
|
||||
|
517
Documentation/usb/power-management.txt
Normal file
517
Documentation/usb/power-management.txt
Normal file
@@ -0,0 +1,517 @@
|
||||
Power Management for USB
|
||||
|
||||
Alan Stern <stern@rowland.harvard.edu>
|
||||
|
||||
October 5, 2007
|
||||
|
||||
|
||||
|
||||
What is Power Management?
|
||||
-------------------------
|
||||
|
||||
Power Management (PM) is the practice of saving energy by suspending
|
||||
parts of a computer system when they aren't being used. While a
|
||||
component is "suspended" it is in a nonfunctional low-power state; it
|
||||
might even be turned off completely. A suspended component can be
|
||||
"resumed" (returned to a functional full-power state) when the kernel
|
||||
needs to use it. (There also are forms of PM in which components are
|
||||
placed in a less functional but still usable state instead of being
|
||||
suspended; an example would be reducing the CPU's clock rate. This
|
||||
document will not discuss those other forms.)
|
||||
|
||||
When the parts being suspended include the CPU and most of the rest of
|
||||
the system, we speak of it as a "system suspend". When a particular
|
||||
device is turned off while the system as a whole remains running, we
|
||||
call it a "dynamic suspend" (also known as a "runtime suspend" or
|
||||
"selective suspend"). This document concentrates mostly on how
|
||||
dynamic PM is implemented in the USB subsystem, although system PM is
|
||||
covered to some extent (see Documentation/power/*.txt for more
|
||||
information about system PM).
|
||||
|
||||
Note: Dynamic PM support for USB is present only if the kernel was
|
||||
built with CONFIG_USB_SUSPEND enabled. System PM support is present
|
||||
only if the kernel was built with CONFIG_SUSPEND or CONFIG_HIBERNATION
|
||||
enabled.
|
||||
|
||||
|
||||
What is Remote Wakeup?
|
||||
----------------------
|
||||
|
||||
When a device has been suspended, it generally doesn't resume until
|
||||
the computer tells it to. Likewise, if the entire computer has been
|
||||
suspended, it generally doesn't resume until the user tells it to, say
|
||||
by pressing a power button or opening the cover.
|
||||
|
||||
However some devices have the capability of resuming by themselves, or
|
||||
asking the kernel to resume them, or even telling the entire computer
|
||||
to resume. This capability goes by several names such as "Wake On
|
||||
LAN"; we will refer to it generically as "remote wakeup". When a
|
||||
device is enabled for remote wakeup and it is suspended, it may resume
|
||||
itself (or send a request to be resumed) in response to some external
|
||||
event. Examples include a suspended keyboard resuming when a key is
|
||||
pressed, or a suspended USB hub resuming when a device is plugged in.
|
||||
|
||||
|
||||
When is a USB device idle?
|
||||
--------------------------
|
||||
|
||||
A device is idle whenever the kernel thinks it's not busy doing
|
||||
anything important and thus is a candidate for being suspended. The
|
||||
exact definition depends on the device's driver; drivers are allowed
|
||||
to declare that a device isn't idle even when there's no actual
|
||||
communication taking place. (For example, a hub isn't considered idle
|
||||
unless all the devices plugged into that hub are already suspended.)
|
||||
In addition, a device isn't considered idle so long as a program keeps
|
||||
its usbfs file open, whether or not any I/O is going on.
|
||||
|
||||
If a USB device has no driver, its usbfs file isn't open, and it isn't
|
||||
being accessed through sysfs, then it definitely is idle.
|
||||
|
||||
|
||||
Forms of dynamic PM
|
||||
-------------------
|
||||
|
||||
Dynamic suspends can occur in two ways: manual and automatic.
|
||||
"Manual" means that the user has told the kernel to suspend a device,
|
||||
whereas "automatic" means that the kernel has decided all by itself to
|
||||
suspend a device. Automatic suspend is called "autosuspend" for
|
||||
short. In general, a device won't be autosuspended unless it has been
|
||||
idle for some minimum period of time, the so-called idle-delay time.
|
||||
|
||||
Of course, nothing the kernel does on its own initiative should
|
||||
prevent the computer or its devices from working properly. If a
|
||||
device has been autosuspended and a program tries to use it, the
|
||||
kernel will automatically resume the device (autoresume). For the
|
||||
same reason, an autosuspended device will usually have remote wakeup
|
||||
enabled, if the device supports remote wakeup.
|
||||
|
||||
It is worth mentioning that many USB drivers don't support
|
||||
autosuspend. In fact, at the time of this writing (Linux 2.6.23) the
|
||||
only drivers which do support it are the hub driver, kaweth, asix,
|
||||
usblp, usblcd, and usb-skeleton (which doesn't count). If a
|
||||
non-supporting driver is bound to a device, the device won't be
|
||||
autosuspended. In effect, the kernel pretends the device is never
|
||||
idle.
|
||||
|
||||
We can categorize power management events in two broad classes:
|
||||
external and internal. External events are those triggered by some
|
||||
agent outside the USB stack: system suspend/resume (triggered by
|
||||
userspace), manual dynamic suspend/resume (also triggered by
|
||||
userspace), and remote wakeup (triggered by the device). Internal
|
||||
events are those triggered within the USB stack: autosuspend and
|
||||
autoresume.
|
||||
|
||||
|
||||
The user interface for dynamic PM
|
||||
---------------------------------
|
||||
|
||||
The user interface for controlling dynamic PM is located in the power/
|
||||
subdirectory of each USB device's sysfs directory, that is, in
|
||||
/sys/bus/usb/devices/.../power/ where "..." is the device's ID. The
|
||||
relevant attribute files are: wakeup, level, and autosuspend.
|
||||
|
||||
power/wakeup
|
||||
|
||||
This file is empty if the device does not support
|
||||
remote wakeup. Otherwise the file contains either the
|
||||
word "enabled" or the word "disabled", and you can
|
||||
write those words to the file. The setting determines
|
||||
whether or not remote wakeup will be enabled when the
|
||||
device is next suspended. (If the setting is changed
|
||||
while the device is suspended, the change won't take
|
||||
effect until the following suspend.)
|
||||
|
||||
power/level
|
||||
|
||||
This file contains one of three words: "on", "auto",
|
||||
or "suspend". You can write those words to the file
|
||||
to change the device's setting.
|
||||
|
||||
"on" means that the device should be resumed and
|
||||
autosuspend is not allowed. (Of course, system
|
||||
suspends are still allowed.)
|
||||
|
||||
"auto" is the normal state in which the kernel is
|
||||
allowed to autosuspend and autoresume the device.
|
||||
|
||||
"suspend" means that the device should remain
|
||||
suspended, and autoresume is not allowed. (But remote
|
||||
wakeup may still be allowed, since it is controlled
|
||||
separately by the power/wakeup attribute.)
|
||||
|
||||
power/autosuspend
|
||||
|
||||
This file contains an integer value, which is the
|
||||
number of seconds the device should remain idle before
|
||||
the kernel will autosuspend it (the idle-delay time).
|
||||
The default is 2. 0 means to autosuspend as soon as
|
||||
the device becomes idle, and -1 means never to
|
||||
autosuspend. You can write a number to the file to
|
||||
change the autosuspend idle-delay time.
|
||||
|
||||
Writing "-1" to power/autosuspend and writing "on" to power/level do
|
||||
essentially the same thing -- they both prevent the device from being
|
||||
autosuspended. Yes, this is a redundancy in the API.
|
||||
|
||||
(In 2.6.21 writing "0" to power/autosuspend would prevent the device
|
||||
from being autosuspended; the behavior was changed in 2.6.22. The
|
||||
power/autosuspend attribute did not exist prior to 2.6.21, and the
|
||||
power/level attribute did not exist prior to 2.6.22.)
|
||||
|
||||
|
||||
Changing the default idle-delay time
|
||||
------------------------------------
|
||||
|
||||
The default autosuspend idle-delay time is controlled by a module
|
||||
parameter in usbcore. You can specify the value when usbcore is
|
||||
loaded. For example, to set it to 5 seconds instead of 2 you would
|
||||
do:
|
||||
|
||||
modprobe usbcore autosuspend=5
|
||||
|
||||
Equivalently, you could add to /etc/modprobe.conf a line saying:
|
||||
|
||||
options usbcore autosuspend=5
|
||||
|
||||
Some distributions load the usbcore module very early during the boot
|
||||
process, by means of a program or script running from an initramfs
|
||||
image. To alter the parameter value you would have to rebuild that
|
||||
image.
|
||||
|
||||
If usbcore is compiled into the kernel rather than built as a loadable
|
||||
module, you can add
|
||||
|
||||
usbcore.autosuspend=5
|
||||
|
||||
to the kernel's boot command line.
|
||||
|
||||
Finally, the parameter value can be changed while the system is
|
||||
running. If you do:
|
||||
|
||||
echo 5 >/sys/module/usbcore/parameters/autosuspend
|
||||
|
||||
then each new USB device will have its autosuspend idle-delay
|
||||
initialized to 5. (The idle-delay values for already existing devices
|
||||
will not be affected.)
|
||||
|
||||
Setting the initial default idle-delay to -1 will prevent any
|
||||
autosuspend of any USB device. This is a simple alternative to
|
||||
disabling CONFIG_USB_SUSPEND and rebuilding the kernel, and it has the
|
||||
added benefit of allowing you to enable autosuspend for selected
|
||||
devices.
|
||||
|
||||
|
||||
Warnings
|
||||
--------
|
||||
|
||||
The USB specification states that all USB devices must support power
|
||||
management. Nevertheless, the sad fact is that many devices do not
|
||||
support it very well. You can suspend them all right, but when you
|
||||
try to resume them they disconnect themselves from the USB bus or
|
||||
they stop working entirely. This seems to be especially prevalent
|
||||
among printers and scanners, but plenty of other types of device have
|
||||
the same deficiency.
|
||||
|
||||
For this reason, by default the kernel disables autosuspend (the
|
||||
power/level attribute is initialized to "on") for all devices other
|
||||
than hubs. Hubs, at least, appear to be reasonably well-behaved in
|
||||
this regard.
|
||||
|
||||
(In 2.6.21 and 2.6.22 this wasn't the case. Autosuspend was enabled
|
||||
by default for almost all USB devices. A number of people experienced
|
||||
problems as a result.)
|
||||
|
||||
This means that non-hub devices won't be autosuspended unless the user
|
||||
or a program explicitly enables it. As of this writing there aren't
|
||||
any widespread programs which will do this; we hope that in the near
|
||||
future device managers such as HAL will take on this added
|
||||
responsibility. In the meantime you can always carry out the
|
||||
necessary operations by hand or add them to a udev script. You can
|
||||
also change the idle-delay time; 2 seconds is not the best choice for
|
||||
every device.
|
||||
|
||||
Sometimes it turns out that even when a device does work okay with
|
||||
autosuspend there are still problems. For example, there are
|
||||
experimental patches adding autosuspend support to the usbhid driver,
|
||||
which manages keyboards and mice, among other things. Tests with a
|
||||
number of keyboards showed that typing on a suspended keyboard, while
|
||||
causing the keyboard to do a remote wakeup all right, would
|
||||
nonetheless frequently result in lost keystrokes. Tests with mice
|
||||
showed that some of them would issue a remote-wakeup request in
|
||||
response to button presses but not to motion, and some in response to
|
||||
neither.
|
||||
|
||||
The kernel will not prevent you from enabling autosuspend on devices
|
||||
that can't handle it. It is even possible in theory to damage a
|
||||
device by suspending it at the wrong time -- for example, suspending a
|
||||
USB hard disk might cause it to spin down without parking the heads.
|
||||
(Highly unlikely, but possible.) Take care.
|
||||
|
||||
|
||||
The driver interface for Power Management
|
||||
-----------------------------------------
|
||||
|
||||
The requirements for a USB driver to support external power management
|
||||
are pretty modest; the driver need only define
|
||||
|
||||
.suspend
|
||||
.resume
|
||||
.reset_resume
|
||||
|
||||
methods in its usb_driver structure, and the reset_resume method is
|
||||
optional. The methods' jobs are quite simple:
|
||||
|
||||
The suspend method is called to warn the driver that the
|
||||
device is going to be suspended. If the driver returns a
|
||||
negative error code, the suspend will be aborted. Normally
|
||||
the driver will return 0, in which case it must cancel all
|
||||
outstanding URBs (usb_kill_urb()) and not submit any more.
|
||||
|
||||
The resume method is called to tell the driver that the
|
||||
device has been resumed and the driver can return to normal
|
||||
operation. URBs may once more be submitted.
|
||||
|
||||
The reset_resume method is called to tell the driver that
|
||||
the device has been resumed and it also has been reset.
|
||||
The driver should redo any necessary device initialization,
|
||||
since the device has probably lost most or all of its state
|
||||
(although the interfaces will be in the same altsettings as
|
||||
before the suspend).
|
||||
|
||||
The reset_resume method is used by the USB Persist facility (see
|
||||
Documentation/usb/persist.txt) and it can also be used under certain
|
||||
circumstances when CONFIG_USB_PERSIST is not enabled. Currently, if a
|
||||
device is reset during a resume and the driver does not have a
|
||||
reset_resume method, the driver won't receive any notification about
|
||||
the resume. Later kernels will call the driver's disconnect method;
|
||||
2.6.23 doesn't do this.
|
||||
|
||||
USB drivers are bound to interfaces, so their suspend and resume
|
||||
methods get called when the interfaces are suspended or resumed. In
|
||||
principle one might want to suspend some interfaces on a device (i.e.,
|
||||
force the drivers for those interfaces to stop all activity) without
|
||||
suspending the other interfaces. The USB core doesn't allow this; all
|
||||
interfaces are suspended when the device itself is suspended and all
|
||||
interfaces are resumed when the device is resumed. It isn't possible
|
||||
to suspend or resume some but not all of a device's interfaces. The
|
||||
closest you can come is to unbind the interfaces' drivers.
|
||||
|
||||
|
||||
The driver interface for autosuspend and autoresume
|
||||
---------------------------------------------------
|
||||
|
||||
To support autosuspend and autoresume, a driver should implement all
|
||||
three of the methods listed above. In addition, a driver indicates
|
||||
that it supports autosuspend by setting the .supports_autosuspend flag
|
||||
in its usb_driver structure. It is then responsible for informing the
|
||||
USB core whenever one of its interfaces becomes busy or idle. The
|
||||
driver does so by calling these three functions:
|
||||
|
||||
int usb_autopm_get_interface(struct usb_interface *intf);
|
||||
void usb_autopm_put_interface(struct usb_interface *intf);
|
||||
int usb_autopm_set_interface(struct usb_interface *intf);
|
||||
|
||||
The functions work by maintaining a counter in the usb_interface
|
||||
structure. When intf->pm_usage_cnt is > 0 then the interface is
|
||||
deemed to be busy, and the kernel will not autosuspend the interface's
|
||||
device. When intf->pm_usage_cnt is <= 0 then the interface is
|
||||
considered to be idle, and the kernel may autosuspend the device.
|
||||
|
||||
(There is a similar pm_usage_cnt field in struct usb_device,
|
||||
associated with the device itself rather than any of its interfaces.
|
||||
This field is used only by the USB core.)
|
||||
|
||||
The driver owns intf->pm_usage_cnt; it can modify the value however
|
||||
and whenever it likes. A nice aspect of the usb_autopm_* routines is
|
||||
that the changes they make are protected by the usb_device structure's
|
||||
PM mutex (udev->pm_mutex); however drivers may change pm_usage_cnt
|
||||
without holding the mutex.
|
||||
|
||||
usb_autopm_get_interface() increments pm_usage_cnt and
|
||||
attempts an autoresume if the new value is > 0 and the
|
||||
device is suspended.
|
||||
|
||||
usb_autopm_put_interface() decrements pm_usage_cnt and
|
||||
attempts an autosuspend if the new value is <= 0 and the
|
||||
device isn't suspended.
|
||||
|
||||
usb_autopm_set_interface() leaves pm_usage_cnt alone.
|
||||
It attempts an autoresume if the value is > 0 and the device
|
||||
is suspended, and it attempts an autosuspend if the value is
|
||||
<= 0 and the device isn't suspended.
|
||||
|
||||
There also are a couple of utility routines drivers can use:
|
||||
|
||||
usb_autopm_enable() sets pm_usage_cnt to 1 and then calls
|
||||
usb_autopm_set_interface(), which will attempt an autoresume.
|
||||
|
||||
usb_autopm_disable() sets pm_usage_cnt to 0 and then calls
|
||||
usb_autopm_set_interface(), which will attempt an autosuspend.
|
||||
|
||||
The conventional usage pattern is that a driver calls
|
||||
usb_autopm_get_interface() in its open routine and
|
||||
usb_autopm_put_interface() in its close or release routine. But
|
||||
other patterns are possible.
|
||||
|
||||
The autosuspend attempts mentioned above will often fail for one
|
||||
reason or another. For example, the power/level attribute might be
|
||||
set to "on", or another interface in the same device might not be
|
||||
idle. This is perfectly normal. If the reason for failure was that
|
||||
the device hasn't been idle for long enough, a delayed workqueue
|
||||
routine is automatically set up to carry out the operation when the
|
||||
autosuspend idle-delay has expired.
|
||||
|
||||
Autoresume attempts also can fail. This will happen if power/level is
|
||||
set to "suspend" or if the device doesn't manage to resume properly.
|
||||
Unlike autosuspend, there's no delay for an autoresume.
|
||||
|
||||
|
||||
Other parts of the driver interface
|
||||
-----------------------------------
|
||||
|
||||
Sometimes a driver needs to make sure that remote wakeup is enabled
|
||||
during autosuspend. For example, there's not much point
|
||||
autosuspending a keyboard if the user can't cause the keyboard to do a
|
||||
remote wakeup by typing on it. If the driver sets
|
||||
intf->needs_remote_wakeup to 1, the kernel won't autosuspend the
|
||||
device if remote wakeup isn't available or has been disabled through
|
||||
the power/wakeup attribute. (If the device is already autosuspended,
|
||||
though, setting this flag won't cause the kernel to autoresume it.
|
||||
Normally a driver would set this flag in its probe method, at which
|
||||
time the device is guaranteed not to be autosuspended.)
|
||||
|
||||
The usb_autopm_* routines have to run in a sleepable process context;
|
||||
they must not be called from an interrupt handler or while holding a
|
||||
spinlock. In fact, the entire autosuspend mechanism is not well geared
|
||||
toward interrupt-driven operation. However there is one thing a
|
||||
driver can do in an interrupt handler:
|
||||
|
||||
usb_mark_last_busy(struct usb_device *udev);
|
||||
|
||||
This sets udev->last_busy to the current time. udev->last_busy is the
|
||||
field used for idle-delay calculations; updating it will cause any
|
||||
pending autosuspend to be moved back. The usb_autopm_* routines will
|
||||
also set the last_busy field to the current time.
|
||||
|
||||
Calling usb_mark_last_busy() from within an URB completion handler is
|
||||
subject to races: The kernel may have just finished deciding the
|
||||
device has been idle for long enough but not yet gotten around to
|
||||
calling the driver's suspend method. The driver would have to be
|
||||
responsible for synchronizing its suspend method with its URB
|
||||
completion handler and causing the autosuspend to fail with -EBUSY if
|
||||
an URB had completed too recently.
|
||||
|
||||
External suspend calls should never be allowed to fail in this way,
|
||||
only autosuspend calls. The driver can tell them apart by checking
|
||||
udev->auto_pm; this flag will be set to 1 for internal PM events
|
||||
(autosuspend or autoresume) and 0 for external PM events.
|
||||
|
||||
Many of the ingredients in the autosuspend framework are oriented
|
||||
towards interfaces: The usb_interface structure contains the
|
||||
pm_usage_cnt field, and the usb_autopm_* routines take an interface
|
||||
pointer as their argument. But somewhat confusingly, a few of the
|
||||
pieces (usb_mark_last_busy() and udev->auto_pm) use the usb_device
|
||||
structure instead. Drivers need to keep this straight; they can call
|
||||
interface_to_usbdev() to find the device structure for a given
|
||||
interface.
|
||||
|
||||
|
||||
Locking requirements
|
||||
--------------------
|
||||
|
||||
All three suspend/resume methods are always called while holding the
|
||||
usb_device's PM mutex. For external events -- but not necessarily for
|
||||
autosuspend or autoresume -- the device semaphore (udev->dev.sem) will
|
||||
also be held. This implies that external suspend/resume events are
|
||||
mutually exclusive with calls to probe, disconnect, pre_reset, and
|
||||
post_reset; the USB core guarantees that this is true of internal
|
||||
suspend/resume events as well.
|
||||
|
||||
If a driver wants to block all suspend/resume calls during some
|
||||
critical section, it can simply acquire udev->pm_mutex.
|
||||
Alternatively, if the critical section might call some of the
|
||||
usb_autopm_* routines, the driver can avoid deadlock by doing:
|
||||
|
||||
down(&udev->dev.sem);
|
||||
rc = usb_autopm_get_interface(intf);
|
||||
|
||||
and at the end of the critical section:
|
||||
|
||||
if (!rc)
|
||||
usb_autopm_put_interface(intf);
|
||||
up(&udev->dev.sem);
|
||||
|
||||
Holding the device semaphore will block all external PM calls, and the
|
||||
usb_autopm_get_interface() will prevent any internal PM calls, even if
|
||||
it fails. (Exercise: Why?)
|
||||
|
||||
The rules for locking order are:
|
||||
|
||||
Never acquire any device semaphore while holding any PM mutex.
|
||||
|
||||
Never acquire udev->pm_mutex while holding the PM mutex for
|
||||
a device that isn't a descendant of udev.
|
||||
|
||||
In other words, PM mutexes should only be acquired going up the device
|
||||
tree, and they should be acquired only after locking all the device
|
||||
semaphores you need to hold. These rules don't matter to drivers very
|
||||
much; they usually affect just the USB core.
|
||||
|
||||
Still, drivers do need to be careful. For example, many drivers use a
|
||||
private mutex to synchronize their normal I/O activities with their
|
||||
disconnect method. Now if the driver supports autosuspend then it
|
||||
must call usb_autopm_put_interface() from somewhere -- maybe from its
|
||||
close method. It should make the call while holding the private mutex,
|
||||
since a driver shouldn't call any of the usb_autopm_* functions for an
|
||||
interface from which it has been unbound.
|
||||
|
||||
But the usb_autopm_* routines always acquire the device's PM mutex, and
|
||||
consequently the locking order has to be: private mutex first, PM
|
||||
mutex second. Since the suspend method is always called with the PM
|
||||
mutex held, it mustn't try to acquire the private mutex. It has to
|
||||
synchronize with the driver's I/O activities in some other way.
|
||||
|
||||
|
||||
Interaction between dynamic PM and system PM
|
||||
--------------------------------------------
|
||||
|
||||
Dynamic power management and system power management can interact in
|
||||
a couple of ways.
|
||||
|
||||
Firstly, a device may already be manually suspended or autosuspended
|
||||
when a system suspend occurs. Since system suspends are supposed to
|
||||
be as transparent as possible, the device should remain suspended
|
||||
following the system resume. The 2.6.23 kernel obeys this principle
|
||||
for manually suspended devices but not for autosuspended devices; they
|
||||
do get resumed when the system wakes up. (Presumably they will be
|
||||
autosuspended again after their idle-delay time expires.) In later
|
||||
kernels this behavior will be fixed.
|
||||
|
||||
(There is an exception. If a device would undergo a reset-resume
|
||||
instead of a normal resume, and the device is enabled for remote
|
||||
wakeup, then the reset-resume takes place even if the device was
|
||||
already suspended when the system suspend began. The justification is
|
||||
that a reset-resume is a kind of remote-wakeup event. Or to put it
|
||||
another way, a device which needs a reset won't be able to generate
|
||||
normal remote-wakeup signals, so it ought to be resumed immediately.)
|
||||
|
||||
Secondly, a dynamic power-management event may occur as a system
|
||||
suspend is underway. The window for this is short, since system
|
||||
suspends don't take long (a few seconds usually), but it can happen.
|
||||
For example, a suspended device may send a remote-wakeup signal while
|
||||
the system is suspending. The remote wakeup may succeed, which would
|
||||
cause the system suspend to abort. If the remote wakeup doesn't
|
||||
succeed, it may still remain active and thus cause the system to
|
||||
resume as soon as the system suspend is complete. Or the remote
|
||||
wakeup may fail and get lost. Which outcome occurs depends on timing
|
||||
and on the hardware and firmware design.
|
||||
|
||||
More interestingly, a device might undergo a manual resume or
|
||||
autoresume during system suspend. With current kernels this shouldn't
|
||||
happen, because manual resumes must be initiated by userspace and
|
||||
autoresumes happen in response to I/O requests, but all user processes
|
||||
and I/O should be quiescent during a system suspend -- thanks to the
|
||||
freezer. However there are plans to do away with the freezer, which
|
||||
would mean these things would become possible. If and when this comes
|
||||
about, the USB core will carefully arrange matters so that either type
|
||||
of resume will block until the entire system has resumed.
|
@@ -428,6 +428,17 @@ Options supported:
|
||||
See http://www.uuhaus.de/linux/palmconnect.html for up-to-date
|
||||
information on this driver.
|
||||
|
||||
Winchiphead CH341 Driver
|
||||
|
||||
This driver is for the Winchiphead CH341 USB-RS232 Converter. This chip
|
||||
also implements an IEEE 1284 parallel port, I2C and SPI, but that is not
|
||||
supported by the driver. The protocol was analyzed from the behaviour
|
||||
of the Windows driver; no datasheet is available at present.
|
||||
The manufacturer's website: http://www.winchiphead.com/.
|
||||
For any questions or problems with this driver, please contact
|
||||
frank@kingswood-consulting.co.uk.
|
||||
|
||||
|
||||
Generic Serial driver
|
||||
|
||||
If your device is not one of the above listed devices, compatible with
|
||||
|
@@ -34,9 +34,12 @@ if usbmon is built into the kernel.
|
||||
Verify that bus sockets are present.
|
||||
|
||||
# ls /sys/kernel/debug/usbmon
|
||||
1s 1t 1u 2s 2t 2u 3s 3t 3u 4s 4t 4u
|
||||
0s 0t 0u 1s 1t 1u 2s 2t 2u 3s 3t 3u 4s 4t 4u
|
||||
#
|
||||
|
||||
Now you can choose to either use the sockets numbered '0' (to capture packets on
|
||||
all buses), and skip to step #3, or find the bus used by your device with step #2.
|
||||
|
||||
2. Find which bus connects to the desired device
|
||||
|
||||
Run "cat /proc/bus/usb/devices", and find the T-line which corresponds to
|
||||
@@ -56,6 +59,10 @@ Bus=03 means it's bus 3.
|
||||
|
||||
# cat /sys/kernel/debug/usbmon/3u > /tmp/1.mon.out
|
||||
|
||||
to listen on a single bus; otherwise, to listen on all buses, type:
|
||||
|
||||
# cat /sys/kernel/debug/usbmon/0u > /tmp/1.mon.out
|
||||
|
||||
This process will be reading until killed. Naturally, the output can be
|
||||
redirected to a desirable location. This is preferred, because it is going
|
||||
to be quite long.
|
||||
|
@@ -147,3 +147,4 @@
|
||||
146 -> SSAI Ultrasound Video Interface [414a:5353]
|
||||
147 -> VoodooTV 200 (USA) [121a:3000]
|
||||
148 -> DViCO FusionHDTV 2 [dbc0:d200]
|
||||
149 -> Typhoon TV-Tuner PCI (50684)
|
||||
|
5
Documentation/video4linux/CARDLIST.cx23885
Normal file
5
Documentation/video4linux/CARDLIST.cx23885
Normal file
@@ -0,0 +1,5 @@
|
||||
0 -> UNKNOWN/GENERIC [0070:3400]
|
||||
1 -> Hauppauge WinTV-HVR1800lp [0070:7600]
|
||||
2 -> Hauppauge WinTV-HVR1800 [0070:7800,0070:7801]
|
||||
3 -> Hauppauge WinTV-HVR1250 [0070:7911]
|
||||
4 -> DViCO FusionHDTV5 Express [18ac:d500]
|
@@ -88,11 +88,11 @@
|
||||
87 -> ADS Instant TV Duo Cardbus PTV331 [0331:1421]
|
||||
88 -> Tevion/KWorld DVB-T 220RF [17de:7201]
|
||||
89 -> ELSA EX-VISION 700TV [1048:226c]
|
||||
90 -> Kworld ATSC110 [17de:7350]
|
||||
90 -> Kworld ATSC110/115 [17de:7350,17de:7352]
|
||||
91 -> AVerMedia A169 B [1461:7360]
|
||||
92 -> AVerMedia A169 B1 [1461:6360]
|
||||
93 -> Medion 7134 Bridge #2 [16be:0005]
|
||||
94 -> LifeView FlyDVB-T Hybrid Cardbus [5168:3306,5168:3502]
|
||||
94 -> LifeView FlyDVB-T Hybrid Cardbus/MSI TV @nywhere A/D NB [5168:3306,5168:3502,4e42:3502]
|
||||
95 -> LifeView FlyVIDEO3000 (NTSC) [5169:0138]
|
||||
96 -> Medion Md8800 Quadro [16be:0007,16be:0008]
|
||||
97 -> LifeView FlyDVB-S /Acorp TV134DS [5168:0300,4e42:0300]
|
||||
@@ -115,3 +115,4 @@
|
||||
114 -> KWorld DVB-T 210 [17de:7250]
|
||||
115 -> Sabrent PCMCIA TV-PCB05 [0919:2003]
|
||||
116 -> 10MOONS TM300 TV Card [1131:2304]
|
||||
117 -> Avermedia Super 007 [1461:f01d]
|
||||
|
Reference in New Issue
Block a user