This is the mail archive of the
systemtap@sourceware.org
mailing list for the systemtap project.
RE: Patch [1/3] Userspace probes new interfaces
- From: "Zhang, Yanmin" <yanmin dot zhang at intel dot com>
- To: <prasanna at in dot ibm dot com>
- Cc: <systemtap at sources dot redhat dot com>, "Keshavamurthy, Anil S" <anil dot s dot keshavamurthy at intel dot com>, "Mao, Bibo" <bibo dot mao at intel dot com>
- Date: Wed, 25 Jan 2006 17:21:45 +0800
- Subject: RE: Patch [1/3] Userspace probes new interfaces
>>-----Original Message-----
>>From: systemtap-owner@sourceware.org [mailto:systemtap-owner@sourceware.org] On Behalf Of Prasanna S Panchamukhi
>>Sent: 2006年1月19日 22:37
>>To: systemtap@sources.redhat.com
>>Subject: Patch [1/3] Userspace probes new interfaces
>>struct uprobe - Allocated per probe by the user.
>>struct uprobe_module - Allocated per application by the userspace probe
>> mechanism.
>>struct uprobe {
>> /* pointer to the pathname of the application */
>> char *pathname;
>> /* kprobe structure with user specified handlers */
>> struct kprobe kp;
>> /* hlist of all the userspace probes per application */
>> struct hlist_node ulist;
>> /* inode of the probed application */
>> struct inode *inode;
>> /* probe offset within the file */
>> unsigned long offset;
>>};
>>
>>struct uprobe_module {
>> /* hlist head of all userspace probes per application */
>> struct hlist_head ulist_head;
>> /* list of all uprobe_module for probed application */
>> struct list_head mlist;
>> /* to hold path/dentry etc. */
>> struct nameidata nd;
nameidata is too big. I still think a pointer to dentry is enough and it's very to use dentry.
>> Before inserting the probe, specify the pathname of the application
>> on which the probe is to be inserted.
>>
>> const char *pname = "/home/prasanna/bin/myapp";
>>
>> p.kp.pre_handler=handler_pre;
>> p.kp.post_handler=handler_post;
>> p.kp.fault_handler=handler_fault;
>>
>> /* Specify the offset within the application/executable*/
>> p.offset = (unsigned long)0x4d4;
>> /* Secify the probe address */
>> /* $nm appln |grep func1 */
>> p.kp.addr = (kprobe_opcode_t *)0x080484d4;
Why should we assign a value to p.kp.addr in the example?
>>
>>diff -puN include/linux/kprobes.h~kprobes_userspace_probes-base-interface include/linux/kprobes.h
>>--- linux-2.6.15/include/linux/kprobes.h~kprobes_userspace_probes-base-interface 2006-01-19 18:48:04.000000000 +0530
>>+++ linux-2.6.15-prasanna/include/linux/kprobes.h 2006-01-19 19:35:53.000000000 +0530
>>@@ -37,6 +37,10 @@
>> #include <linux/spinlock.h>
>> #include <linux/rcupdate.h>
>> #include <linux/mutex.h>
>>+#include <linux/mm.h>
>>+#include <linux/dcache.h>
>>+#include <linux/namei.h>
>>+#include <linux/pagemap.h>
>>
>> #ifdef CONFIG_KPROBES
>> #include <asm/kprobes.h>
>>@@ -117,6 +121,32 @@ struct jprobe {
>> DECLARE_PER_CPU(struct kprobe *, current_kprobe);
>> DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
>>
>>+struct uprobe {
>>+ /* pointer to the pathname of the application */
>>+ char *pathname;
>>+ /* kprobe structure with user specified handlers */
>>+ struct kprobe kp;
>>+ /* hlist of all the userspace probes per application */
>>+ struct hlist_node ulist;
>>+ /* inode of the probed application */
>>+ struct inode *inode;
>>+ /* probe offset within the file */
>>+ unsigned long offset;
>>+};
>>+
>>+struct uprobe_module {
>>+ /* hlist head of all userspace probes per application */
>>+ struct hlist_head ulist_head;
>>+ /* list of all uprobe_module for probed application */
>>+ struct list_head mlist;
>>+ /* to hold path/dentry etc. */
>>+ struct nameidata nd;
>>+ /* original readpage operations */
>>+ struct address_space_operations *ori_a_ops;
>>+ /* readpage hooks added operations */
>>+ struct address_space_operations user_a_ops;
>>+};
>>+
>> #ifdef ARCH_SUPPORTS_KRETPROBES
>> extern void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs);
>> #else /* ARCH_SUPPORTS_KRETPROBES */
>>@@ -162,6 +192,9 @@ extern void show_registers(struct pt_reg
>> extern kprobe_opcode_t *get_insn_slot(void);
>> extern void free_insn_slot(kprobe_opcode_t *slot);
>> extern void kprobes_inc_nmissed_count(struct kprobe *p);
>>+extern void arch_copy_uprobe(struct kprobe *p, unsigned long *address);
>>+extern void arch_arm_uprobe(unsigned long *address);
>>+extern void arch_disarm_uprobe(struct kprobe *p, kprobe_opcode_t *address);
>>
>> /* Get the kprobe at this addr (if any) - called with preemption disabled */
>> struct kprobe *get_kprobe(void *addr);
>>@@ -194,6 +227,9 @@ void jprobe_return(void);
>> int register_kretprobe(struct kretprobe *rp);
>> void unregister_kretprobe(struct kretprobe *rp);
>>
>>+int register_uprobe(struct uprobe *uprobe);
>>+void unregister_uprobe(struct uprobe *uprobe);
>>+
>> struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp);
>> void add_rp_inst(struct kretprobe_instance *ri);
>> void kprobe_flush_task(struct task_struct *tk);
>>diff -puN kernel/kprobes.c~kprobes_userspace_probes-base-interface kernel/kprobes.c
>>--- linux-2.6.15/kernel/kprobes.c~kprobes_userspace_probes-base-interface 2006-01-19 18:48:04.000000000 +0530
>>+++ linux-2.6.15-prasanna/kernel/kprobes.c 2006-01-19 19:37:25.000000000 +0530
>>@@ -47,10 +47,13 @@
>>
>> static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
>> static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
>>+static struct list_head uprobe_module_list;
>>
>> DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
>>+DEFINE_MUTEX(uprobe_mutex); /* Protects uprobe_module */
>> DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */
>> static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
>>+extern DEFINE_PER_CPU(struct uprobe *, current_uprobe);
>>
>> #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
>> /*
>>@@ -166,26 +169,6 @@ static inline void reset_kprobe_instance
>> }
>>
>> /*
>>- * This routine is called either:
>>- * - under the kprobe_mutex - during kprobe_[un]register()
>>- * OR
>>- * - with preemption disabled - from arch/xxx/kernel/kprobes.c
>>- */
>>-struct kprobe __kprobes *get_kprobe(void *addr)
>>-{
>>- struct hlist_head *head;
>>- struct hlist_node *node;
>>- struct kprobe *p;
>>-
>>- head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
>>- hlist_for_each_entry_rcu(p, node, head, hlist) {
>>- if (p->addr == addr)
>>- return p;
>>- }
>>- return NULL;
>>-}
>>-
>>-/*
>> * Aggregate handlers for multiple kprobes support - these handlers
>> * take care of invoking the individual kprobe handlers on p->list
>> */
>>@@ -248,6 +231,107 @@ static int __kprobes aggr_break_handler(
>> return ret;
>> }
>>
>>+/**
>>+ * This routine looks for an existing uprobe at the given offset and inode.
>>+ * If it's found, returns the corresponding kprobe pointer.
>>+ */
>>+static struct kprobe __kprobes *get_kprobe_user(struct inode *inode,
>>+ unsigned long offset)
>>+{
>>+ struct hlist_head *head;
>>+ struct hlist_node *node;
>>+ struct kprobe *p, *kpr;
>>+ struct uprobe *uprobe;
>>+
>>+ head = &kprobe_table[hash_ptr((kprobe_opcode_t *)
>>+ (((unsigned long)inode) * offset), KPROBE_HASH_BITS)];
>>+
>>+ hlist_for_each_entry(p, node, head, hlist) {
>>+ if (kernel_text_address((unsigned long)p->addr))
>>+ continue;
>>+
>>+ if (p->pre_handler == aggr_pre_handler) {
>>+ kpr = list_entry(rcu_dereference(p)->list.next,
>>+ typeof(*kpr), list);
>>+ uprobe = container_of(kpr, struct uprobe, kp);
>>+ } else
>>+ uprobe = container_of(p, struct uprobe, kp);
>>+
>>+ if ((uprobe->inode == inode) && (uprobe->offset == offset))
>>+ return p;
>>+ }
>>+
>>+ return NULL;
>>+}
>>+
>>+/**
>>+ * Finds a uprobe at the specified user-space address in the current task.
>>+ * Points current_uprobe at that uprobe and returns the corresponding kprobe.
>>+ */
>>+static struct kprobe __kprobes *get_uprobe(void *addr)
>>+{
>>+ struct mm_struct *mm = current->mm;
>>+ struct vm_area_struct *vma;
>>+ struct inode *inode;
>>+ unsigned long offset;
>>+ struct kprobe *p, *kpr;
>>+ struct uprobe *uprobe;
>>+
>>+ spin_lock(&mm->page_table_lock);
>>+ vma = find_vma(mm, (unsigned long)addr);
Find_vma is protected by mm->mmap_sem instead of mm->page_table_lock.
>>+
>>+ BUG_ON(!vma); /* this should not happen, not in our memory map */
>>+
>>+ offset = (unsigned long)addr - vma->vm_start +
>>+ (vma->vm_pgoff << PAGE_SHIFT);
>>+ if (!vma->vm_file) {
>>+ spin_unlock(&mm->page_table_lock);
>>+ return NULL;
>>+ }
>>+
>>+ inode = vma->vm_file->f_dentry->d_inode;
>>+ spin_unlock(&mm->page_table_lock);
>>+
>>+ p = get_kprobe_user(inode, offset);
>>+ if (!p)
>>+ return NULL;
>>+
>>+ if (p->pre_handler == aggr_pre_handler) {
>>+ kpr = list_entry(rcu_dereference(p)->list.next, typeof(*kpr),
>>+ list);
>>+ uprobe = container_of(kpr, struct uprobe, kp);
>>+ } else
>>+ uprobe = container_of(p, struct uprobe, kp);
>>+
>>+ if (uprobe)
>>+ __get_cpu_var(current_uprobe) = uprobe;
>>+
>>+ return p;
>>+}
>>+
>>+/*
>>+ * This routine is called either:
>>+ * - under the kprobe_mutex - during kprobe_[un]register()
>>+ * OR
>>+ * - with preemption disabled - from arch/xxx/kernel/kprobes.c
>>+ */
>>+struct kprobe __kprobes *get_kprobe(void *addr)
>>+{
>>+ struct hlist_head *head;
>>+ struct hlist_node *node;
>>+ struct kprobe *p;
>>+
>>+ if (!kernel_text_address((unsigned long)addr))
>>+ return get_uprobe(addr);
>>+
>>+ head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
>>+ hlist_for_each_entry_rcu(p, node, head, hlist) {
>>+ if (p->addr == addr)
>>+ return p;
>>+ }
>>+ return NULL;
>>+}
>>+
>> /* Walks the list and increments nmissed count for multiprobe case */
>> void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
>> {
>>@@ -576,6 +660,386 @@ void __kprobes unregister_jprobe(struct
>> unregister_kprobe(&jp->kp);
>> }
>>
>>+typedef int (*process_uprobe_func_t)(struct uprobe *uprobe,
>>+ unsigned long *address, struct page *page,
>>+ struct vm_area_struct *vma);
>>+
>>+/**
>>+ * Adds the kprobe structure for the specified uprobe to either the
>>+ * kprobe_table or to the aggregate hash list for a given inode and offset.
>>+ * Also copies the instructions and inserts the breakpoint.
>>+ */
>>+int __kprobes insert_kprobe_user(struct uprobe *uprobe, unsigned long *address,
>>+ struct page *page, struct vm_area_struct *vma)
Parameter vma is not needed because flush_vma will do the flush for all vma which maps the page.
process_uprobe_func_t could also delete parameter vma. Same thing of function remove_kprobe_user.
Another reason to delete the parameter is it might introduce a race between find_get_vma and map_uprobe_page. Vma might be changed or release just after returning from find_get_vma.
>>+{
>>+ struct kprobe *old_p;
>>+ struct hlist_head *head;
>>+
>>+ uprobe->kp.nmissed = 0;
>>+ old_p = get_kprobe_user(uprobe->inode, uprobe->offset);
>>+ if (old_p)
>>+ return register_aggr_kprobe(old_p, &uprobe->kp);
>>+
>>+ head = &kprobe_table[hash_ptr((kprobe_opcode_t *)(uprobe->offset *
>>+ (unsigned long)uprobe->inode), KPROBE_HASH_BITS)];
>>+
>>+ INIT_HLIST_NODE(&uprobe->kp.hlist);
>>+ hlist_add_head_rcu(&uprobe->kp.hlist, head);
>>+
>>+ arch_copy_uprobe(&uprobe->kp, address);
>>+ arch_arm_uprobe(address);
>>+ flush_icache_user_range(vma, page, (unsigned long)address,
>>+ sizeof(kprobe_opcode_t));
>>+
>>+ return 0;
>>+}
>>+
>>+/**
>>+ * Wait for the page to be unlocked if someone else had locked it,
>>+ * then map the page and insert or remove the breakpoint.
>>+ */
>>+static int __kprobes map_uprobe_page(struct page *page,
>>+ struct vm_area_struct *vma,
>>+ struct uprobe *uprobe,
>>+ process_uprobe_func_t process_kprobe_user)
>>+{
>>+ int ret = 0;
>>+ unsigned long *uprobe_address;
>>+
>>+ if (!page)
>>+ return -EINVAL; /* TODO: more suitable errno */
>>+
>>+ wait_on_page_locked(page);
>>+ /* could probably retry readpage here. */
>>+ if (!PageUptodate(page))
>>+ return -EINVAL; /* TODO: more suitable errno */
>>+
>>+ lock_page(page);
>>+
>>+ uprobe_address = kmap(page);
>>+ uprobe_address = (unsigned long *)((unsigned long)uprobe_address +
>>+ (unsigned long) (uprobe->offset & ~PAGE_MASK));
>>+ ret = (*process_kprobe_user)(uprobe, uprobe_address, page, vma);
>>+ kunmap(page);
>>+
>>+ unlock_page(page);
>>+
>>+ return ret;
>>+}
>>+
>>+
>>+/**
>>+ * find_get_vma walks through the list of process private mappings and
>>+ * gets the vma containing the offset.
>>+ */
>>+static struct vm_area_struct __kprobes *find_get_vma(struct uprobe *uprobe,
>>+ struct page *page, struct address_space *mapping)
I would like to suggest you to delete this function, because it might introduce race.
>>+{
>>+ struct vm_area_struct *vma = NULL;
>>+ struct prio_tree_iter iter;
>>+ struct prio_tree_root *head = &mapping->i_mmap;
>>+ struct mm_struct *mm;
>>+ unsigned long start, end, offset = uprobe->offset;
>>+
>>+ vma_prio_tree_foreach(vma, &iter, head, offset, offset) {
>>+ mm = vma->vm_mm;
>>+
>>+ spin_lock(&mm->page_table_lock);
vma->vm_start and vma->vm_end are protected by mm->mmap_sem. So the lock becomes complicated because spinlock and sema are used together.
>>+ start = vma->vm_start - (vma->vm_pgoff << PAGE_SHIFT);
>>+ end = vma->vm_end - (vma->vm_pgoff << PAGE_SHIFT);
>>+ spin_unlock(&mm->page_table_lock);
>>+
>>+ if ((start + offset) < end)
>>+ return vma;
>>+ }
>>+
>>+ return NULL;
>>+}
>>+
>>+/**
>>+ * flush_vma walks through the list of process private mappings,
>>+ * gets the vma containing the offset and flush all the vma's
>>+ * containing the probed page.
>>+ */
>>+static void __kprobes flush_vma(struct address_space *mapping,
>>+ struct page *page, struct uprobe *uprobe)
>>+{
>>+ struct vm_area_struct *vma = NULL;
>>+ struct prio_tree_iter iter;
>>+ struct prio_tree_root *head = &mapping->i_mmap;
>>+ struct mm_struct *mm;
>>+ unsigned long start, end, offset = uprobe->offset;
>>+
>>+ vma_prio_tree_foreach(vma, &iter, head, offset, offset) {
>>+ mm = vma->vm_mm;
>>+
>>+ spin_lock(&mm->page_table_lock);
Like above, vma->vm_start and vma->vm_end are protected by mm->mmap_sem.
>>+ start = vma->vm_start - (vma->vm_pgoff << PAGE_SHIFT);
>>+ end = vma->vm_end - (vma->vm_pgoff << PAGE_SHIFT);
>>+ spin_unlock(&mm->page_table_lock);
>>+
>>+ if ((start + offset) < end)
>>+ flush_icache_user_range(vma, page,
>>+ (unsigned long)uprobe->kp.addr,
>>+ sizeof(kprobe_opcode_t));
>>+ }
>>+}
>>+
>>+/**
>>+ * Check if the given offset lies within the given page range.
>>+ */
>>+static inline int find_page_probe(unsigned long offset,
>>+ unsigned long page_start)
>>+{
>>+ unsigned long page_end = page_start + PAGE_SIZE;
>>+
>>+ if ((offset >= page_start) && (offset < page_end))
>>+ return 1;
>>+
>>+ return 0;
>>+}
>>+
>>+/**
>>+ * Walk the uprobe_module_list and return the uprobe module with matching
>>+ * inode.
>>+ */
>>+static struct uprobe_module __kprobes *get_module_by_inode(struct inode *inode)
>>+{
>>+ struct uprobe_module *umodule;
>>+
>>+ list_for_each_entry(umodule, &uprobe_module_list, mlist) {
>>+ if (umodule->nd.dentry->d_inode == inode)
>>+ return umodule;
>>+ }
>>+
>>+ return NULL;
>>+}
>>+
>>+/**
>>+ * Get the inode operations. This function leaves with the dentry held
>>+ * and taking with the inode writelock held to ensure that the file on
>>+ * which probes are currently active does not change from under us. Add uprobe
>>+ * and uprobe_module to the appropriate hash list. Also swithces i_op to
>>+ * hooks into readpage and readpages().
>>+ */
>>+static int __kprobes get_inode_ops(struct uprobe *uprobe,
>>+ struct uprobe_module *umodule)
The function looks incomplete and unclean.
>>+{
>>+ struct address_space *as;
>>+ int error = 0;
>>+
>>+ INIT_HLIST_HEAD(&umodule->ulist_head);
>>+ hlist_add_head(&uprobe->ulist, &umodule->ulist_head);
>>+ list_add(&umodule->mlist, &uprobe_module_list);
>>+ as = umodule->nd.dentry->d_inode->i_mapping;
>>+
>>+ return error;
>>+}
>>+
>>+/**
>>+ * Lookup the pathname and the get the inode and dentry.
>>+ */
>>+static inline int get_uprobe_inode(const char *pathname, struct inode **inode)
I suggest to delete this function because there is a race condition. Inode might be released by another
thread just this thread returns from get_uprobe_inode. This function is simple and could be moved
to its caller, register_uprobe.
>>+{
>>+ int error;
>>+ struct nameidata nd;
>>+
>>+ if ((error = path_lookup(pathname, LOOKUP_FOLLOW, &nd))) {
>>+ path_release(&nd);
If error, still need call patch_release?
>>+ return error;
>>+ }
>>+
>>+ *inode = (struct inode *)nd.dentry->d_inode;
>>+ path_release(&nd);
>>+
>>+ return 0;
>>+}
>>+
>>+
>>+int __kprobes remove_kprobe_user(struct uprobe *uprobe, unsigned long *address,
>>+ struct page *page, struct vm_area_struct *vma)
>>+{
>>+ struct kprobe *old_p, *list_p, *p;
>>+ int cleanup_p;
>>+
>>+ p = &uprobe->kp;
>>+ mutex_lock(&kprobe_mutex);
>>+ old_p = get_kprobe_user(uprobe->inode, uprobe->offset);
>>+ if (unlikely(!old_p)) {
>>+ mutex_unlock(&kprobe_mutex);
>>+ return 0;
>>+ }
>>+
>>+ if (p != old_p) {
>>+ list_for_each_entry_rcu(list_p, &old_p->list, list)
>>+ if (list_p == p)
>>+ /* kprobe p is a valid probe */
>>+ goto valid_p;
>>+ mutex_unlock(&kprobe_mutex);
>>+ return 0;
>>+ }
>>+
>>+valid_p:
>>+ if ((old_p == p) || ((old_p->pre_handler == aggr_pre_handler) &&
>>+ (p->list.next == &old_p->list) &&
>>+ (p->list.prev == &old_p->list))) {
>>+ /* Only probe on the hash list */
>>+ arch_disarm_uprobe(p, (kprobe_opcode_t *)address);
>>+ flush_icache_user_range(vma, page, (unsigned long)address,
>>+ sizeof(kprobe_opcode_t));
flush_icache_user_range is not needed here because later flush_vma would do so.
>>+ hlist_del_rcu(&old_p->hlist);
>>+ cleanup_p = 1;
>>+ } else {
>>+ list_del_rcu(&p->list);
>>+ cleanup_p = 0;
>>+ }
>>+
>>+ mutex_unlock(&kprobe_mutex);
>>+
>>+ synchronize_sched();
>>+ if (cleanup_p) {
>>+ if (p != old_p) {
>>+ list_del_rcu(&p->list);
Should this list_del_rcu be moved before synchronize_sched? Suggest to move it to
>>+ kfree(old_p);
>>+ }
>>+ }
>>+
>>+ return 0;
>>+}
>>+
>>+
>>+/**
>>+ * unregister_uprobe: Disarms the probe, removes the kprobe and uprobe
>>+ * pointers from the hash lists. Unhooks readpage routines.
>>+ */
>>+void __kprobes unregister_uprobe(struct uprobe *uprobe)
>>+{
>>+ struct address_space *mapping;
>>+ struct uprobe_module *umodule;
>>+ struct vm_area_struct *vma = NULL;
>>+ struct page *page;
>>+ int ret = 0;
>>+
>>+ if (!uprobe->inode)
>>+ return;
>>+
>>+ mapping = uprobe->inode->i_mapping;
>>+
>>+ page = find_get_page(mapping, uprobe->offset >> PAGE_CACHE_SHIFT);
>>+
>>+ spin_lock(&mapping->i_mmap_lock);
>>+ vma = find_get_vma(uprobe, page, mapping);
>>+ ret = map_uprobe_page(page, vma, uprobe, remove_kprobe_user);
>>+ /*
>>+ * TODO: unregister_uprobe should not fail, need to handle if it fails.
>>+ */
>>+ flush_vma(mapping, page, uprobe);
>>+ spin_unlock(&mapping->i_mmap_lock);
>>+
>>+ if (page)
>>+ page_cache_release(page);
>>+
>>+ mutex_lock(&uprobe_mutex);
>>+ if (!(umodule = get_module_by_inode(uprobe->inode)))
>>+ goto out;
>>+
>>+ hlist_del(&uprobe->ulist);
>>+ if (hlist_empty(&umodule->ulist_head)) {
>>+ list_del(&umodule->mlist);
>>+ path_release(&umodule->nd);
>>+ kfree(umodule);
>>+ }
>>+out:
>>+ mutex_unlock(&uprobe_mutex);
>>+}
>>+
>>+/**
>>+ * register_uprobe(): combination of inode and offset is used to identify each
>>+ * probe uniquely. Each uprobe can be found from the kprobes_hash table by
>>+ * using inode and offset. register_uprobe(), inserts the breakpoint at the
>>+ * given address by locating and mapping the page. return 0 on success and
>>+ * error on failure.
>>+ */
>>+int __kprobes register_uprobe(struct uprobe *uprobe)
>>+{
>>+ struct address_space *mapping;
>>+ struct uprobe_module *umodule = NULL;
>>+ struct vm_area_struct *vma = NULL;
>>+ struct inode *inode;
>>+ struct page *page;
>>+ int error = 0;
>>+
>>+ INIT_HLIST_NODE(&uprobe->ulist);
>>+
>>+ /*
>>+ * TODO: Need to calculate the absolute file offset for dynamic
>>+ * shared libraries.
>>+ * uprobe->offset = (unsigned long)uprobe->kp.addr & UPROBE_OFFSET_MASK;
>>+ */
>>+ if ((error = get_uprobe_inode(uprobe->pathname, &inode)))
>>+ return error;
>>+
>>+ mutex_lock(&uprobe_mutex);
>>+ /*
>>+ * Check if there are probes already on this application and add the
>>+ * corresponding uprobe to per application probe's list.
>>+ */
>>+ umodule = get_module_by_inode(inode);
>>+ if (!umodule) {
>>+ /*
>>+ * Allocate a uprobe_module structure for this application
>>+ * if not allocated before.
>>+ */
>>+ umodule = kzalloc(sizeof(struct uprobe_module), GFP_KERNEL);
>>+ if (!umodule) {
>>+ error = -ENOMEM;
>>+ goto out;
>>+ }
>>+
>>+ error = path_lookup(uprobe->pathname, LOOKUP_FOLLOW,
>>+ &umodule->nd);
>>+ if (error) {
>>+ path_release(&umodule->nd);
If error, still need call patch_release?
>>+ goto out;
>>+ }
>>+
>>+ if ((error = get_inode_ops(uprobe, umodule))) {
>>+ path_release(&umodule->nd);
>>+ kfree(umodule);
>>+ goto out;
>>+ }
>>+ } else
>>+ hlist_add_head(&uprobe->ulist, &umodule->ulist_head);
>>+
>>+ uprobe->inode = umodule->nd.dentry->d_inode;
>>+ mapping = umodule->nd.dentry->d_inode->i_mapping;
>>+ mutex_lock(&kprobe_mutex);
>>+ page = find_get_page(mapping, (uprobe->offset >> PAGE_CACHE_SHIFT));
>>+
>>+ spin_lock(&mapping->i_mmap_lock);
>>+ vma = find_get_vma(uprobe, page, mapping);
>>+
>>+ /*
>>+ * If error == -EINVAL, return success, probes will inserted by
>>+ * readpage hooks.
>>+ * TODO: Use a more suitable errno?
>>+ */
>>+ error = map_uprobe_page(page, vma, uprobe, insert_kprobe_user);
>>+ if (error == -EINVAL)
>>+ error = 0;
>>+ flush_vma(mapping, page, uprobe);
>>+ spin_unlock(&mapping->i_mmap_lock);
>>+
>>+ if (page)
>>+ page_cache_release(page);
>>+
>>+ mutex_unlock(&kprobe_mutex);
>>+out:
>>+ mutex_unlock(&uprobe_mutex);
>>+
>>+ return error;
>>+}
1) When to assign a value to uprobe->kp->addr?
2) As for the flush_icache when register/unregister uprobe, I am still hesitating if we should flush icache just for the current cpu or all the cpus which run in the address spaces (vma->mm) of all related vma mapping the uprobed same page. Could guys discuss it on the conference call?