About the MSR IA32_TIME_STAMP_COUNTER (10h) :
Which rules of serialization does it follow ? rdtsc or rdtscp ? or other ?
If not serialized, should I provide a cpuid "barrier" before any math computations ?
-- Edit --
So far I have implemented two kinds of barriers : cpuid and fences.
With cpuid :
/*
 * RDCOUNTER(_val, _cnt) - CPUID-fenced variant.
 *
 * Reads the MSR whose index is _cnt with RDMSR and stores the 64-bit result
 * in _val. _cnt is an immediate ("i") operand and _val a memory ("=m")
 * operand. A CPUID with RAX = 0 is executed before and after the RDMSR.
 *
 * RDMSR delivers its result split across RDX (high half) and RAX (low half).
 * Both halves are pushed before the second CPUID and popped afterwards,
 * then merged as (RDX << 32) | RAX.
 *
 * NOTE(review): the push/pop pair moves the stack pointer inside the asm
 * statement; this presumably relies on a build without a stack red zone
 * (as kernel code normally is) - confirm before reusing elsewhere.
 */
#define RDCOUNTER(_val, _cnt) \
asm volatile \
( \
"xorq %%rax, %%rax \n\t" \
"cpuid \n\t" \
"movq %1, %%rcx \n\t" \
"rdmsr \n\t" \
"push %%rax \n\t" \
"push %%rdx \n\t" \
"xorq %%rax, %%rax \n\t" \
"cpuid \n\t" \
"pop %%rdx \n\t" \
"pop %%rax \n\t" \
"shlq $32, %%rdx \n\t" \
"orq %%rdx, %%rax \n\t" \
"movq %%rax, %0" \
: "=m" (_val) \
: "i" (_cnt) \
: "%rax", "%rbx", "%rcx", "%rdx", "memory" \
)
With fence :
/*
 * RDCOUNTER(_val, _cnt) - MFENCE-fenced variant.
 *
 * Loads the MSR index _cnt (an immediate operand) into RCX, executes RDMSR
 * between two MFENCE instructions, merges the result as (RDX << 32) | RAX
 * and stores it in the memory operand _val.
 *
 * NOTE(review): "%rbx" is listed as clobbered although no instruction in
 * this sequence writes it; it looks like a leftover from the CPUID variant.
 */
#define RDCOUNTER(_val, _cnt) \
asm volatile \
( \
"movq %1, %%rcx \n\t" \
"mfence \n\t" \
"rdmsr \n\t" \
"mfence \n\t" \
"shlq $32, %%rdx \n\t" \
"orq %%rdx, %%rax \n\t" \
"movq %%rax, %0" \
: "=m" (_val) \
: "i" (_cnt) \
: "%rax", "%rbx", "%rcx", "%rdx", "memory" \
)
Below, part of my project tries to estimate the processor's external clock frequency (FSB or BCLK).
Algorithm allocates an array of a structured memory to read and measure deltas of the Time Stamp Counter.
This slab of memory is allocated to be resident in the processor cache.
A cpu affinity is made with the BSP, scheduler and interrupts are suspended the time of computation.
Several loops of TSC reads are done to force cache residency, and the result that occurs most often is declared the best frequency.
What I expect is to get a constant frequency after several runs.
Unfortunately, I still see variance whether or not a barrier instruction is used.
The results are pretty close, agreeing to at least 3 decimal places, but they are never constant.
(this is tested on a Core 2 and Core i7)
/* Completion object that Compute_Clock() passes to complete_and_exit(). */
DECLARE_COMPLETION(bclk_job_complete);
/*
 * One measurement slot: V[0] and V[1] hold the counter values read before
 * and after the delay, D holds the delta derived from them.
 */
typedef struct {
unsigned long long V[2], D;
} TSC_STRUCT;
/* Number of measurement slots allocated and sampled per run. */
#define OCCURENCES 32
signed int Compute_Clock(void *arg)
{
CLOCK *clock=(CLOCK *) arg;
unsigned int ratio=clock->Q;
unsigned long long overhead=0;
struct kmem_cache *hardwareCache=kmem_cache_create(
"IntelClockCache",
OCCURENCES * sizeof(TSC_STRUCT), 0,
SLAB_HWCACHE_ALIGN, NULL);
TSC_STRUCT *TSC=kmem_cache_alloc(hardwareCache, GFP_KERNEL);
unsigned int loop=0, best=0, top=0;
// No preemption, no interrupt.
unsigned long flags;
preempt_disable();
raw_local_irq_save(flags);
// Warm-up
RDCOUNTER(TSC[loop].V[0], MSR_IA32_TSC);
RDCOUNTER(TSC[loop].V[1], MSR_IA32_TSC);
// Overhead
RDCOUNTER(TSC[loop].V[0], MSR_IA32_TSC);
RDCOUNTER(TSC[loop].V[1], MSR_IA32_TSC);
overhead=TSC[loop].V[1] - TSC[loop].V[0];
// Pick-up
for(loop=0; loop < OCCURENCES; loop++)
{
RDCOUNTER(TSC[loop].V[0], MSR_IA32_TSC);
udelay(100);
RDCOUNTER(TSC[loop].V[1], MSR_IA32_TSC);
}
// Restore interrupt and preemption.
raw_local_irq_restore(flags);
preempt_enable();
for(loop=0; loop < OCCURENCES; loop++)
TSC[loop].D=TSC[loop].V[1] - TSC[loop].V[0] - overhead;
for(loop=0; loop < OCCURENCES; loop++) {
unsigned int inner=0, count=0;
for(inner=loop; inner < OCCURENCES; inner++)
if(TSC[loop].D == TSC[inner].D)
count++;
if((count > top)
||((count == top) && (TSC[loop].D < TSC[best].D))) {
top=count;
best=loop;
}
/* printk("%3u x D[%02u]=%llu\t%llu - %llu\n",
count, loop, TSC[loop].D, TSC[loop].V[1], TSC[loop].V[0]); */
}
printk("Overhead=%llu\tBest=%llu\n", overhead, TSC[best].D);
clock->Q=TSC[best].D / (ratio * PRECISION);
clock->R=TSC[best].D % (ratio * PRECISION);
kmem_cache_free(hardwareCache, TSC);
kmem_cache_destroy(hardwareCache);
complete_and_exit(&bclk_job_complete, 0);
}
Related
I know that in Questa or Riviera UVM error messages can be indicated in the waveform as well.
With SystemVerilog display macros (but not UVM!), how can an indicator be shown in the Synopsys Verdi waveform when the display happens?
For example lets say something like this macro is used:
// DELIM is the marker that the `ifdef tests below look for.
`define DELIM
// DEBUG_PRINT(p0 [, p1 .. p5]) - forward a format string and up to five
// arguments to my_debug($psprintf(...)).
//
// Every optional argument defaults to the token ELIM. Pasting "D" in front
// of an omitted argument therefore yields DELIM, which is defined, so each
// `ifdef D``pN is true exactly when pN was left out (assuming a supplied
// argument never pastes into the name of a defined macro). The first omitted
// argument selects the $psprintf call with the matching argument count.
`define DEBUG_PRINT(p0, p1=ELIM, p2=ELIM, p3=ELIM, p4=ELIM, p5=ELIM) \
`ifdef D``p1 \
my_debug($psprintf(p0)); \
`else \
`ifdef D``p2 \
my_debug($psprintf(p0, p1)); \
`else \
`ifdef D``p3 \
my_debug($psprintf(p0, p1, p2)); \
`else \
`ifdef D``p4 \
my_debug($psprintf(p0, p1, p2, p3)); \
`else \
`ifdef D``p5 \
my_debug($psprintf(p0, p1, p2, p3, p4)); \
`else \
my_debug($psprintf(p0, p1, p2, p3, p4, p5)); \
`endif \
`endif \
`endif \
`endif \
`endif
Ideas that may be relevant here to some solution:
I am aware of the TMR capability briefly but I'm not sure if that should be applied here. Neither if it is propagating to the Wave window as an indicator.
If it would be necessary to use some workaround like below (flag/event), since it is not UVM, I'm not sure how to get around the problem (such as discussed here) of applying the semicolon in the middle :
my_debug($psprintf(p0)); \ -> tb.event \
This is the relevant part of the XS code, which should convert a Perl string from UTF-8 to code points (unsigned 32-bit integers):
/*
 * text2UV - expand the string value of sv into an array of code points.
 *
 * sv:   scalar whose string value is read with SvPV.
 * lenp: receives the number of UV elements written.
 *
 * Returns a pointer to the UV array. The array lives in the buffer of a
 * mortal SV created here, so the caller must not free it and must not keep
 * the pointer after that mortal has been released.
 *
 * When sv is flagged as UTF-8, each sequence is decoded with utf8n_to_uvchr;
 * otherwise every byte becomes one element. Croaks on a sequence that cannot
 * be decoded.
 */
UV *
text2UV (SV *sv, STRLEN *lenp)
{
  STRLEN len;
  U8 *s = (U8 *)SvPV (sv, len);
  /* One UV per input byte is an upper bound on the number of code points. */
  UV *r = (UV *)SvPVX (sv_2mortal (NEWSV (0, (len + 1) * sizeof (UV))));
  UV *p = r;

  if (SvUTF8 (sv))
    {
      while (len)
        {
          STRLEN clen;
          UV cp = utf8n_to_uvchr (s, len, &clen, 0);

          /*
           * STRLEN is unsigned, so a "clen < 0" test can never be true.
           * A decoding failure is reported as (STRLEN) -1; a zero or
           * over-long length would also corrupt the bookkeeping below.
           */
          if (clen == (STRLEN) -1 || clen == 0 || clen > len)
            croak ("illegal unicode character in string");

          *p++ = cp;
          s += clen;
          len -= clen;
        }
    }
  else
    while (len--)
      *p++ = *s++;

  *lenp = p - r;
  return r;
}
It throws this warning:
~/github/perl/Text-Levenshtein-BVXS$ make
cp BVXS.pm blib/lib/Text/Levenshtein/BVXS.pm
Running Mkbootstrap for BVXS ()
chmod 644 "BVXS.bs"
"/Users/helmut/perl5/perlbrew/perls/perl-5.32.0/bin/perl" -MExtUtils::Command::MM -e 'cp_nonempty' -- BVXS.bs blib/arch/auto/Text/Levenshtein/BVXS/BVXS.bs 644
"/Users/helmut/perl5/perlbrew/perls/perl-5.32.0/bin/perl" "/Users/helmut/perl5/perlbrew/perls/perl-5.32.0/lib/5.32.0/ExtUtils/xsubpp" -typemap '/Users/helmut/perl5/perlbrew/perls/perl-5.32.0/lib/5.32.0/ExtUtils/typemap' BVXS.xs > BVXS.xsc
mv BVXS.xsc BVXS.c
cc -c -I. -fno-common -DPERL_DARWIN -mmacosx-version-min=10.14 -fno-strict-aliasing -pipe -fstack-protector-strong -I/usr/local/include -I/opt/local/include -DPERL_USE_SAFE_PUTENV -O3 -DVERSION=\"0.01\" -DXS_VERSION=\"0.01\" "-I/Users/helmut/perl5/perlbrew/perls/perl-5.32.0/lib/5.32.0/darwin-2level/CORE" BVXS.c
BVXS.xs:26:35: warning: passing 'char *' to parameter of type 'const U8 *' (aka 'const unsigned char *') converts between pointers to integer types with different sign [-Wpointer-sign]
*p++ = utf8n_to_uvchr (s, len, &clen, 0);
^
/Users/helmut/perl5/perlbrew/perls/perl-5.32.0/lib/5.32.0/darwin-2level/CORE/utf8.h:74:54: note: expanded from macro 'utf8n_to_uvchr'
utf8n_to_uvchr_error(s, len, lenp, flags, 0)
^
/Users/helmut/perl5/perlbrew/perls/perl-5.32.0/lib/5.32.0/darwin-2level/CORE/utf8.h:76:45: note: expanded from macro 'utf8n_to_uvchr_error'
utf8n_to_uvchr_msgs(s, len, lenp, flags, errors, 0)
^
/Users/helmut/perl5/perlbrew/perls/perl-5.32.0/lib/5.32.0/darwin-2level/CORE/inline.h:1781:36: note: passing argument to parameter 's' here
Perl_utf8n_to_uvchr_msgs(const U8 *s,
^
1 warning generated.
rm -f blib/arch/auto/Text/Levenshtein/BVXS/BVXS.bundle
cc -mmacosx-version-min=10.14 -bundle -undefined dynamic_lookup -L/usr/local/lib -L/opt/local/lib -fstack-protector-strong BVXS.o -o blib/arch/auto/Text/Levenshtein/BVXS/BVXS.bundle \
\
It works and passes my tests. But if I want to deliver it to CPAN the distribution should not throw warnings.
Decoding it with my own C code would be a workaround (and faster).
To me it looks like a bug in the XS macros, and/or the example in the documentation is wrong.
The interplay of U8 and char in the API is a bit weird. You might ask #p5p to see why it works that way.
Failing that, though, would some plain typecasting suppress the warnings? Is this in a public repository somewhere?
Aside: SvPV is evil. Its prevalence in XS modules causes quite a lot of pain. Avoid it when possible. See: https://dev.to/fgasper/perl-s-svpv-menace-5515
Update: This looks to be a case where it’s necessary to break the abstraction. Alas.
I have defined a macro for a gating assertion, but compilation fails with: endmodule expected at endproperty.
Code:
// gating_check(_name, _clock, _data, txen) - declare property _name plus an
// assert (<_name>_checker) and a cover (<_name>_cover) on it.
// The property body states: whenever txen is low, the OR-reduction of
// `TOP.<_data> equals 0 at some cycle 1 to 6 clocks later; it is disabled
// while `STIMULUS.RSTN_VEC_GEN is low.
// NOTE(review): "#(posedge ...)" is presumably meant to be the clocking
// event "@(posedge ...)" - confirm against the original source.
`define gating_check( _name, _clock, _data, txen) \
property _name ; \
#(posedge `TOP.``_clock``) disable iff (~`STIMULUS.RSTN_VEC_GEN) \
(~(txen) |-> ##[1:6] ( |`TOP.``_data`` == 0 ) ); \
endproperty \
``_name``_checker : assert property (_name) else $error("-E- property gating_check failed"); \
``_name``_cover : cover property (_name)
Pls help.
You just have an extra space after the "\" on two lines, 4 and 5. If you remove these trailing spaces, the code should compile.
The "\" is used to escape the end of the line, but a "\" followed by a space does not do so. Hence your macro ends early and the remaining lines become ordinary statements, which generates the error. Because it is a space character, it is difficult to spot :) .
Below is the code without the trailing space in line 4 and 5 .
// gating_check(_name, _clock, _data, txen) - declare property _name plus an
// assert (<_name>_checker) and a cover (<_name>_cover) on it.
// The property is clocked on the rising edge of `TOP.<_clock> and disabled
// while `STIMULUS.RSTN_VEC_GEN is low. It states: whenever txen is low, the
// OR-reduction of `TOP.<_data> equals 0 at some cycle 1 to 6 clocks later.
// Every continued line must end in "\" with nothing after it.
`define gating_check( _name, _clock, _data, txen) \
property _name ; \
@(posedge `TOP.``_clock``) disable iff (~`STIMULUS.RSTN_VEC_GEN) \
(~(txen) |-> ##[1:6] ( |`TOP.``_data`` == 0 ) ); \
endproperty \
``_name``_checker : assert property (_name) else $error("-E- property gating_check failed"); \
``_name``_cover : cover property (_name)
Today I found something that caught my attention: building a simple bare-metal OS.
I read about how to make it Multiboot compliant. I'm using NASM and GCC, and I need to make a loader that contains the Multiboot header and calls the main entry point of my system. For that I have two files, loader.asm and loader.ld.
loader.asm
; Boot stub: sets up a stack, carries the Multiboot header and calls the C entry point.
[BITS 32]
global start
start:
; Point ESP at the label that follows the 8192 reserved bytes in .bss.
mov esp, _sys_stack
; Jump over the header data below so that it is never executed.
jmp stublet
ALIGN 4
mboot:
MBOOT_PAGE_ALIGN equ 1<<0
MBOOT_MEMORY_INFO equ 1<<1
MBOOT_HEADER_MAGIC equ 0x1BADB002
MBOOT_HEADER_FLAGS equ MBOOT_PAGE_ALIGN | MBOOT_MEMORY_INFO
; The checksum is the negated sum of magic and flags, so the three dwords add up to zero.
MBOOT_CHECKSUM equ -(MBOOT_HEADER_MAGIC + MBOOT_HEADER_FLAGS)
; The Multiboot header itself: magic, flags, checksum.
dd MBOOT_HEADER_MAGIC
dd MBOOT_HEADER_FLAGS
dd MBOOT_CHECKSUM
stublet:
; NOTE(review): the C file shown with this loader defines GenyKernel_Main, not cmain - confirm that cmain exists.
EXTERN cmain
call cmain
; Spin forever if the C entry point returns.
jmp $
SECTION .bss
; 8 KiB of stack space; _sys_stack labels its upper end.
resb 8192
_sys_stack:
loader.ld
/* Link the image at 'phys' with .text, .data and .bss laid out back to back. */
ENTRY(start)
phys = 0x00100000;
SECTIONS
{
/* .text is placed at phys for both its address and its load address (AT). */
.text phys : AT(phys) {
code = .;
*(.text)
*(.rodata)
. = ALIGN(4096);
}
/* Load address = phys plus the offset of this section from the start of .text. */
.data : AT(phys + (data - code))
{
data = .;
*(.data)
. = ALIGN(4096);
}
.bss : AT(phys + (bss - code))
{
bss = .;
*(.bss)
. = ALIGN(4096);
}
/* 'end' marks the first address past the image. */
end = .;
}
main.c
/*
 * GenyKernel_Main - print "Hello world!" into the buffer at VIDEO_MEMORY.
 *
 * Each character is written as one 16-bit cell: the character code in the
 * low byte and the attribute 0x07 in the high byte (presumably VGA text
 * mode, where 0x07 is light grey on black).
 *
 * The buffer is accessed through a volatile pointer because it is device
 * memory that the program never reads back, and the string literal is held
 * through a pointer-to-const because it must not be modified.
 *
 * Returns 0.
 */
int GenyKernel_Main(void)
{
    const char *str = "Hello world!", *ch;
    volatile unsigned short *vidmem = (volatile unsigned short *) VIDEO_MEMORY;
    unsigned i;

    for (ch = str, i = 0; *ch; ch++, i++) {
        vidmem[i] = (unsigned char) *ch | 0x0700;
    }
    return 0;
}
for build i'm using
# loader.o
# NOTE(review): loader.asm declares [BITS 32] but a 64-bit ELF object is requested here - confirm the intended format.
nasm -f elf64 -o loader.o loader.asm
# main.o
# NOTE(review): no -m32 is passed, so the C object uses the compiler's default target - confirm it matches loader.o.
gcc -fno-stack-protector -fno-builtin -nostdinc -O -g -Wall -I. -c -o main.o main.c
and the finally link
# Link both objects into 'kernel' using the layout from loader.ld.
ld -T loader.ld -o kernel loader.o main.o
I've built a simple ISO with grub-mkrescue and run it with QEMU, but I always get an error.
I think the problem is in the file loader.ld, but I can't find where.
After reading a lot about the same .asm file I finally understood the problem: the following snippet
; Header emitted without any explicit section directive in front of it.
align 4
multiboot_header:
dd MBOOT_MAGIC
dd MBOOT_FLAGS
dd MBOOT_CHECKSUM
must be placed under a section directive so that the linker can place it appropriately:
; Same header, now emitted inside a named section.
; NOTE(review): the section name used here must be one that the linker script collects - confirm it matches.
section .text ; .multiboot o whatever
align 4
multiboot_header:
dd MBOOT_MAGIC
dd MBOOT_FLAGS
dd MBOOT_CHECKSUM
and in the linker script
/* Place the image at 1 MiB with the Multiboot header ahead of the code. */
ENTRY(your_entry_point)
SECTIONS
{
. = 0x00100000;
.text ALIGN(0x1000) :
{
/* Listed first so the header precedes everything from .text. */
*(.multiboot)
*(.text)
}
/* rest of sections */
}
I have the following bit of inline ARM assembly. It works in a debug build but crashes in a release build with iPhone SDK 3.1. The problem is the add instructions, where I increment the addresses held in the C variables output and x by 4 bytes, which is supposed to be the size of a float. I think that at some stage the increment overwrites something. Can anyone say what the best way to handle this is?
Thanks
C code that the asm is replacing, sum,output and x are all floats
/* Scalar reference: add output[i] times the next value read through x, for i in [0, count). */
for(int i = 0; i< count; i++)
sum+= output[i]* (*x++);
/*
 * VFP multiply-accumulate loop as posted in the question.
 * Operands: %0 = &sum, %1 = output, %2 = x, %3 = count (all input-only "r").
 * r4 is used as the loop counter; s0 is the accumulator, loaded from and
 * stored back to *%0.
 * With both pointer increments commented out, every iteration loads from
 * the same two addresses.
 * NOTE(review): the first commented-out add targets %3, which is the count
 * operand rather than a pointer.
 */
asm volatile(
".align 4 \n\t"
"mov r4,%3 \n\t"
"flds s0,[%0] \n\t"
"0: \n\t"
"flds s1,[%2] \n\t"
//"add %3,%3,#4 \n\t"
"flds s2,[%1] \n\t"
//"add %2,%2,#4 \n\t"
"subs r4,r4, #1 \n\t"
"fmacs s0, s1, s2 \n\t"
"bne 0b \n\t"
"fsts s0,[%0] \n\t"
:
: "r" (&sum), "r" (output), "r" (x),"r" (count)
: "r0","r4","cc", "memory",
"s0","s1","s2"
);
Did you mean to add 4 to %1 rather than to %3? Adding to %3 could cause damage if that register is used again after your function.
asm volatile(
".align 4 \n\t"
"mov r4,%3 \n\t"
"flds s0,[%0] \n\t"
"0: \n\t"
"flds s1,[%2] \n\t"
"add %2,%2,#4 \n\t"
"flds s2,[%1] \n\t"
"add %1,%1,#4 \n\t"
"subs r4,r4, #1 \n\t"
"fmacs s0, s1, s2 \n\t"
"bne 0b \n\t"
"fsts s0,[%0] \n\t"
:
: "r" (&sum), "r" (output), "r" (x),"r" (count)
: "r0","r4","cc", "memory",
"s0","s1","s2"
);